Commit 13a0fcb7 authored by Joshua
Browse files

Refactored the way tweepy does parsing to make it more customizable by developers.

All parsing of the response payload is now handled by a Parser class defined in
tweepy/parsers.py.
The default parser is ModelParser, which parses a JSON payload into a model instance.

Developers may define and use their own custom parsers by extending the Parser class.
To use a custom parser, pass an instance of it to the API constructor:

    api = API(parser=MyParser())
parent 68c0f829
This diff is collapsed.
......@@ -7,25 +7,13 @@ import urllib
import time
import re
from tweepy.parsers import parse_error
from tweepy.error import TweepError
try:
import simplejson as json
except ImportError:
try:
import json # Python 2.6+
except ImportError:
try:
from django.utils import simplejson as json # Google App Engine
except ImportError:
raise ImportError, "Can't load a json library"
re_path_template = re.compile('{\w+}')
def bind_api(path, parser, allowed_param=[], method='GET', require_auth=False,
timeout=None, search_api = False):
def bind_api(path, payload_type=None, payload_list=False, allowed_param=[], method='GET',
require_auth=False, timeout=None, search_api = False):
def _call(api, *args, **kargs):
# If require auth, throw exception if credentials not provided
......@@ -159,36 +147,16 @@ def bind_api(path, parser, allowed_param=[], method='GET', require_auth=False,
error_msg = "Twitter error response: status code = %s" % resp.status
raise TweepError(error_msg)
# Parse json respone body
try:
jobject = json.loads(resp.read())
except Exception, e:
raise TweepError("Failed to parse json: %s" % e)
# Parse cursor infomation
if isinstance(jobject, dict):
next_cursor = jobject.get('next_cursor')
prev_cursor = jobject.get('previous_cursor')
else:
next_cursor = None
prev_cursor = None
# Pass json object into parser
try:
if parameters and 'cursor' in parameters:
out = parser(jobject, api), next_cursor, prev_cursor
else:
out = parser(jobject, api)
except Exception, e:
raise TweepError("Failed to parse response: %s" % e)
# Parse the response payload
result = api.parser.parse(api, payload_type, payload_list, resp.read())
conn.close()
# store result in cache
if api.cache and method == 'GET':
api.cache.store(url, out)
if api.cache and method == 'GET' and result:
api.cache.store(url, result)
return out
return result
# Set pagination mode
......
......@@ -3,10 +3,19 @@
# See LICENSE
from tweepy.error import TweepError
from tweepy.utils import parse_datetime, parse_html_value, parse_a_href, \
parse_search_datetime, unescape_html
class ResultSet(list):
    """A list like object that holds results from a Twitter API query.

    It adds no behavior of its own on top of the built-in list; being a
    regular class, instances can also carry extra attributes.
    """
class Model(object):
def __init__(self, api=None):
    # Remember the API object (if any) so instance helper methods can call
    # back through it via self._api.
    self._api = api
def __getstate__(self):
# pickle
pickle = {}
......@@ -17,9 +26,44 @@ class Model(object):
pickle[k] = v
return pickle
@classmethod
def parse(cls, api, json):
    """Parse a JSON object into a model instance.

    The base implementation always raises NotImplementedError; it is a
    placeholder for concrete models to replace.
    """
    raise NotImplementedError
@classmethod
def parse_list(cls, api, json_list):
    """Run ``cls.parse`` over every item of json_list.

    Returns a ResultSet holding the parsed instances in input order.
    """
    return ResultSet(cls.parse(api, item) for item in json_list)
class Status(Model):
@classmethod
def parse(cls, api, json):
    """Parse a status JSON object into a model instance.

    Every key of ``json`` becomes an attribute on the new instance, with
    these special cases:

    * ``user`` is parsed with ``User.parse`` and stored as both ``author``
      and ``user`` (the latter is deprecated).
    * ``created_at`` is converted with ``parse_datetime``.
    * ``source`` is reduced to its text when it contains markup, and the
      link target is stored separately as ``source_url``.
    * ``retweeted_status`` is itself a status and is parsed recursively.

    Returns the populated instance.
    """
    status = cls(api)
    for k, v in json.items():
        if k == 'user':
            user = User.parse(api, v)
            setattr(status, 'author', user)
            setattr(status, 'user', user)  # DEPRECATED
        elif k == 'created_at':
            setattr(status, k, parse_datetime(v))
        elif k == 'source':
            if '<' in v:
                # Value is an anchor tag: keep the label and the href apart.
                setattr(status, k, parse_html_value(v))
                setattr(status, 'source_url', parse_a_href(v))
            else:
                setattr(status, k, v)
        elif k == 'retweeted_status':
            # The embedded object is a status, not a user; parsing it as a
            # User would skip the author/source handling above.
            setattr(status, k, cls.parse(api, v))
        else:
            setattr(status, k, v)
    return status
def destroy(self):
    # Delegate to the API object this instance was built with, passing
    # this status's own id.
    return self._api.destroy_status(self.id)
......@@ -35,6 +79,36 @@ class Status(Model):
class User(Model):
@classmethod
def parse(cls, api, json):
    """Parse a user JSON object into a model instance.

    Each key becomes an attribute. ``created_at`` is converted with
    ``parse_datetime``, ``status`` is parsed with ``Status.parse`` and
    ``following`` is normalised to a strict boolean.
    """
    user = cls(api)
    for key, value in json.items():
        if key == 'created_at':
            value = parse_datetime(value)
        elif key == 'status':
            value = Status.parse(api, value)
        elif key == 'following':
            # twitter sets this to null if it is false
            value = value is True
        setattr(user, key, value)
    return user
@classmethod
def parse_list(cls, api, json_list):
    """Parse a collection of user JSON objects into a ResultSet.

    ``json_list`` is either the list of users itself or a mapping that
    holds that list under the ``'users'`` key.
    """
    entries = json_list
    if not isinstance(entries, list):
        entries = entries['users']
    return ResultSet(cls.parse(api, entry) for entry in entries)
def timeline(self, **kargs):
    # Delegate to the API's user_timeline for this user's id; any extra
    # keyword arguments are forwarded unchanged.
    return self._api.user_timeline(user_id=self.id, **kargs)
......@@ -67,32 +141,113 @@ class User(Model):
class DirectMessage(Model):
    """Model for a direct message."""

    @classmethod
    def parse(cls, api, json):
        """Parse a direct message JSON object into a model instance.

        ``sender`` and ``recipient`` are parsed with ``User.parse`` and
        ``created_at`` with ``parse_datetime``; every other key is copied
        onto the instance as-is.
        """
        message = cls(api)
        for key, value in json.items():
            if key in ('sender', 'recipient'):
                value = User.parse(api, value)
            elif key == 'created_at':
                value = parse_datetime(value)
            setattr(message, key, value)
        return message

    def destroy(self):
        """Delete this message through the API object it was built with."""
        message_id = self.id
        return self._api.destroy_direct_message(message_id)
class Friendship(Model):
    """Model for one side of a friendship relationship."""

    @classmethod
    def parse(cls, api, json):
        """Parse a relationship payload into a ``(source, target)`` tuple.

        Both halves of ``json['relationship']`` are turned into separate
        instances whose attributes mirror the corresponding mapping.
        """
        relationship = json['relationship']
        sides = []
        # Source first, then target, matching the order of the result.
        for side_name in ('source', 'target'):
            side = cls(api)
            for key, value in relationship[side_name].items():
                setattr(side, key, value)
            sides.append(side)
        return tuple(sides)
class SavedSearch(Model):
    """Model for a saved search."""

    @classmethod
    def parse(cls, api, json):
        """Parse a saved search JSON object into a model instance.

        ``created_at`` is converted with ``parse_datetime``; all other
        keys are copied onto the instance unchanged.
        """
        saved = cls(api)
        for key, value in json.items():
            if key == 'created_at':
                value = parse_datetime(value)
            setattr(saved, key, value)
        return saved

    def destroy(self):
        """Delete this saved search through the API object."""
        search_id = self.id
        return self._api.destroy_saved_search(search_id)
class SearchResult(Model):
    """Model for a single search API result."""

    @classmethod
    def parse(cls, api, json):
        """Parse one search result JSON object into a model instance.

        ``created_at`` is converted with ``parse_search_datetime`` and
        ``source`` is unescaped and stripped of its surrounding tag. The
        instance is created without an API reference (``cls()``).
        """
        result = cls()
        for key, value in json.items():
            if key == 'created_at':
                value = parse_search_datetime(value)
            elif key == 'source':
                value = parse_html_value(unescape_html(value))
            setattr(result, key, value)
        return result

    @classmethod
    def parse_list(cls, api, json_list, result_set=None):
        """Parse a search response mapping into a ResultSet.

        Paging/query metadata is copied from the mapping onto the result
        set as attributes (None when a key is absent), and every entry of
        ``json_list['results']`` is parsed into an instance.
        ``result_set`` is accepted but not used.
        """
        results = ResultSet()
        metadata_keys = (
            'max_id', 'since_id', 'refresh_url', 'next_page',
            'results_per_page', 'page', 'completed_in', 'query',
        )
        for name in metadata_keys:
            setattr(results, name, json_list.get(name))
        results.extend(cls.parse(api, entry) for entry in json_list['results'])
        return results
class Retweet(Model):
    """Model for a retweet; slated for removal (see the TODO below)."""
    #TODO: remove me

    def destroy(self):
        # Delegate to the API's destroy_status using this object's id.
        return self._api.destroy_status(self.id)
class List(Model):
@classmethod
def parse(cls, api, json):
    """Parse a list JSON object into a model instance.

    ``user`` is parsed with ``User.parse``; every other key is copied
    onto the instance unchanged.

    Returns an instance of ``cls``, so subclasses get their own type.
    """
    # Instantiate cls, not a hard-coded List, so subclasses are honoured.
    lst = cls(api)
    for k, v in json.items():
        if k == 'user':
            setattr(lst, k, User.parse(api, v))
        else:
            setattr(lst, k, v)
    return lst
@classmethod
def parse_list(cls, api, json_list, result_set=None):
    """Parse the entries under ``json_list['lists']`` into a ResultSet.

    ``result_set`` is accepted but not used.
    """
    return ResultSet(cls.parse(api, entry) for entry in json_list['lists'])
def update(self, **kargs):
    # Delegate to the API's update_list for this list's slug; keyword
    # arguments are forwarded unchanged.
    return self._api.update_list(self.slug, **kargs)
......@@ -127,6 +282,23 @@ class List(Model):
return self._api.is_subscribed_list(self.user.screen_name, self.slug, id)
class JSONModel(Model):
    """Model whose parse step is a pass-through."""

    @classmethod
    def parse(cls, api, json):
        # Hand the decoded JSON back untouched; no instance is created.
        return json
class IDModel(Model):
    """Model that extracts a list of ids from a payload."""

    @classmethod
    def parse(cls, api, json):
        """Return the ids carried by ``json``.

        A list is returned as-is; anything else is expected to hold the
        ids under the ``'ids'`` key.
        """
        if not isinstance(json, list):
            return json['ids']
        return json
class ModelFactory(object):
"""
Used by parsers for creating instances
......@@ -143,3 +315,6 @@ class ModelFactory(object):
retweet = Retweet
list = List
json = JSONModel
ids = IDModel
......@@ -2,265 +2,39 @@
# Copyright 2009 Joshua Roesslein
# See LICENSE
import htmlentitydefs
import re
from datetime import datetime
import time
from tweepy.models import ModelFactory
from tweepy.utils import import_simplejson
class ResultSet(list):
    """A list like object that holds results from a Twitter API query.

    It adds no behavior of its own on top of the built-in list.
    """
class Parser(object):
payload_format = 'json'
def _parse_cursor(obj):
def parse(self, api, payload_type, payload_list, payload):
"""Parse the response payload and return the result."""
raise NotImplementedError
return obj['next_cursor'], obj['prev_cursor']
def parse_json(obj, api):
class ModelParser(Parser):
return obj
def __init__(self, model_factory=None):
self.model_factory = model_factory or ModelFactory
self.json_lib = import_simplejson()
def parse(self, api, payload_type, payload_list, payload):
try:
if payload_type is None: return
model = getattr(self.model_factory, payload_type)
except AttributeError:
raise TweepError('No model for this payload type: %s' % method.payload_type)
def parse_return_true(obj, api):
try:
json = self.json_lib.loads(payload)
except Exception, e:
raise TweepError('Failed to parse JSON: %s' % e)
return True
def parse_none(obj, api):
    """Ignore the payload entirely and return None."""
    return
def parse_error(obj):
    """Return the value stored under the ``'error'`` key of obj."""
    message = obj['error']
    return message
def _parse_datetime(str):
# We must parse datetime this way to work in python 2.4
return datetime(*(time.strptime(str, '%a %b %d %H:%M:%S +0000 %Y')[0:6]))
def _parse_search_datetime(str):
# python 2.4
return datetime(*(time.strptime(str, '%a, %d %b %Y %H:%M:%S +0000')[0:6]))
def unescape_html(text):
    """Replace HTML character references and named entities in text.

    Created by Fredrik Lundh (http://effbot.org/zone/re-sub.htm#unescape-html)

    Every ``&...;`` sequence matched by the pattern below is passed to
    ``fixup``; sequences that cannot be decoded are left unchanged.
    """
    def fixup(m):
        # Shadows the outer ``text`` on purpose: this is the matched entity.
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    # hexadecimal form
                    return unichr(int(text[3:-1], 16))
                else:
                    # decimal form
                    return unichr(int(text[2:-1]))
            except ValueError:
                # digits did not parse; fall through and return the match
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                # unknown entity name; fall through and return the match
                pass
        return text # leave as is
    return re.sub("&#?\w+;", fixup, text)
def _parse_html_value(html):
return html[html.find('>')+1:html.rfind('<')]
def _parse_a_href(atag):
start = atag.find('"') + 1
end = atag.find('"', start)
return atag[start:end]
def parse_user(obj, api):
    """Build a user model from a JSON mapping.

    The instance comes from ``api.model_factory.user()`` and gets ``api``
    attached as ``_api``. ``created_at`` is converted with
    ``_parse_datetime``, ``status`` with ``parse_status`` and ``following``
    is normalised to a strict boolean.
    """
    user = api.model_factory.user()
    user._api = api
    for key, value in obj.items():
        if key == 'created_at':
            value = _parse_datetime(value)
        elif key == 'status':
            value = parse_status(value, api)
        elif key == 'following':
            # twitter sets this to null if it is false
            value = value is True
        setattr(user, key, value)
    return user
def parse_users(obj, api):
    """Parse a collection of user mappings into a ResultSet.

    ``obj`` is either the list itself or a mapping holding the list under
    ``'users'``. Parsing stops at the first None entry.
    """
    entries = obj
    if not isinstance(entries, list):
        entries = entries['users']

    users = ResultSet()
    for entry in entries:
        if entry is None:
            # sometimes an empty list with a null in it
            break
        users.append(parse_user(entry, api))
    return users
def parse_status(obj, api):
status = api.model_factory.status()
status._api = api
for k, v in obj.items():
if k == 'user':
user = parse_user(v, api)
setattr(status, 'author', user)
setattr(status, 'user', user) # DEPRECIATED
elif k == 'created_at':
setattr(status, k, _parse_datetime(v))
elif k == 'source':
if '<' in v:
setattr(status, k, _parse_html_value(v))
setattr(status, 'source_url', _parse_a_href(v))
else:
setattr(status, k, v)
elif k == 'retweeted_status':
setattr(status, k, parse_status(v, api))
if payload_list:
return model.parse_list(api, json)
else:
setattr(status, k, v)
return status
def parse_statuses(obj, api):
    """Parse each item of obj with ``parse_status`` into a ResultSet."""
    return ResultSet(parse_status(entry, api) for entry in obj)
def parse_dm(obj, api):
    """Build a direct message model from a JSON mapping.

    The instance comes from ``api.model_factory.direct_message()`` and gets
    ``api`` attached as ``_api``. ``sender`` and ``recipient`` are parsed
    with ``parse_user`` and ``created_at`` with ``_parse_datetime``.
    """
    message = api.model_factory.direct_message()
    message._api = api
    for key, value in obj.items():
        if key in ('sender', 'recipient'):
            value = parse_user(value, api)
        elif key == 'created_at':
            value = _parse_datetime(value)
        setattr(message, key, value)
    return message
def parse_directmessages(obj, api):
    """Parse each item of obj with ``parse_dm`` into a ResultSet."""
    return ResultSet(parse_dm(entry, api) for entry in obj)
def parse_friendship(obj, api):
    """Parse a relationship payload into a ``(source, target)`` tuple.

    Each side is a fresh ``api.model_factory.friendship()`` instance whose
    attributes mirror the matching mapping in ``obj['relationship']``.
    """
    relationship = obj['relationship']
    sides = []
    # Source first, then target, matching the order of the result.
    for side_name in ('source', 'target'):
        side = api.model_factory.friendship()
        for key, value in relationship[side_name].items():
            setattr(side, key, value)
        sides.append(side)
    return tuple(sides)
def parse_ids(obj, api):
    """Return the ids carried by obj.

    A list is returned as-is; anything else is expected to hold the ids
    under the ``'ids'`` key.
    """
    if isinstance(obj, list):
        return obj
    return obj['ids']
def parse_saved_search(obj, api):
    """Build a saved search model from a JSON mapping.

    The instance comes from ``api.model_factory.saved_search()`` and gets
    ``api`` attached as ``_api``. ``created_at`` is converted with
    ``_parse_datetime``; other keys are copied unchanged.
    """
    saved = api.model_factory.saved_search()
    saved._api = api
    for key, value in obj.items():
        if key == 'created_at':
            value = _parse_datetime(value)
        setattr(saved, key, value)
    return saved
def parse_saved_searches(obj, api):
    """Parse each item of obj with ``parse_saved_search`` into a ResultSet.

    The earlier version also built a throwaway instance from
    ``api.model_factory.saved_search()`` that was never used; that needless
    instantiation has been dropped.
    """
    saved_searches = ResultSet()
    for item in obj:
        saved_searches.append(parse_saved_search(item, api))
    return saved_searches
def parse_search_result(obj, api):
    """Build a search result model from a JSON mapping.

    The instance comes from ``api.model_factory.search_result()``.
    ``created_at`` is converted with ``_parse_search_datetime`` and
    ``source`` is unescaped and stripped of its surrounding tag.
    """
    result = api.model_factory.search_result()
    for key, value in obj.items():
        if key == 'created_at':
            value = _parse_search_datetime(value)
        elif key == 'source':
            value = _parse_html_value(unescape_html(value))
        setattr(result, key, value)
    return result
def parse_search_results(obj, api):
    """Parse a search response mapping into a ResultSet.

    Paging/query metadata is copied from obj onto the result set as
    attributes (None when a key is absent), and every entry of
    ``obj['results']`` is parsed with ``parse_search_result``.
    """
    results = ResultSet()
    metadata_keys = (
        'max_id', 'since_id', 'refresh_url', 'next_page',
        'results_per_page', 'page', 'completed_in', 'query',
    )
    for name in metadata_keys:
        setattr(results, name, obj.get(name))
    results.extend(parse_search_result(entry, api) for entry in obj['results'])
    return results
def parse_list(obj, api):
    """Build a list model from a JSON mapping.

    The instance comes from ``api.model_factory.list()`` and gets ``api``
    attached as ``_api``. ``user`` is parsed with ``parse_user``; other
    keys are copied unchanged.
    """
    lst = api.model_factory.list()
    lst._api = api
    for key, value in obj.items():
        if key == 'user':
            value = parse_user(value, api)
        setattr(lst, key, value)
    return lst
def parse_lists(obj, api):
    """Parse the entries under ``obj['lists']`` into a ResultSet."""
    return ResultSet(parse_list(entry, api) for entry in obj['lists'])
return model.parse(api, json)
......@@ -9,7 +9,7 @@ from time import sleep
import urllib
from tweepy.auth import BasicAuthHandler
from tweepy.parsers import parse_status
from tweepy.models import Status
from tweepy.api import API
from tweepy.error import TweepError
......@@ -40,7 +40,7 @@ class StreamListener(object):
"""
if 'in_reply_to_status_id' in data:
status = parse_status(json.loads(data), self.api)
status = Status.parse(</