Fix version conflicts, rearrange order of search parameters #110

Open · wants to merge 1 commit into master
Binary file added .DS_Store
Binary file not shown.
12 changes: 6 additions & 6 deletions Exporter.py
@@ -13,7 +13,7 @@ def main(argv):

if len(argv) == 1 and argv[0] == '-h':
f = open('exporter_help_text.txt', 'r')
print f.read()
print(f.read())
f.close()

return
@@ -42,10 +42,10 @@ def main(argv):

elif opt == '--maxtweets':
tweetCriteria.maxTweets = int(arg)

elif opt == '--near':
tweetCriteria.near = '"' + arg + '"'

elif opt == '--within':
tweetCriteria.within = '"' + arg + '"'

@@ -54,7 +54,7 @@ def main(argv):

elif opt == '--output':
outputFileName = arg

outputFile = codecs.open(outputFileName, "w+", "utf-8")

outputFile.write('username;date;retweets;favorites;text;geo;mentions;hashtags;id;permalink')
@@ -69,8 +69,8 @@ def receiveBuffer(tweets):

got.manager.TweetManager.getTweets(tweetCriteria, receiveBuffer)

except arg:
print('Arguments parser error, try -h' + arg)
except Exception as exc:
print('Arguments parser error, try -h. %s' % (exc))
finally:
outputFile.close()
print('Done. Output file generated "%s".' % outputFileName)
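Note on the Exporter.py hunk above: it replaces the Python 2 print statement and the fragile `except arg:` clause (which tries to catch the string held in arg) with `print()` and `except Exception as exc:`. A minimal, self-contained sketch of the resulting error-handling pattern; the parse_args helper and the exit code are illustrative, not part of the PR:

import sys

def parse_args(argv):
    # Stand-in for the getopt-based parsing in Exporter.py
    if not argv:
        raise ValueError('no arguments given')
    return argv

def main(argv):
    try:
        parse_args(argv)
    except Exception as exc:
        # Python 3 form adopted by the PR, replacing 'except arg:'
        print('Arguments parser error, try -h. %s' % (exc))
        sys.exit(1)

if __name__ == '__main__':
    main(sys.argv[1:])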
60 changes: 30 additions & 30 deletions got/manager/TweetManager.py
@@ -3,18 +3,18 @@
from pyquery import PyQuery

class TweetManager:

def __init__(self):
pass

@staticmethod
def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
refreshCursor = ''

results = []
resultsAux = []
cookieJar = cookielib.CookieJar()

if hasattr(tweetCriteria, 'username') and (tweetCriteria.username.startswith("\'") or tweetCriteria.username.startswith("\"")) and (tweetCriteria.username.endswith("\'") or tweetCriteria.username.endswith("\"")):
tweetCriteria.username = tweetCriteria.username[1:-1]

@@ -25,29 +25,29 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
if len(json['items_html'].strip()) == 0:
break

refreshCursor = json['min_position']
refreshCursor = json['min_position']
tweets = PyQuery(json['items_html'])('div.js-stream-tweet')

if len(tweets) == 0:
break

for tweetHTML in tweets:
tweetPQ = PyQuery(tweetHTML)
tweet = models.Tweet()

usernameTweet = tweetPQ("span:first.username.u-dir b").text();
txt = re.sub(r"\s+", " ", tweetPQ("p.js-tweet-text").text().replace('# ', '#').replace('@ ', '@'));
retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
favorites = int(tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
dateSec = int(tweetPQ("small.time span.js-short-timestamp").attr("data-time"));
id = tweetPQ.attr("data-tweet-id");
permalink = tweetPQ.attr("data-permalink-path");

geo = ''
geoSpan = tweetPQ('span.Tweet-geo')
if len(geoSpan) > 0:
geo = geoSpan.attr('title')

tweet.id = id
tweet.permalink = 'https://twitter.com' + permalink
tweet.username = usernameTweet
@@ -58,52 +58,52 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
tweet.mentions = " ".join(re.compile('(@\\w*)').findall(tweet.text))
tweet.hashtags = " ".join(re.compile('(#\\w*)').findall(tweet.text))
tweet.geo = geo

results.append(tweet)
resultsAux.append(tweet)

if receiveBuffer and len(resultsAux) >= bufferLength:
receiveBuffer(resultsAux)
resultsAux = []

if tweetCriteria.maxTweets > 0 and len(results) >= tweetCriteria.maxTweets:
active = False
break


if receiveBuffer and len(resultsAux) > 0:
receiveBuffer(resultsAux)

return results

@staticmethod
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&max_position=%s"

urlGetData = ''

if hasattr(tweetCriteria, 'username'):
urlGetData += ' from:' + tweetCriteria.username

if hasattr(tweetCriteria, 'querySearch'):
urlGetData += ' ' + tweetCriteria.querySearch

if hasattr(tweetCriteria, 'near'):
urlGetData += "&near:" + tweetCriteria.near + " within:" + tweetCriteria.within

if hasattr(tweetCriteria, 'since'):
urlGetData += ' since:' + tweetCriteria.since

if hasattr(tweetCriteria, 'until'):
urlGetData += ' until:' + tweetCriteria.until


if hasattr(tweetCriteria, 'topTweets'):
if tweetCriteria.topTweets:
url = "https://twitter.com/i/search/timeline?q=%s&src=typd&max_position=%s"



url = url % (urllib.quote(urlGetData), refreshCursor)

headers = [
@@ -129,7 +129,7 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
print "Twitter weird response. Try to see on browser: https://twitter.com/search?q=%s&src=typd" % urllib.quote(urlGetData)
sys.exit()
return

dataJson = json.loads(jsonResponse)
return dataJson

return dataJson
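Note on getJsonReponse above (and its got3 counterpart below): the functional part of this PR is the order and formatting of the search operators concatenated into urlGetData before URL-encoding. A rough sketch of that assembly, written in Python 3 syntax for brevity (the helper name and example values are made up; the got/ module itself stays on Python 2 and urllib.quote):

import urllib.parse

def build_query(username=None, query_search=None, near=None, within='15mi',
                since=None, until=None):
    # Twitter advanced-search operators are joined into a single q= value
    parts = []
    if query_search:
        parts.append(query_search)
    if username:
        parts.append('from:' + username)
    if since:
        parts.append('since:' + since)
    if until:
        parts.append('until:' + until)
    if near:
        parts.append('near:' + near + ' within:' + within)
    url_get_data = ' '.join(parts)
    url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&max_position=%s"
    return url % (urllib.parse.quote(url_get_data), '')

print(build_query(username='twitter', since='2015-01-01', until='2015-02-01'))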
69 changes: 41 additions & 28 deletions got3/manager/TweetManager.py
@@ -3,14 +3,14 @@
from pyquery import PyQuery

class TweetManager:

def __init__(self):
pass

@staticmethod
def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
refreshCursor = ''

results = []
resultsAux = []
cookieJar = http.cookiejar.CookieJar()
@@ -22,16 +22,16 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
if len(json['items_html'].strip()) == 0:
break

refreshCursor = json['min_position']
refreshCursor = json['min_position']
tweets = PyQuery(json['items_html'])('div.js-stream-tweet')

if len(tweets) == 0:
break

for tweetHTML in tweets:
tweetPQ = PyQuery(tweetHTML)
tweet = models.Tweet()

usernameTweet = tweetPQ("span.username.js-action-profile-name b").text();
txt = re.sub(r"\s+", " ", tweetPQ("p.js-tweet-text").text().replace('# ', '#').replace('@ ', '@'));
retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
@@ -40,7 +40,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
id = tweetPQ.attr("data-tweet-id");
permalink = tweetPQ.attr("data-permalink-path");
user_id = int(tweetPQ("a.js-user-profile-link").attr("data-user-id"))

geo = ''
geoSpan = tweetPQ('span.Tweet-geo')
if len(geoSpan) > 0:
@@ -54,7 +54,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
tweet.id = id
tweet.permalink = 'https://twitter.com' + permalink
tweet.username = usernameTweet

tweet.text = txt
tweet.date = datetime.datetime.fromtimestamp(dateSec)
tweet.formatted_date = datetime.datetime.fromtimestamp(dateSec).strftime("%a %b %d %X +0000 %Y")
@@ -65,45 +65,58 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
tweet.geo = geo
tweet.urls = ",".join(urls)
tweet.author_id = user_id

results.append(tweet)
resultsAux.append(tweet)

if receiveBuffer and len(resultsAux) >= bufferLength:
receiveBuffer(resultsAux)
resultsAux = []

if tweetCriteria.maxTweets > 0 and len(results) >= tweetCriteria.maxTweets:
active = False
break


if receiveBuffer and len(resultsAux) > 0:
receiveBuffer(resultsAux)

return results

@staticmethod
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&%smax_position=%s"

urlGetData = ''
if hasattr(tweetCriteria, 'querySearch'):
urlGetData += ' ' + tweetCriteria.querySearch

if hasattr(tweetCriteria, 'username'):
urlGetData += ' from:' + tweetCriteria.username

if hasattr(tweetCriteria, 'since'):
urlGetData += ' since:' + tweetCriteria.since

if hasattr(tweetCriteria, 'until'):
urlGetData += ' until:' + tweetCriteria.until

if hasattr(tweetCriteria, 'querySearch'):
urlGetData += ' ' + tweetCriteria.querySearch


if hasattr(tweetCriteria, "near"):
urlGetData += " near:" + tweetCriteria.near

if hasattr(tweetCriteria, "within"):
urlGetData += " within:%imi" % (int(tweetCriteria.within))
else:
urlGetData += " within:15mi"

if hasattr(tweetCriteria, 'lang'):
urlLang = 'lang=' + tweetCriteria.lang + '&'
else:
urlLang = ''

if hasattr(tweetCriteria, 'topTweets'):
if tweetCriteria.topTweets:
url = "https://twitter.com/i/search/timeline?q=%s&src=typd&max_position=%s"

url = url % (urllib.parse.quote(urlGetData), urlLang, refreshCursor)
#print(url)

@@ -118,9 +118,9 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
]

if proxy:
opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy, 'https': proxy}), urllib2.HTTPCookieProcessor(cookieJar))
opener = urllib.request.build_opener(urllib.request.ProxyHandler({'http': proxy, 'https': proxy}), urllib.request.HTTPCookieProcessor(cookieJar))
else:
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookieJar))
opener.addheaders = headers

try:
@@ -132,7 +132,7 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
print("Unexpected error:", sys.exc_info()[0])
sys.exit()
return

dataJson = json.loads(jsonResponse.decode())
return dataJson

return dataJson
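For context on the opener change in the last hunk: got3 drops the Python 2 urllib2 calls in favour of urllib.request, keeping the same CookieJar and optional proxy handling. A self-contained sketch of that setup (the make_opener wrapper and User-Agent value are illustrative, not code from the PR):

import http.cookiejar
import urllib.request

def make_opener(cookie_jar, proxy=None):
    # Optional proxy handler first, then the cookie processor, as in getJsonReponse
    handlers = []
    if proxy:
        handlers.append(urllib.request.ProxyHandler({'http': proxy, 'https': proxy}))
    handlers.append(urllib.request.HTTPCookieProcessor(cookie_jar))
    opener = urllib.request.build_opener(*handlers)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    return opener

jar = http.cookiejar.CookieJar()
opener = make_opener(jar)  # pass proxy='host:port' to route requests through a proxy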