From a19587bd484cc7a997c492c2032686b943e5dd71 Mon Sep 17 00:00:00 2001 From: David Limpo Date: Thu, 3 Mar 2016 15:36:19 +0000 Subject: [PATCH 1/2] Timeline + Advanced Search The previous solution used the Advanced Search URL, which missed many tweets. For that reason, I added the Timeline Search URL. The Timeline Search URL only allows downloading approx. 3200 tweets, so I implemented a solution that switches to the Advanced Search URL after collecting the first 3200 tweets. This is a good solution if you want to get all possible tweets allowed by Twitter from a specific user. --- .../me/jhenrique/manager/TweetManager.java | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/main/java/me/jhenrique/manager/TweetManager.java b/src/main/java/me/jhenrique/manager/TweetManager.java index c7e29c6..df38e38 100644 --- a/src/main/java/me/jhenrique/manager/TweetManager.java +++ b/src/main/java/me/jhenrique/manager/TweetManager.java @@ -43,7 +43,7 @@ public class TweetManager { * @return JSON response used by Twitter to build its results * @throws Exception */ - private static String getURLResponse(String username, String since, String until, String querySearch, String scrollCursor) throws Exception { + private static String getURLResponse(String username, String since, String until, String querySearch, String scrollCursor, boolean advancedSearch, String maxId) throws Exception { String appendQuery = ""; if (username != null) { appendQuery += "from:"+username; @@ -58,7 +58,15 @@ private static String getURLResponse(String username, String since, String until appendQuery += " "+querySearch; } - String url = String.format("https://twitter.com/i/search/timeline?f=realtime&q=%s&src=typd&max_position=%s", URLEncoder.encode(appendQuery, "UTF-8"), 
scrollCursor); + else + if(maxId == null) + url = String.format("https://twitter.com/i/profiles/show/%s/timeline/with_replies", username); + else + url = String.format("https://twitter.com/i/profiles/show/%s/timeline/with_replies?max_position=%s", username, maxId); HttpGet httpGet = new HttpGet(url); HttpEntity resp = defaultHttpClient.execute(httpGet).getEntity(); @@ -76,9 +84,24 @@ public static List getTweets(TwitterCriteria criteria) { try { String refreshCursor = null; + String refreshMaxId = null; + String mostRecentTweetId = null; + boolean advancedSearch = false; + if (criteria.getUsername() == null ) + advancedSearch = true; + outerLace: while (true) { - JSONObject json = new JSONObject(getURLResponse(criteria.getUsername(), criteria.getSince(), criteria.getUntil(), criteria.getQuerySearch(), refreshCursor)); - refreshCursor = json.getString("min_position"); + JSONObject json = new JSONObject(getURLResponse(criteria.getUsername(), criteria.getSince(), criteria.getUntil(), criteria.getQuerySearch(), refreshCursor, advancedSearch, refreshMaxId)); + + if (json.has("min_position") && !json.isNull("min_position")) + refreshCursor = json.getString("min_position"); + + if(json.has("min_position") && json.isNull("min_position") && !advancedSearch) { + advancedSearch = true; + refreshCursor = "TWEET-"+ refreshCursor + "-" + mostRecentTweetId; + json = new JSONObject(getURLResponse(criteria.getUsername(), criteria.getSince(), criteria.getUntil(), criteria.getQuerySearch(), refreshCursor, advancedSearch, refreshMaxId)); + } + Document doc = Jsoup.parse((String) json.get("items_html")); Elements tweets = doc.select("div.js-stream-tweet"); @@ -93,6 +116,8 @@ public static List getTweets(TwitterCriteria criteria) { int favorites = Integer.valueOf(tweet.select("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replaceAll(",", "")); long dateMs = Long.valueOf(tweet.select("small.time 
span.js-short-timestamp").attr("data-time-ms")); String id = tweet.attr("data-tweet-id"); + if (mostRecentTweetId == null) mostRecentTweetId = id; + String permalink = tweet.attr("data-permalink-path"); String geo = ""; @@ -120,6 +145,7 @@ public static List getTweets(TwitterCriteria criteria) { if (criteria.getMaxTweets() > 0 && results.size() >= criteria.getMaxTweets()) { break outerLace; } + refreshMaxId = id; } } } catch (Exception e) { From 28c86d74c6050e2fccce30c73d51a3f6144325d1 Mon Sep 17 00:00:00 2001 From: David Limpo Date: Thu, 3 Mar 2016 15:36:59 +0000 Subject: [PATCH 2/2] Main Example updated to Main --- src/main/java/me/jhenrique/main/Main.java | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/main/java/me/jhenrique/main/Main.java b/src/main/java/me/jhenrique/main/Main.java index 62bb88b..1f3ff36 100644 --- a/src/main/java/me/jhenrique/main/Main.java +++ b/src/main/java/me/jhenrique/main/Main.java @@ -4,6 +4,8 @@ import me.jhenrique.manager.TwitterCriteria; import me.jhenrique.model.Tweet; +import java.util.List; + public class Main { public static void main(String[] args) { @@ -12,6 +14,7 @@ public static void main(String[] args) { */ TwitterCriteria criteria = null; Tweet t = null; + List tweetList = null; /** * Example 1 - Get tweets by username @@ -65,6 +68,27 @@ public static void main(String[] args) { System.out.println("Mentions: " + t.getMentions()); System.out.println("Hashtags: " + t.getHashtags()); System.out.println(); + + + /** + * Example 4 - Get all possible tweets by username (timeline tweets + advanced search tweets) + **/ + System.out.println("### Example 4 - Get all possible tweets by username (timeline tweets + advanced search tweets) [github]"); + + System.out.println("Retrieving all tweets from github..."); + + criteria = TwitterCriteria.create() + .setUsername("@github"); + + tweetList = TweetManager.getTweets(criteria); + + for (Tweet tweet : tweetList) + System.out.println("@" + tweet.getUsername() 
+ " : " + tweet.getText() + tweet.getDate()); + + + System.out.println(); + System.out.println("Printed " + tweetList.size() + " tweets from @github."); + } } \ No newline at end of file