From be6bfb22ed58ea49cc6cf658af99f712b6545f70 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Fri, 25 Sep 2020 02:24:43 -0400
Subject: [PATCH] tumblr_backup: Stop if API responses stop making forward progress

Sometimes, at least when backing up likes, the API can get stuck
endlessly returning the same set of posts instead of returning an empty
list. Inspect _links and stop if the offset/before fails to change.

Fixes #217
---
 tumblr_backup.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tumblr_backup.py b/tumblr_backup.py
index ca46b72..30bdc09 100755
--- a/tumblr_backup.py
+++ b/tumblr_backup.py
@@ -585,6 +585,7 @@ def _backup(posts):
             # Get the JSON entries from the API, which we can only do for MAX_POSTS posts at once.
             # Posts "arrive" in reverse chronological order. Post #0 is the most recent one.
             i = options.skip
+            last_next_offset = None
             while True:
                 # find the upper bound
                 log(account, "Getting posts %d to %d (of %d expected)\r" % (i, i + MAX_POSTS - 1, count_estimate))
@@ -596,9 +597,20 @@ def _backup(posts):
                     continue
 
                 posts = _get_content(soup)
-                # `_backup(posts)` can be empty even when `posts` is not if we don't backup reblogged posts
-                if not posts or not _backup(posts):
-                    log(account, "Backing up posts found empty set of posts, finishing\r")
+                if not posts:
+                    log(account, "Found empty set of posts, finishing\r")
+                    break
+
+                next_params = soup['response']['_links']['next']['query_params']
+                next_offset = next_params.get('offset') or next_params.get('before')
+                if next_offset is not None:
+                    if next_offset == last_next_offset:
+                        log(account, "Found same API response twice, finishing\r")
+                        break
+                    last_next_offset = next_offset
+
+                if not _backup(posts):
+                    log(account, "Found last requested post, finishing\r")
                     break
 
                 i += MAX_POSTS