From a47e4ab0ff79cefec965c753745db3f9bf20719f Mon Sep 17 00:00:00 2001 From: Manuel Guzman Date: Tue, 10 Dec 2024 17:56:47 +0100 Subject: [PATCH 1/2] feat: allow rows parameter and no pagination (get first n elements) --- crossref/restful.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/crossref/restful.py b/crossref/restful.py index 67fdc9a..2a22d34 100644 --- a/crossref/restful.py +++ b/crossref/restful.py @@ -125,6 +125,7 @@ def __init__( # noqa: PLR0913 throttle=True, crossref_plus_token=None, timeout=30, + paginate=True, ): self.do_http_request = HTTPRequest(throttle=throttle).do_http_request self.etiquette = etiquette or Etiquette() @@ -136,6 +137,7 @@ def __init__( # noqa: PLR0913 self.request_params = request_params or {} self.context = context or "" self.timeout = timeout + self.paginate = paginate @property def _rate_limits(self): @@ -305,7 +307,8 @@ def __iter__(self): # noqa: PLR0912 - To many branches is not a problem. if self.CURSOR_AS_ITER_METHOD is True: request_params = dict(self.request_params) request_params["cursor"] = "*" - request_params["rows"] = LIMIT + if "rows" not in request_params: + request_params["rows"] = LIMIT while True: result = self.do_http_request( "get", @@ -326,11 +329,15 @@ def __iter__(self): # noqa: PLR0912 - To many branches is not a problem. for item in result["message"]["items"]: yield item + if not self.paginate: + return + request_params["cursor"] = result["message"]["next-cursor"] else: request_params = dict(self.request_params) request_params["offset"] = 0 - request_params["rows"] = LIMIT + real_limit = request_params["rows"] if "rows" in request_params else LIMIT + request_params["rows"] = real_limit while True: result = self.do_http_request( "get", @@ -351,7 +358,7 @@ def __iter__(self): # noqa: PLR0912 - To many branches is not a problem. for item in result["message"]["items"]: yield item - request_params["offset"] += LIMIT + request_params["offset"] += real_limit if request_params["offset"] >= MAXOFFSET: msg = "Offset exceded the max offset of %d" @@ -621,6 +628,7 @@ def order(self, order="asc"): context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def select(self, *args): @@ -725,6 +733,7 @@ def select(self, *args): context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def sort(self, sort="score"): @@ -786,6 +795,7 @@ def sort(self, sort="score"): context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def filter(self, **kwargs): # noqa: A003 @@ -843,6 +853,7 @@ def filter(self, **kwargs): # noqa: A003 context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def facet(self, facet_name, facet_count=100): @@ -1161,6 +1172,7 @@ def query(self, *args): request_params=request_params, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def filter(self, **kwargs): # noqa: A003 @@ -1217,6 +1229,7 @@ def filter(self, **kwargs): # noqa: A003 context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def funder(self, funder_id, only_message=True): @@ -1375,6 +1388,7 @@ def query(self, *args): context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def filter(self, **kwargs): # noqa: A003 @@ -1435,6 +1449,7 @@ def filter(self, **kwargs): # noqa: A003 context=context, etiquette=self.etiquette, timeout=self.timeout, + paginate=self.paginate, ) def member(self, member_id, only_message=True): From fdc0828920a24e535849d880fb7f64a223af8428 Mon Sep 17 00:00:00 2001 From: Manuel Guzman Date: Wed, 11 Dec 2024 08:36:18 +0100 Subject: [PATCH 2/2] fix: add pagination control when paginating with offset (CURSOR_AS_ITER_METHOD is False) --- crossref/restful.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crossref/restful.py b/crossref/restful.py index 2a22d34..3fc2bd8 100644 --- a/crossref/restful.py +++ b/crossref/restful.py @@ -358,6 +358,9 @@ def __iter__(self): # noqa: PLR0912 - To many branches is not a problem. for item in result["message"]["items"]: yield item + if not self.paginate: + return + request_params["offset"] += real_limit if request_params["offset"] >= MAXOFFSET: