diff --git a/dandiapi/api/tests/test_pagination.py b/dandiapi/api/tests/test_pagination.py new file mode 100644 index 000000000..bc890be81 --- /dev/null +++ b/dandiapi/api/tests/test_pagination.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import pytest + + +@pytest.mark.django_db() +def test_dandiset_pagination(api_client, dandiset_factory): + endpoint = '/api/dandisets/' + for _ in range(10): + dandiset_factory() + + # First page + resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json() + assert resp['count'] == 10 + assert resp['next'] is not None + page_one = resp['results'] + assert len(page_one) == 5 + + # Second page + resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json() + assert resp['count'] is None + assert resp['next'] is None + page_two = resp['results'] + assert len(page_two) == 5 + + # Full page + resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json() + assert resp['count'] == 10 + assert resp['next'] is None + full_page = resp['results'] + assert len(full_page) == 10 + + # Assert full list is ordered the same as both paginated lists + assert full_page == page_one + page_two + + +@pytest.mark.django_db() +def test_version_pagination(api_client, dandiset, published_version_factory): + endpoint = f'/api/dandisets/{dandiset.identifier}/versions/' + + for _ in range(10): + published_version_factory(dandiset=dandiset) + + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json() + + assert resp['count'] == 10 + page_one = resp['results'] + assert len(page_one) == 5 + + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json() + assert resp['count'] is None + assert resp['next'] is None + page_two = resp['results'] + assert len(page_two) == 5 + + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json() + assert resp['count'] == 10 + assert resp['next'] is None + full_page = resp['results'] + assert len(full_page) == 10 + + assert full_page == page_one + page_two + + +@pytest.mark.django_db() +def test_asset_pagination(api_client, version, asset_factory): + endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/' + + # Create assets and set their created time artificially apart + for _ in range(10): + version.assets.add(asset_factory()) + + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json() + assert resp['count'] == 10 + assert resp['next'] is not None + page_one = resp['results'] + assert len(page_one) == 5 + + # Second page + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json() + assert resp['count'] is None + assert resp['next'] is None + page_two = resp['results'] + assert len(page_two) == 5 + + # Full page + resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json() + assert resp['count'] is not None + assert resp['next'] is None + full_page = resp['results'] + assert len(full_page) == 10 + + # Assert full list is ordered the same as both paginated lists + assert full_page == page_one + page_two + + +@pytest.mark.django_db() +def test_zarr_pagination(api_client, zarr_archive_factory): + endpoint = '/api/zarr/' + + for _ in range(10): + zarr_archive_factory() + + resp = api_client.get(endpoint, {'page_size': 5}).json() + assert resp['count'] == 10 + page_one = resp['results'] + assert len(page_one) == 5 + + resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json() + assert resp['count'] is None + assert resp['next'] is None + page_two = resp['results'] + assert len(page_two) == 5 + + resp = api_client.get(endpoint, {'page_size': 100}).json() + assert resp['count'] == 10 + assert resp['next'] is None + full_page = resp['results'] + assert len(full_page) == 10 + + assert full_page == page_one + page_two diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index cef64a44c..6cc37d37c 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -44,8 +44,8 @@ ASSET_ID_PARAM, VERSIONS_DANDISET_PK_PARAM, VERSIONS_VERSION_PARAM, - DandiPagination, ) +from dandiapi.api.views.pagination import DandiPagination from dandiapi.api.views.serializers import ( AssetDetailSerializer, AssetDownloadQueryParameterSerializer, diff --git a/dandiapi/api/views/common.py b/dandiapi/api/views/common.py index d1391aa63..f4ab2b4e4 100644 --- a/dandiapi/api/views/common.py +++ b/dandiapi/api/views/common.py @@ -1,20 +1,6 @@ from __future__ import annotations from drf_yasg import openapi -from rest_framework.pagination import PageNumberPagination - -max_page_size = 1000 - - -class DandiPagination(PageNumberPagination): - page_size = 100 - max_page_size = max_page_size - page_size_query_param = 'page_size' - - page_size_query_description = ( - f'{PageNumberPagination.page_size_query_description[:-1]} (maximum {max_page_size}).' - ) - ASSET_ID_PARAM = openapi.Parameter( 'asset_id', diff --git a/dandiapi/api/views/dandiset.py b/dandiapi/api/views/dandiset.py index 040395aef..14b06e4ae 100644 --- a/dandiapi/api/views/dandiset.py +++ b/dandiapi/api/views/dandiset.py @@ -27,7 +27,8 @@ from dandiapi.api.services.dandiset import create_dandiset, delete_dandiset from dandiapi.api.services.dandiset.exceptions import UnauthorizedEmbargoAccessError from dandiapi.api.services.embargo import unembargo_dandiset -from dandiapi.api.views.common import DANDISET_PK_PARAM, DandiPagination +from dandiapi.api.views.common import DANDISET_PK_PARAM +from dandiapi.api.views.pagination import DandiPagination from dandiapi.api.views.serializers import ( CreateDandisetQueryParameterSerializer, DandisetDetailSerializer, diff --git a/dandiapi/api/views/pagination.py b/dandiapi/api/views/pagination.py new file mode 100644 index 000000000..c48b76f72 --- /dev/null +++ b/dandiapi/api/views/pagination.py @@ -0,0 +1,102 @@ +""" +Implement an optimized pagination scheme. + +This module provides a custom pagination implementation, as the existing `PageNumberPagination` +class returns a `count` field for every page returned. This can be very inefficient on large tables, +and in reality, the count is only necessary on the first page of results. This module implements +such a pagination scheme, only returning 'count' on the first page of results. +""" + +from __future__ import annotations + +from collections import OrderedDict + +from django.core.paginator import Page, Paginator +from django.utils.functional import cached_property +from rest_framework.pagination import PageNumberPagination +from rest_framework.response import Response + + +class LazyPage(Page): + """ + A page class that doesn't call .count() on the queryset it's paging from. + + This class should be used with `LazyPaginator` unless you know what you're doing. + """ + + # Override to store the real object list under self._object_last + def __init__(self, object_list, number, paginator): + self._object_list = list(object_list) + self.number = number + self.paginator = paginator + + # Override to prevent returning the extra record + @cached_property + def object_list(self): + return self._object_list[: self.paginator.per_page] + + # Because we fetch one extra object to check for more rows, we know that if the number of + # objects returned is the page size or less, we have no more pages. + def has_next(self) -> bool: + return len(self._object_list) > self.paginator.per_page + + # Override to prevent calling `.count` on the queryset. To my knowledge we don't use this. + def end_index(self) -> int: + raise NotImplementedError + + +class LazyPaginator(Paginator): + """A Paginator that doesn't call .count() on the queryset.""" + + # Set this to infinity so that inherited code doesn't assume we're done paginating + num_pages = float('inf') + + def page(self, number): + """Return a page with one extra row, used to determine if there are more pages.""" + number = self.validate_number(number) + bottom = (number - 1) * self.per_page + + # Intentionally fetch one extra to see if there are any more pages left + top = bottom + self.per_page + 1 + + return self._get_page(self.object_list[bottom:top], number, self) + + def _get_page(self, *args, **kwargs): + return LazyPage(*args, **kwargs) + + +class LazyPagination(PageNumberPagination): + page_size = 100 + max_page_size = 1000 + page_size_query_param = 'page_size' + django_paginator_class = LazyPaginator + + # Set to always false since we only know the full number of pages on page 1 + display_page_controls = False + + # Define as empty to prevent `get_page_number` from calling `count` + last_page_strings = () + + # Set to None to prevent `paginate_queryset` from setting `display_page_controls` to True + template = None + + @cached_property + def page_size_query_description(self): + return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).' + + def get_paginated_response(self, data) -> Response: + """Overridden to only include the count of the queryset on the first page.""" + page_dict = OrderedDict( + [ + ('count', self.page.paginator.count if self.page.number == 1 else None), + ('next', self.get_next_link()), + ('previous', self.get_previous_link()), + ('results', data), + ] + ) + + return Response(page_dict) + + +# Alias +DandiPagination = LazyPagination diff --git a/dandiapi/api/views/version.py b/dandiapi/api/views/version.py index b94592414..2cfc3335a 100644 --- a/dandiapi/api/views/version.py +++ b/dandiapi/api/views/version.py @@ -16,7 +16,8 @@ from dandiapi.api.models import Dandiset, Version from dandiapi.api.services.publish import publish_dandiset from dandiapi.api.tasks import delete_doi_task -from dandiapi.api.views.common import DANDISET_PK_PARAM, VERSION_PARAM, DandiPagination +from dandiapi.api.views.common import DANDISET_PK_PARAM, VERSION_PARAM +from dandiapi.api.views.pagination import DandiPagination from dandiapi.api.views.serializers import ( VersionDetailSerializer, VersionMetadataSerializer, diff --git a/dandiapi/settings.py b/dandiapi/settings.py index 0ba65368f..00de8f416 100644 --- a/dandiapi/settings.py +++ b/dandiapi/settings.py @@ -72,7 +72,7 @@ def mutate_configuration(configuration: type[ComposedConfiguration]): # Pagination configuration.REST_FRAMEWORK['DEFAULT_PAGINATION_CLASS'] = ( - 'dandiapi.api.views.common.DandiPagination' + 'dandiapi.api.views.pagination.DandiPagination' ) configuration.REST_FRAMEWORK['EXCEPTION_HANDLER'] = ( diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index 61a1057c6..24d6713da 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -16,7 +16,7 @@ from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.storage import get_boto_client -from dandiapi.api.views.common import DandiPagination +from dandiapi.api.views.pagination import DandiPagination from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus from dandiapi.zarr.tasks import ingest_zarr_archive