Skip to content

Commit

Permalink
Merge pull request #1911 from dandi/asset-list-excess-count
Browse files Browse the repository at this point in the history
Only include total count in the first page of list views
  • Loading branch information
jjnesbitt authored Mar 29, 2024
2 parents c9be998 + cc661ff commit dafbe3f
Show file tree
Hide file tree
Showing 8 changed files with 230 additions and 19 deletions.
121 changes: 121 additions & 0 deletions dandiapi/api/tests/test_pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import annotations

import pytest


@pytest.mark.django_db()
def test_dandiset_pagination(api_client, dandiset_factory):
endpoint = '/api/dandisets/'
for _ in range(10):
dandiset_factory()

# First page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json()
assert resp['count'] == 10
assert resp['next'] is not None
page_one = resp['results']
assert len(page_one) == 5

# Second page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

# Full page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_version_pagination(api_client, dandiset, published_version_factory):
endpoint = f'/api/dandisets/{dandiset.identifier}/versions/'

for _ in range(10):
published_version_factory(dandiset=dandiset)

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json()

assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_asset_pagination(api_client, version, asset_factory):
endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/'

# Create assets and set their created time artificially apart
for _ in range(10):
version.assets.add(asset_factory())

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json()
assert resp['count'] == 10
assert resp['next'] is not None
page_one = resp['results']
assert len(page_one) == 5

# Second page
resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

# Full page
resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json()
assert resp['count'] is not None
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_zarr_pagination(api_client, zarr_archive_factory):
endpoint = '/api/zarr/'

for _ in range(10):
zarr_archive_factory()

resp = api_client.get(endpoint, {'page_size': 5}).json()
assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two
2 changes: 1 addition & 1 deletion dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
ASSET_ID_PARAM,
VERSIONS_DANDISET_PK_PARAM,
VERSIONS_VERSION_PARAM,
DandiPagination,
)
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.api.views.serializers import (
AssetDetailSerializer,
AssetDownloadQueryParameterSerializer,
Expand Down
14 changes: 0 additions & 14 deletions dandiapi/api/views/common.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,6 @@
from __future__ import annotations

from drf_yasg import openapi
from rest_framework.pagination import PageNumberPagination

max_page_size = 1000


class DandiPagination(PageNumberPagination):
page_size = 100
max_page_size = max_page_size
page_size_query_param = 'page_size'

page_size_query_description = (
f'{PageNumberPagination.page_size_query_description[:-1]} (maximum {max_page_size}).'
)


ASSET_ID_PARAM = openapi.Parameter(
'asset_id',
Expand Down
3 changes: 2 additions & 1 deletion dandiapi/api/views/dandiset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from dandiapi.api.services.dandiset import create_dandiset, delete_dandiset
from dandiapi.api.services.dandiset.exceptions import UnauthorizedEmbargoAccessError
from dandiapi.api.services.embargo import unembargo_dandiset
from dandiapi.api.views.common import DANDISET_PK_PARAM, DandiPagination
from dandiapi.api.views.common import DANDISET_PK_PARAM
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.api.views.serializers import (
CreateDandisetQueryParameterSerializer,
DandisetDetailSerializer,
Expand Down
102 changes: 102 additions & 0 deletions dandiapi/api/views/pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""
Implement an optimized pagination scheme.
This module provides a custom pagination implementation, as the existing `PageNumberPagination`
class returns a `count` field for every page returned. This can be very inefficient on large tables,
and in reality, the count is only necessary on the first page of results. This module implements
such a pagination scheme, only returning 'count' on the first page of results.
"""

from __future__ import annotations

from collections import OrderedDict

from django.core.paginator import Page, Paginator
from django.utils.functional import cached_property
from rest_framework.pagination import PageNumberPagination
from rest_framework.response import Response


class LazyPage(Page):
"""
A page class that doesn't call .count() on the queryset it's paging from.
This class should be used with `LazyPaginator` unless you know what you're doing.
"""

# Override to store the real object list under self._object_last
def __init__(self, object_list, number, paginator):
self._object_list = list(object_list)
self.number = number
self.paginator = paginator

# Override to prevent returning the extra record
@cached_property
def object_list(self):
return self._object_list[: self.paginator.per_page]

# Because we fetch one extra object to check for more rows, we know that if the number of
# objects returned is the page size or less, we have no more pages.
def has_next(self) -> bool:
return len(self._object_list) > self.paginator.per_page

# Override to prevent calling `.count` on the queryset. To my knowledge we don't use this.
def end_index(self) -> int:
raise NotImplementedError


class LazyPaginator(Paginator):
"""A Paginator that doesn't call .count() on the queryset."""

# Set this to infinity so that inherited code doesn't assume we're done paginating
num_pages = float('inf')

def page(self, number):
"""Return a page with one extra row, used to determine if there are more pages."""
number = self.validate_number(number)
bottom = (number - 1) * self.per_page

# Intentionally fetch one extra to see if there are any more pages left
top = bottom + self.per_page + 1

return self._get_page(self.object_list[bottom:top], number, self)

def _get_page(self, *args, **kwargs):
return LazyPage(*args, **kwargs)


class LazyPagination(PageNumberPagination):
page_size = 100
max_page_size = 1000
page_size_query_param = 'page_size'
django_paginator_class = LazyPaginator

# Set to always false since we only know the full number of pages on page 1
display_page_controls = False

# Define as empty to prevent `get_page_number` from calling `count`
last_page_strings = ()

# Set to None to prevent `paginate_queryset` from setting `display_page_controls` to True
template = None

@cached_property
def page_size_query_description(self):
return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).'

def get_paginated_response(self, data) -> Response:
"""Overridden to only include the count of the queryset on the first page."""
page_dict = OrderedDict(
[
('count', self.page.paginator.count if self.page.number == 1 else None),
('next', self.get_next_link()),
('previous', self.get_previous_link()),
('results', data),
]
)

return Response(page_dict)


# Alias
DandiPagination = LazyPagination
3 changes: 2 additions & 1 deletion dandiapi/api/views/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from dandiapi.api.models import Dandiset, Version
from dandiapi.api.services.publish import publish_dandiset
from dandiapi.api.tasks import delete_doi_task
from dandiapi.api.views.common import DANDISET_PK_PARAM, VERSION_PARAM, DandiPagination
from dandiapi.api.views.common import DANDISET_PK_PARAM, VERSION_PARAM
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.api.views.serializers import (
VersionDetailSerializer,
VersionMetadataSerializer,
Expand Down
2 changes: 1 addition & 1 deletion dandiapi/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def mutate_configuration(configuration: type[ComposedConfiguration]):

# Pagination
configuration.REST_FRAMEWORK['DEFAULT_PAGINATION_CLASS'] = (
'dandiapi.api.views.common.DandiPagination'
'dandiapi.api.views.pagination.DandiPagination'
)

configuration.REST_FRAMEWORK['EXCEPTION_HANDLER'] = (
Expand Down
2 changes: 1 addition & 1 deletion dandiapi/zarr/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from dandiapi.api.models.dandiset import Dandiset
from dandiapi.api.storage import get_boto_client
from dandiapi.api.views.common import DandiPagination
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus
from dandiapi.zarr.tasks import ingest_zarr_archive

Expand Down

0 comments on commit dafbe3f

Please sign in to comment.