Skip to content

Commit

Permalink
Merge pull request #312 from dandi/292-paths-endpoint
Browse files Browse the repository at this point in the history
Update asset paths endpoint to include more information
  • Loading branch information
jjnesbitt authored Jul 19, 2021
2 parents ac0c61a + 76cad87 commit ef3a682
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 72 deletions.
23 changes: 1 addition & 22 deletions dandiapi/api/models/asset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import datetime
from typing import Dict, List, Set
from typing import Dict
from urllib.parse import urlparse, urlunparse
import uuid

Expand Down Expand Up @@ -210,27 +210,6 @@ def strip_metadata(cls, metadata):
# The human-readable form of this object is simply its stored ``path``.
def __str__(self) -> str:
return self.path

@classmethod
def get_path(cls, path_prefix: str, qs: List[Dict]) -> List[str]:
    """
    Return the unique files/directories that directly reside under the specified path.

    ``path_prefix`` is treated as a folder. Leading, trailing and repeated
    slashes are ignored, and an empty/``None`` prefix refers to the root.

    ``qs`` is an iterable of mappings that each carry a ``'path'`` key (for
    example the rows of ``QuerySet.values()``).

    The result is a sorted list of the immediate children of the prefix.
    Children that contain further path components are folders and are
    reported with a trailing slash; everything else is reported as a file.
    Entries that do not live strictly below the prefix are ignored.
    """
    prefix_parts = [part for part in (path_prefix or '').split('/') if part]
    depth = len(prefix_parts)

    paths = set()
    for asset in qs:
        path_parts = [part for part in asset['path'].split('/') if part]

        # Compare the leading components positionally. Looking the last
        # prefix component up with ``list.index`` picks the *first* match,
        # which is wrong for repeated names such as ``a/a/b``.
        if path_parts[:depth] != prefix_parts:
            continue

        children = path_parts[depth:]
        if not children:
            # The path is the prefix itself; nothing resides under it.
            continue

        base_path, *remainder = children
        paths.add(f'{base_path}/' if remainder else base_path)

    return sorted(paths)

@classmethod
def total_size(cls):
return (
Expand Down
103 changes: 66 additions & 37 deletions dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import pytest
import requests

from dandiapi.api.models import Asset, AssetBlob
from dandiapi.api.models import Asset, AssetBlob, Version
from dandiapi.api.views.serializers import AssetFolderSerializer, AssetSerializer

from .fuzzy import HTTP_URL_RE, TIMESTAMP_RE, URN_RE, UUID_RE

Expand All @@ -16,22 +17,73 @@

@pytest.mark.django_db
@pytest.mark.parametrize(
'path,qs,expected',
'path,asset_paths,expected',
[
('', [{'path': '/foo'}, {'path': '/bar/baz'}], ['bar/', 'foo']),
('/', [{'path': '/foo'}, {'path': '/bar/baz'}], ['bar/', 'foo']),
('////', [{'path': '/foo'}, {'path': '/bar/baz'}], ['bar/', 'foo']),
('a', [{'path': '/a/b'}, {'path': '/a/c/d'}], ['b', 'c/']),
('a/', [{'path': '/a/b'}, {'path': '/a/c/d'}], ['b', 'c/']),
('/a', [{'path': '/a/b'}, {'path': '/a/c/d'}], ['b', 'c/']),
('/a/', [{'path': '/a/b'}, {'path': '/a/c/d'}], ['b', 'c/']),
('a', [{'path': 'a/b'}, {'path': 'a/c/d'}], ['b', 'c/']),
('a', [{'path': 'a/b/'}, {'path': 'a/c/d/'}], ['b', 'c/']),
('a', [{'path': '/a/b/'}, {'path': '/a/c/d/'}], ['b', 'c/']),
('', ['foo', 'bar/baz'], {'folders': ['bar'], 'files': ['foo']}),
# ('', ['/foo', '/bar/baz'], {'folders': ['bar'], 'files': ['foo']}), # edge case
('/', ['/foo', '/bar/baz'], {'folders': ['bar'], 'files': ['foo']}),
('', ['foo/bar', 'foo/baz', 'foo/boo'], {'folders': ['foo'], 'files': []}),
('/', ['foo/bar', 'foo/baz', 'foo/boo'], {'folders': [], 'files': []}), # Negative test
('////', ['/foo', '/bar/baz'], {'folders': [], 'files': []}),
('a', ['a/b', 'a/c/d'], {'folders': ['c'], 'files': ['b']}),
('a/', ['a/b', 'a/c/d'], {'folders': ['c'], 'files': ['b']}),
('/a', ['/a/b', '/a/c/d'], {'folders': ['c'], 'files': ['b']}),
('/a/', ['/a/b', '/a/c/d'], {'folders': ['c'], 'files': ['b']}),
],
)
def test_asset_get_path(path, qs, expected):
assert expected == Asset.get_path(path, qs)
def test_asset_rest_path(
    api_client,
    draft_version_factory,
    asset_factory,
    asset_blob_factory,
    path,
    asset_paths,
    expected,
):
    """
    Check the ``assets/paths/`` endpoint for a given ``path_prefix``.

    A draft version is populated with one asset per entry of ``asset_paths``
    (all sharing a single blob). The endpoint response must then contain
    exactly the folder and file names listed in ``expected``, and every entry
    must equal the serialized form computed directly from the database.
    """
    # Initialize version and contained assets
    asset_blob = asset_blob_factory()
    assets = [asset_factory(blob=asset_blob, path=p) for p in asset_paths]
    version: Version = draft_version_factory()
    for asset in assets:
        version.assets.add(asset)

    # Retrieve paths from endpoint
    paths = api_client.get(
        f'/api/dandisets/{version.dandiset.identifier}/'
        f'versions/{version.version}/assets/paths/',
        {'path_prefix': path},
    ).data

    # Ensure slash between path prefix and folders/files
    query_prefix = path
    if query_prefix and not query_prefix.endswith('/'):
        query_prefix = f'{query_prefix}/'

    # The response must contain the expected names and nothing else. Without
    # this, parametrizations that expect empty results would assert nothing.
    assert sorted(paths['folders']) == sorted(expected['folders'])
    assert sorted(paths['files']) == sorted(expected['files'])

    # Do folder assertions
    for folder_path in expected['folders']:
        folder_entry = paths['folders'][folder_path]
        # The trailing slash keeps sibling names that merely share a prefix
        # (e.g. ``c`` vs ``cc``) out of this folder's aggregate.
        folder_assets = list(
            Asset.objects.filter(path__startswith=f'{query_prefix}{folder_path}/')
        )
        serialized_folder = AssetFolderSerializer(
            {
                'created': min(asset.created for asset in folder_assets),
                'modified': max(asset.modified for asset in folder_assets),
                'size': sum(asset.size for asset in folder_assets),
                'num_files': len(folder_assets),
            }
        ).data

        assert folder_entry == serialized_folder

    # Do file assertions
    for file_path in expected['files']:
        asset: Asset = Asset.objects.get(path=f'{query_prefix}{file_path}')
        assert paths['files'][file_path] == AssetSerializer(asset).data


@pytest.mark.django_db
Expand Down Expand Up @@ -574,26 +626,3 @@ def test_asset_direct_download_head(api_client, storage, version, asset):

with asset.blob.blob.file.open('rb') as reader:
assert download.content == reader.read()


@pytest.mark.django_db
@pytest.mark.parametrize(
    'path_prefix,results',
    [
        ('', ['foo/', 'no-root.nwb', 'root.nwb']),
        ('/', ['foo/', 'root.nwb']),
        ('/foo', ['bar/', 'baz.nwb']),
        ('/foo/', ['bar/', 'baz.nwb']),
    ],
)
def test_asset_rest_path_filter(api_client, version, asset_factory, path_prefix, results):
    """The paths endpoint lists only the direct children of ``path_prefix``."""
    # Populate the version with a mix of nested, rooted and un-rooted paths.
    for asset_path in ('/foo/bar/file.nwb', '/foo/baz.nwb', '/root.nwb', 'no-root.nwb'):
        new_asset = asset_factory(path=asset_path)
        version.assets.add(new_asset)

    endpoint = (
        f'/api/dandisets/{version.dandiset.identifier}/'
        f'versions/{version.version}/assets/paths/'
    )
    response = api_client.get(endpoint, {'path_prefix': path_prefix})

    assert response.data == results
49 changes: 36 additions & 13 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from dandiapi.api.views.common import DandiPagination
from dandiapi.api.views.serializers import (
AssetDetailSerializer,
AssetPathsSerializer,
AssetSerializer,
AssetValidationSerializer,
)
Expand Down Expand Up @@ -299,27 +300,49 @@ def download(self, request, **kwargs):
manual_parameters=[
openapi.Parameter('path_prefix', openapi.IN_QUERY, type=openapi.TYPE_STRING)
],
responses={
200: openapi.Schema(
type=openapi.TYPE_ARRAY,
items=openapi.Schema(type=openapi.TYPE_STRING),
)
},
responses={200: AssetPathsSerializer()},
)
@action(detail=False, methods=['GET'])
def paths(self, request, **kwargs):
def paths(self, request, versions__dandiset__pk: str, versions__version: str, **kwargs):
"""
Return the unique files/directories that directly reside under the specified path.
The specified path must be a folder; it either must end in a slash or
(to refer to the root folder) must be the empty string.
"""
# NOTE(review): versions__dandiset__pk / versions__version are not referenced
# in the visible body; presumably they are URL kwargs used by get_queryset() — confirm.
# A missing or empty ``path_prefix`` query parameter falls back to '' (the root).
path_prefix: str = self.request.query_params.get('path_prefix') or ''
# Enforce trailing slash
if path_prefix and path_prefix[-1] != '/':
path_prefix = f'{path_prefix}/'
# NOTE(review): the next assignment and the ``return`` after it call
# Asset.get_path and look like the previous list-based implementation;
# as written, the early return makes everything below unreachable — confirm.
qs = self.get_queryset().filter(path__startswith=path_prefix).values()

return Response(Asset.get_path(path_prefix, qs))
# Only assets whose path starts with the (slash-terminated) prefix are considered.
qs = self.get_queryset().select_related('blob').filter(path__startswith=path_prefix)

# Both dicts are keyed by the first path component found after path_prefix.
folders = {}
files = {}

for asset in qs:
# Get the remainder of the path after path_prefix
base_path: str = asset.path[len(path_prefix) :].strip('/')

# Since we stripped slashes, any remaining slashes indicate a folder
folder_index = base_path.find('/')
is_folder = folder_index >= 0

if not is_folder:
# A direct child file: store its full serialized representation.
files[base_path] = AssetSerializer(asset).data
else:
# Keep only the folder name (text before the first slash) as the key.
base_path = base_path[:folder_index]
entry = folders.get(base_path)
if entry is None:
# First asset seen under this folder seeds the aggregate.
folders[base_path] = {
'size': asset.size,
'num_files': 1,
'created': asset.created,
'modified': asset.modified,
}
else:
# Fold this asset into the folder's running totals.
entry['size'] += asset.size
entry['num_files'] += 1
entry['created'] = min(entry['created'], asset.created) # earliest
entry['modified'] = max(entry['modified'], asset.modified) # latest

# Serialize the two name -> entry mappings into the response payload.
paths = AssetPathsSerializer({'folders': folders, 'files': files})
return Response(paths.data)

# TODO: add create to forge an asset from a validation
12 changes: 12 additions & 0 deletions dandiapi/api/views/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,15 @@ class Meta(AssetSerializer.Meta):
fields = AssetSerializer.Meta.fields + ['metadata']

metadata = serializers.SlugRelatedField(read_only=True, slug_field='metadata')


# Serializes the aggregate information reported for one folder entry.
class AssetFolderSerializer(serializers.Serializer):
# Integer size of the folder entry.
size = serializers.IntegerField()
# Integer count of files in the folder entry.
num_files = serializers.IntegerField()
# Creation timestamp reported for the folder entry.
created = serializers.DateTimeField()
# Modification timestamp reported for the folder entry.
modified = serializers.DateTimeField()


# Serializes a listing split into folder entries and file entries.
class AssetPathsSerializer(serializers.Serializer):
# Mapping of key -> folder entry (AssetFolderSerializer).
folders = serializers.DictField(child=AssetFolderSerializer())
# Mapping of key -> serialized asset (AssetSerializer).
files = serializers.DictField(child=AssetSerializer())

0 comments on commit ef3a682

Please sign in to comment.