Skip to content

Commit a72bd44

Browse files
authored
Add endpoints to list and return individual files from tar (#1151)
* In Progress - List contents of input_file and output_file * Filter files with extension in list_tar_file * Implement extracting files using new endpoint * Native tarinfo file check for list file. * Content type and error checking for extract file * Content type and error checking for extract file * Refactor + remove white spaces * Add extract output files * Rename endpoints * Remove accidental database commit * Update gitignore with potential db loc * Add swagger response to tar_list method * Fix pep8 errors * Remove comment
1 parent f642000 commit a72bd44

File tree

5 files changed

+116
-2
lines changed

5 files changed

+116
-2
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ ENV/
100100
db-data/
101101

102102
/src/server/db.sqlite3
103+
/src/server/oasisapi/db.sqlite3
103104
/src/server/media
104105
.DS_Store
105106

src/server/oasisapi/analyses/v2_api/viewsets.py

+40-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from ...decorators import requires_sql_reader
2323
from ...files.v2_api.serializers import RelatedFileSerializer, FileSQLSerializer, NestedRelatedFileSerializer
2424
from ...files.v1_api.views import handle_related_file, handle_json_data, handle_related_file_sql
25+
from ...files.v2_api.views import handle_get_related_file_tar
2526
from ...filters import TimeStampedFilter, CsvMultipleChoiceFilter, CsvModelMultipleChoiceFilter
2627
from ...permissions.group_auth import VerifyGroupAccessModelViewSet, verify_user_is_in_obj_groups
2728
from ...portfolios.models import Portfolio
@@ -31,6 +32,8 @@
3132
RUN_MODE_PARAM,
3233
SUBTASK_STATUS_PARAM,
3334
SUBTASK_SLUG_PARAM,
35+
FILENAME_PARAM,
36+
FILE_LIST_RESPONSE,
3437
)
3538

3639

@@ -419,7 +422,25 @@ def input_file(self, request, pk=None, version=None):
419422
"""
420423
return handle_related_file(self.get_object(), 'input_file', request, ['application/x-gzip', 'application/gzip', 'application/x-tar', 'application/tar'])
421424

422-
@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE})
425+
@swagger_auto_schema(methods=["get"], responses={200: FILE_LIST_RESPONSE})
426+
@action(methods=['get'], detail=True)
427+
def input_file_tar_list(self, request, pk=None, version=None):
    """
    get:
    List the files in `input_file`.
    """
    # Delegates to the shared tar handler, which picks "list" vs "extract" from the request path.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'input_file', request, tar_content_types)
433+
434+
@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE}, manual_parameters=[FILENAME_PARAM])
435+
@action(methods=['get'], detail=True)
436+
def input_file_tar_extract(self, request, pk=None, version=None):
    """
    get:
    Extract and get `input_file` content.
    """
    # Delegates to the shared tar handler, which picks "list" vs "extract" from the request path.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'input_file', request, tar_content_types)
442+
443+
@swagger_auto_schema(methods=["get"], responses={200: FILE_RESPONSE})
423444
@action(methods=['get'], detail=True)
424445
def lookup_errors_file(self, request, pk=None, version=None):
425446
"""
@@ -503,6 +524,24 @@ def output_file(self, request, pk=None, version=None):
503524
"""
504525
return handle_related_file(self.get_object(), 'output_file', request, ['application/x-gzip', 'application/gzip', 'application/x-tar', 'application/tar'])
505526

527+
@swagger_auto_schema(methods=['get'], responses={200: FILE_LIST_RESPONSE})
528+
@action(methods=['get'], detail=True)
529+
def output_file_tar_list(self, request, pk=None, version=None):
    """
    get:
    List the files in `output_file`.
    """
    # Delegates to the shared tar handler, which picks "list" vs "extract" from the request path.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'output_file', request, tar_content_types)
535+
536+
@swagger_auto_schema(methods=['get'], responses={200: FILE_RESPONSE}, manual_parameters=[FILENAME_PARAM])
537+
@action(methods=['get'], detail=True)
538+
def output_file_tar_extract(self, request, pk=None, version=None):
    """
    get:
    Extract and get `output_file` content.
    """
    # Delegates to the shared tar handler, which picks "list" vs "extract" from the request path.
    instance = self.get_object()
    tar_content_types = [
        'application/x-gzip',
        'application/gzip',
        'application/x-tar',
        'application/tar',
    ]
    return handle_get_related_file_tar(instance, 'output_file', request, tar_content_types)
544+
506545
@requires_sql_reader
507546
@swagger_auto_schema(methods=['get'], responses={200: NestedRelatedFileSerializer})
508547
@action(methods=['get'], detail=True)

src/server/oasisapi/files/models.py

+20
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from io import BytesIO
44

55
import pandas as pd
6+
import tarfile
67
from uuid import uuid4
78

89
from django.conf import settings
@@ -25,6 +26,25 @@ def related_file_to_df(RelatedFile):
2526
return pd.read_csv(BytesIO(RelatedFile.read()))
2627

2728

29+
def list_tar_file(RelatedFile):
    """Return the names of the regular files stored in a tar archive.

    Args:
        RelatedFile: Object whose ``read()`` returns the archive bytes.
            A falsy value means there is no archive to inspect.

    Returns:
        A list of member names in archive order. Directories, links and
        other non-regular entries are skipped. ``None`` is returned when
        ``RelatedFile`` is falsy.
    """
    if not RelatedFile:
        return None

    # mode='r' lets tarfile detect compression transparently. The context
    # manager closes the archive (and any decompressor it created) once the
    # member names have been collected.
    with tarfile.open(fileobj=BytesIO(RelatedFile.read()), mode='r') as tarf:
        return [m.name for m in tarf.getmembers() if m.isfile()]
37+
38+
39+
def extract_file_from_tar(RelatedFile, fname):
    """Return a readable file object for one member of a tar archive.

    Args:
        RelatedFile: Object whose ``read()`` returns the archive bytes.
        fname: Name of the archive member to open.

    Returns:
        ``None`` when ``RelatedFile`` is falsy; otherwise whatever
        ``TarFile.extractfile(fname)`` returns, which is ``None`` for
        members that exist but are not regular files or links.

    Raises:
        KeyError: ``fname`` is not present in the archive.
    """
    if RelatedFile:
        archive_bytes = BytesIO(RelatedFile.read())
        archive = tarfile.open(fileobj=archive_bytes, mode='r')
        # NOTE: the archive is not closed here; the reader handed back by
        # extractfile() pulls its data from the open archive on demand.
        return archive.extractfile(fname)
    return None
46+
47+
2848
def random_file_name(instance, filename):
2949
if getattr(instance, "store_as_filename", False):
3050
return filename

src/server/oasisapi/files/v2_api/views.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import io
3+
import os
34
from tempfile import TemporaryFile
45

56
from django.conf import settings
@@ -10,7 +11,7 @@
1011

1112
from oasis_data_manager.df_reader.config import get_df_reader
1213
from oasis_data_manager.df_reader.exceptions import InvalidSQLException
13-
from ..models import RelatedFile
14+
from ..models import RelatedFile, list_tar_file, extract_file_from_tar
1415
from .serializers import RelatedFileSerializer, EXPOSURE_ARGS
1516
from ...permissions.group_auth import verify_user_is_in_obj_groups
1617

@@ -180,6 +181,39 @@ def handle_json_data(parent, field, request, serializer):
180181
return _handle_delete_related_file(parent, field, request)
181182

182183

184+
def handle_get_related_file_tar(parent, field, request, content_types):
    """List a related tar archive, or stream one of its members.

    The action is chosen from the request path: a path containing ``list``
    returns the archive's file names; a path containing ``extract`` streams
    the member named by the ``filename`` query parameter as an attachment.

    Args:
        parent: Object whose ``field`` attribute holds the archive.
        field: Name of the attribute on ``parent`` to read.
        request: Incoming request; supplies the user, path and query string.
        content_types: Not used by this handler.

    Returns:
        A ``Response`` wrapping the list of file names, or a
        ``StreamingHttpResponse`` carrying the extracted member.

    Raises:
        Http404: ``parent`` has no file in ``field``, or the request path
            names neither action.
        ValidationError: ``filename`` is not in the archive, or names a
            member that is not a regular file.
    """
    f = getattr(parent, field)
    if not f:
        raise Http404()

    verify_user_is_in_obj_groups(request.user, f, 'You do not have permission to read this file')

    if 'list' in request.path:
        files = list_tar_file(f)
        return Response(files)

    if 'extract' in request.path:
        filename = request.GET.get('filename', '')

        try:
            output_buffer = extract_file_from_tar(f, filename)
        except KeyError:
            raise ValidationError('Invalid filename.')

        # extractfile() gives None for members that exist but are not regular
        # files (e.g. directories); reject them like an unknown name instead
        # of failing on the seek() below.
        if output_buffer is None:
            raise ValidationError('Invalid filename.')

        output_buffer.seek(0)

        extension_mapping = {
            'parquet': 'application/octet-stream',
            'pq': 'application/octet-stream',
            'csv': 'text/csv',
            'json': 'application/json',
        }

        # Extensions missing from the mapping leave content_type as None, so
        # the response's own default content type applies.
        content_type = extension_mapping.get(os.path.splitext(filename)[1][1:], None)
        response = StreamingHttpResponse(output_buffer, content_type=content_type)
        # Only the base name is advertised, so archive sub-directories do not
        # leak into the download name.
        response['Content-Disposition'] = f'attachment; filename="{os.path.basename(filename)}"'
        return response

    # Neither action matched the path: answer 404 rather than returning None.
    raise Http404()
215+
216+
183217
def handle_related_file_sql(parent, field, request, sql, m2m_file_pk=None):
184218
requested_format = request.GET.get('file_format', None)
185219
f = getattr(parent, field)

src/server/oasisapi/schemas/custom_swagger.py

+20
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
__all__ = [
22
'FILE_RESPONSE',
3+
'FILE_LIST_RESPONSE',
34
'HEALTHCHECK',
45
'TOKEN_REFRESH_HEADER',
56
'FILE_FORMAT_PARAM',
67
'RUN_MODE_PARAM',
78
'SUBTASK_STATUS_PARAM',
89
'SUBTASK_SLUG_PARAM',
910
'FILE_VALIDATION_PARAM',
11+
'FILENAME_PARAM',
1012
]
1113

1214
from drf_yasg import openapi
@@ -28,6 +30,16 @@
2830

2931
})
3032

33+
34+
# Swagger response titled "File List": an array whose items are file-name strings.
FILE_LIST_RESPONSE = openapi.Response(
35+
"File List",
36+
schema=Schema(
37+
type=openapi.TYPE_ARRAY,
38+
items=Schema(title="File Name", type=openapi.TYPE_STRING),
39+
),
40+
)
41+
42+
3143
HEALTHCHECK = Schema(
3244
title='HealthCheck',
3345
type='object',
@@ -109,3 +121,11 @@
109121
description="Validate OED files on upload, default `True`",
110122
type=openapi.TYPE_BOOLEAN,
111123
)
124+
125+
# Required `filename` query-string parameter (string) naming the tar member to extract.
FILENAME_PARAM = openapi.Parameter(
126+
'filename',
127+
openapi.IN_QUERY,
128+
required=True,
129+
description="Filename to extract from tarfile.",
130+
type=openapi.TYPE_STRING,
131+
)

0 commit comments

Comments
 (0)