Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3061d94
discriminate nginx and direct requests and either use nginx or tornad…
sjanssen2 Sep 17, 2025
cfe606a
codestyle
sjanssen2 Sep 17, 2025
71526bb
don't complain about overwriting files IF in test mode
sjanssen2 Sep 18, 2025
c7924ca
don't read config from file, but from DB
sjanssen2 Sep 18, 2025
ae888a8
allow to push whole directories to qiita main
sjanssen2 Sep 18, 2025
81cb696
extended FetchFileFromCentralHandler to directories, but very limited…
sjanssen2 Nov 6, 2025
8ce0cdb
assert presence/absence of directory transfer flag
sjanssen2 Nov 6, 2025
736d1fd
codestyle
sjanssen2 Nov 6, 2025
f853f56
Merge pull request #13 from jlab/tornado_FetchFileFromCentralHandler_…
sjanssen2 Nov 6, 2025
c6f8357
adding debug information
sjanssen2 Nov 7, 2025
1ff9892
move debug to individual files
sjanssen2 Nov 7, 2025
4bd4a18
more debug when composing zip
sjanssen2 Nov 7, 2025
14d43f1
debug
sjanssen2 Nov 7, 2025
fc367bb
modify nginx file list for ZIP
sjanssen2 Nov 7, 2025
043c6be
use correct function
sjanssen2 Nov 7, 2025
3cb9cf7
fix path computation
sjanssen2 Nov 7, 2025
2cc787d
clean up
sjanssen2 Nov 7, 2025
9441772
clean download.py
sjanssen2 Nov 7, 2025
c446bf9
Merge pull request #14 from jlab/tornado_FetchFileFromCentralHandler_…
sjanssen2 Nov 7, 2025
017b150
add ability to delete files/dirs through API, but only in qiita test …
sjanssen2 Nov 12, 2025
7604d84
also delete non managed dirs
sjanssen2 Nov 12, 2025
50a1788
avoid deleting BIOM sub-dir :-/
sjanssen2 Nov 12, 2025
d3cf5f5
Merge pull request #15 from jlab/tornado_FetchFileFromCentralHandler_…
sjanssen2 Nov 12, 2025
dcedd02
also allow downloading of html summary directories
sjanssen2 Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion qiita_pet/handlers/cloud_handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
from .file_transfer_handlers import (FetchFileFromCentralHandler,
PushFileToCentralHandler)
PushFileToCentralHandler,
DeleteFileFromCentralHandler)
from qiita_core.util import is_test_environment

# NOTE(review): only the fetch handler is exported here although the push and
# delete handlers are imported above as well - confirm whether this is
# intentional.
__all__ = ['FetchFileFromCentralHandler']

# URL pattern -> handler class pairs provided by this package.
ENDPOINTS = [
    (r"/cloud/fetch_file_from_central/(.*)", FetchFileFromCentralHandler),
    (r"/cloud/push_file_to_central/", PushFileToCentralHandler)
]

# The delete endpoint is only added when Qiita runs in a test environment.
if is_test_environment():
    _delete_endpoint = (r"/cloud/delete_file_from_central/(.*)",
                        DeleteFileFromCentralHandler)
    ENDPOINTS.append(_delete_endpoint)
244 changes: 226 additions & 18 deletions qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,78 @@
import os
from pathlib import Path

from tornado.web import HTTPError, RequestHandler
from tornado.gen import coroutine
import zipfile
from io import BytesIO
from shutil import rmtree

from qiita_core.util import execute_as_transaction
from qiita_core.util import execute_as_transaction, is_test_environment
from qiita_db.handlers.oauth2 import authenticate_oauth
from qiita_pet.handlers.download import BaseHandlerDownload
from qiita_core.qiita_settings import qiita_config
import qiita_db as qdb


def is_directory(filepath):
    """Check whether Qiita's DB lists the given filepath as a directory.

    Only database entries are consulted; the actual filesystem is not
    inspected.

    Parameters
    ----------
    filepath : str
        The filepath of the directory that shall be looked up in Qiita's DB.

    Returns
    -------
    bool
        True if the last component of the filepath is stored as filepath in
        qiita.filepath with filepath_type 'directory' or 'html_summary_dir'
        AND the first path component below base_data_dir occurs in the
        mountpoint of the joined qiita.data_directory entry.
        False otherwise, including the cases where the filepath is not
        located below base_data_dir or is base_data_dir itself.
    """
    # strip a trailing separator, otherwise basename would be empty
    candidate = filepath
    if candidate.endswith(os.sep):
        candidate = os.path.dirname(candidate)
    dir_name = os.path.basename(candidate)

    # File objects foo are stored in <base_data_dir>/<mountpoint>/foo, i.e.
    # the mountpoint is the top directory level once base_data_dir has been
    # chopped off. Requiring a valid mountpoint contributes to preventing
    # users from downloading arbitrary file contents.
    try:
        below_base = Path(candidate).relative_to(
            Path(qiita_config.base_data_dir))
        mountpoint_name = below_base.parts[0]
    except (ValueError, IndexError):
        # ValueError: base_data_dir is no proper prefix of the filepath
        # IndexError: only base_data_dir itself was given
        return False

    if not (dir_name and mountpoint_name):
        # an empty mountpoint should be impossible due to the IndexError
        # handling above, but better safe than sorry
        return False

    with qdb.sql_connection.TRN:
        # find entries that
        #  a) are of filepath_type "directory" or "html_summary_dir"
        #  b) whose filepath equals the directory name
        #  c) whose mountpoint contains the first directory level
        # NOTE(review): c) is a substring match, not equality - confirm that
        # this is intended.
        sql = """SELECT filepath_id
                 FROM qiita.filepath
                 JOIN qiita.filepath_type USING (filepath_type_id)
                 JOIN qiita.data_directory USING (data_directory_id)
                 WHERE filepath_type IN ('directory', 'html_summary_dir') AND
                 filepath=%s AND
                 position(%s in mountpoint)>0;"""
        qdb.sql_connection.TRN.add(sql, [dir_name, mountpoint_name])
        matches = qdb.sql_connection.TRN.execute_fetchflatten()
        return bool(matches)


class FetchFileFromCentralHandler(RequestHandler):
Expand Down Expand Up @@ -37,20 +104,95 @@ def get(self, requested_filepath):
raise HTTPError(403, reason=(
"The requested file is not present in Qiita's BASE_DATA_DIR!"))

# delivery of the file via nginx requires replacing the basedatadir
# with the prefix defined in the nginx configuration for the
# base_data_dir, '/protected/' by default
protected_filepath = filepath.replace(basedatadir, '/protected')
filename_directory = "qiita-main-data.zip"
if os.path.isdir(filepath):
# Test if this directory is managed by Qiita's DB as a directory.
# This prevents a lazy client from simply downloading the whole
# base_data_dir
if not is_directory(filepath):
raise HTTPError(403, reason=(
"You cannot access this directory!"))
else:
# flag the response for qiita_client
self.set_header('Is-Qiita-Directory', 'yes')

self.set_header('Content-Type', 'application/octet-stream')
self.set_header('Content-Transfer-Encoding', 'binary')
self.set_header('X-Accel-Redirect', protected_filepath)
self.set_header('Content-Description', 'File Transfer')
self.set_header('Expires', '0')
self.set_header('Cache-Control', 'no-cache')
self.set_header('Content-Disposition',
'attachment; filename=%s' % os.path.basename(
protected_filepath))

# Here we need to differentiate between a request that comes directly to
# the qiita instance (happens in testing) and one that was redirected
# through nginx (should be the default). If nginx, we can use nginx' fast
# file delivery mechanisms, otherwise, we need to send via slower tornado.
# We indirectly infer this by looking for the "X-Forwarded-For" header,
# which should only exist when redirected through nginx.
if self.request.headers.get('X-Forwarded-For') is None:
# delivery via tornado
if not is_directory(filepath):
# a single file
self.set_header(
'Content-Disposition',
'attachment; filename=%s' % os.path.basename(filepath))
with open(filepath, "rb") as f:
self.write(f.read())
else:
# a whole directory
memfile = BytesIO()
with zipfile.ZipFile(memfile, 'w', zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(filepath):
for file in files:
full_path = os.path.join(root, file)
# make path in zip file relative
rel_path = os.path.relpath(full_path, filepath)
zf.write(full_path, rel_path)
memfile.seek(0)
self.set_header('Content-Type', 'application/zip')
self.set_header('Content-Disposition',
'attachment; filename=%s' % filename_directory)
self.write(memfile.read())
else:
# delivery via nginx
if not is_directory(filepath):
# a single file:
# delivery of the file via nginx requires replacing the
# basedatadir with the prefix defined in the nginx
# configuration for the base_data_dir, '/protected/' by default
protected_filepath = filepath.replace(basedatadir,
'/protected')
self.set_header('X-Accel-Redirect', protected_filepath)
self.set_header(
'Content-Disposition',
'attachment; filename=%s' % os.path.basename(
protected_filepath))
else:
# a whole directory
to_download = BaseHandlerDownload._list_dir_files_nginx(
self, filepath)

# fp_subdir is the part of the filepath the user requested,
# without QIITA_BASE_DIR
fp_subdir = os.path.relpath(filepath, basedatadir)

# above function adds filepath to located files, which is
# different from the non-nginx version, e.g.
# fp = /protected/job/2_test_folder/testdir/fileA.txt
# fp_name = job/2_test_folder/testdir/fileA.txt
# where "job/2_test_folder" is what user requested and
# "testdir/fileA.txt" is a file within this directory.
# When extracting by qiita_client, the "job/2_test_folder"
# part would be added twice (one by user request, second by
# unzipping). Therefore, we need to correct these names here:
to_download = [
(fp, os.path.relpath(fp_name, fp_subdir), fp_checksum,
fp_size)
for fp, fp_name, fp_checksum, fp_size
in to_download]
BaseHandlerDownload._write_nginx_file_list(self, to_download)
BaseHandlerDownload._set_nginx_headers(
self, filename_directory)

self.finish()


Expand All @@ -65,30 +207,96 @@ def post(self):
# canonic version of base_data_dir
basedatadir = os.path.abspath(qiita_config.base_data_dir)
stored_files = []
stored_directories = []

for filespath, filelist in self.request.files.items():
if filespath.startswith(basedatadir):
filespath = filespath[len(basedatadir):]

for file in filelist:
# differentiate between regular files and whole directories,
# which must be zipped AND the client must provide the
# is_directory='true' body argument.
sent_directory = self.get_body_argument(
'is_directory', "false") == "true"

filepath = os.path.join(filespath, file['filename'])
# remove leading /
if filepath.startswith(os.sep):
filepath = filepath[len(os.sep):]
filepath = os.path.abspath(os.path.join(basedatadir, filepath))

if os.path.exists(filepath):
if sent_directory:
# if a whole directory was sent, we want to store it at
# the given dirname of the filepath
filepath = os.path.dirname(filepath)

# prevent overwriting existing files, except in test mode
if os.path.exists(filepath) and (not is_test_environment()):
raise HTTPError(403, reason=(
"The requested file is already "
"present in Qiita's BASE_DATA_DIR!"))
"The requested %s is already "
"present in Qiita's BASE_DATA_DIR!" %
('directory' if sent_directory else 'file')))

os.makedirs(os.path.dirname(filepath), exist_ok=True)
with open(filepath, "wb") as f:
f.write(file['body'])
stored_files.append(filepath)
if sent_directory:
with zipfile.ZipFile(BytesIO(file['body'])) as zf:
zf.extractall(filepath)
stored_directories.append(filepath)
else:
with open(filepath, "wb") as f:
f.write(file['body'])
stored_files.append(filepath)

for (_type, objs) in [('files', stored_files),
('directories', stored_directories)]:
if len(objs) > 0:
self.write(
"Stored %i %s into BASE_DATA_DIR of Qiita:\n%s\n" % (
len(objs),
_type,
'\n'.join(map(lambda x: ' - %s' % x, objs))))

self.finish()


class DeleteFileFromCentralHandler(RequestHandler):
    """Delete a file or directory below Qiita's BASE_DATA_DIR.

    Deletion is refused with a 403 unless Qiita runs in a test environment.
    """
    # Note: this function is NOT available in productive instances!
    @authenticate_oauth
    @coroutine
    @execute_as_transaction
    def get(self, requested_filepath):
        """Remove the requested file or directory and report what was deleted.

        Parameters
        ----------
        requested_filepath : str
            Path of the file or directory to delete. It is interpreted as an
            absolute path and must be located in BASE_DATA_DIR.

        Raises
        ------
        HTTPError
            403, if Qiita is not in test mode
            403, if the path is outside of BASE_DATA_DIR
            403, if the path does not exist
        """
        if not is_test_environment():
            raise HTTPError(403, reason=(
                "You cannot delete files through this API endpoint, when "
                "Qiita is not in test-mode!"))

        # ensure we have an absolute path, i.e. starting at /
        filepath = os.path.join(os.path.sep, requested_filepath)
        # use a canonic version of the filepath
        filepath = os.path.abspath(filepath)

        # canonic version of base_data_dir
        basedatadir = os.path.abspath(qiita_config.base_data_dir)

        # A plain startswith(basedatadir) would also accept sibling paths
        # that merely share the prefix, e.g. '/data_evil' for '/data'.
        # Therefore compare against the base dir including a trailing
        # separator; os.path.join(..., '') appends it only if missing.
        # NOTE(review): as before, basedatadir itself is accepted - confirm
        # that deleting the whole BASE_DATA_DIR is really desired.
        basedatadir_prefix = os.path.join(basedatadir, '')
        if (filepath != basedatadir and
                not filepath.startswith(basedatadir_prefix)):
            # attempt to access files outside of the BASE_DATA_DIR
            raise HTTPError(403, reason=(
                "You cannot delete file '%s', which is outside of "
                "the BASE_DATA_DIR of Qiita!" % filepath))

        if not os.path.exists(filepath):
            raise HTTPError(403, reason=(
                "The requested file %s is not present "
                "in Qiita's BASE_DATA_DIR!" % filepath))

        if os.path.isdir(filepath):
            rmtree(filepath)
            self.write("Deleted directory %s from BASE_DATA_DIR of QIita" %
                       filepath)
        else:
            os.remove(filepath)
            self.write("Deleted file %s from BASE_DATA_DIR of Qiita" %
                       filepath)

        self.finish()
Loading
Loading