Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement file path serving at the level of the MiG WSGI wrapper. #129

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
3944a44
Extend MigTestCase such that it can provide a configuration object.
albu-diku Sep 26, 2024
69aad05
fix path overrides in python3 when generating the testconfig
albu-diku Oct 2, 2024
18fd097
Basic coverage of migwsgi.
albu-diku Jul 26, 2024
d932375
wip
albu-diku Aug 1, 2024
c718999
fixup
albu-diku Aug 2, 2024
1030c2a
fixup
albu-diku Aug 2, 2024
30520dc
update and relocate a comment
albu-diku Aug 5, 2024
6140621
start tightening up the code
albu-diku Aug 5, 2024
b6eb4c3
shift things around a little
albu-diku Aug 5, 2024
55ed288
work to make it readable with a nod towards further tests
albu-diku Aug 5, 2024
c4ffa9f
assert the response status
albu-diku Aug 5, 2024
43e1f8e
allow programming the response
albu-diku Aug 5, 2024
b20490f
repair previous
albu-diku Aug 5, 2024
8ce5e03
assert that a programmed title ends up in the page
albu-diku Aug 5, 2024
9e7df04
line naming up with other recent work in grid_openid
albu-diku Aug 14, 2024
b4933a0
fixup
albu-diku Aug 16, 2024
0ba4f50
fixup
albu-diku Sep 23, 2024
0872083
fixup
albu-diku Sep 23, 2024
b1e3840
fixup
albu-diku Sep 23, 2024
366a42c
split the testing infrastructure across multiple files
albu-diku Sep 24, 2024
9e554ff
collect common default kwargs
albu-diku Sep 24, 2024
9149a5b
use noop for set environ
albu-diku Sep 24, 2024
93cafe3
make the generic WSGI handling setup code more uniform
albu-diku Sep 24, 2024
4707173
bring over improvements to htmlsupp from another branch
albu-diku Sep 25, 2024
5ffe792
simplify
albu-diku Oct 2, 2024
f9859bd
fixup
albu-diku Oct 2, 2024
e9444d4
Make responding with binary data work under PY3.
albu-diku Sep 24, 2024
f9d018b
WIP - cat paths
albu-diku Sep 25, 2024
3c212e1
adapt
albu-diku Oct 2, 2024
a8207df
limit
albu-diku Oct 2, 2024
ddc3787
fixup
albu-diku Oct 2, 2024
88afad8
abandon ship on float('inf') and use -1 for limit
albu-diku Oct 2, 2024
fea9679
carve out as a function
albu-diku Oct 2, 2024
c1246c9
tweak + lint
albu-diku Oct 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions envhelp/makeconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,17 @@ def write_testconfig(env_name, is_py2=False):
'destination': os.path.join(_ENVHELP_OUTPUT_DIR, confs_name),
'destination_suffix': "-py%s" % ('2' if is_py2 else '3',),
}

if is_py2:
overrides.update(**{
'mig_code': '/usr/src/app/mig',
'mig_certs': '/usr/src/app/envhelp/output/certs',
'mig_state': '/usr/src/app/envhelp/output/state',
})
conf_dir_path = '/usr/src/app'
else:
conf_dir_path = _ENVHELP_OUTPUT_DIR
overrides.update(**{
'mig_code': os.path.join(conf_dir_path, 'mig'),
'mig_certs': os.path.join(conf_dir_path, 'certs'),
'mig_state': os.path.join(conf_dir_path, 'state'),
})

generate_confs(_ENVHELP_OUTPUT_DIR, **overrides)


Expand Down
10 changes: 6 additions & 4 deletions mig/shared/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,9 @@ def force_utf8_rec(input_obj, highlight=''):
if isinstance(input_obj, dict):
return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in
input_obj.items()}
elif isinstance(input_obj, list):
return [force_utf8_rec(i, highlight) for i in input_obj]
elif isinstance(input_obj, (list, tuple)):
thetype = type(input_obj)
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
elif is_unicode(input_obj):
return force_utf8(input_obj, highlight)
else:
Expand All @@ -544,8 +545,9 @@ def force_unicode_rec(input_obj, highlight=''):
if isinstance(input_obj, dict):
return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in
input_obj.items()}
elif isinstance(input_obj, list):
return [force_unicode_rec(i, highlight) for i in input_obj]
elif isinstance(input_obj, (list, tuple)):
thetype = type(input_obj)
return thetype(force_utf8_rec(i, highlight) for i in input_obj)
elif not is_unicode(input_obj):
return force_unicode(input_obj, highlight)
else:
Expand Down
2 changes: 2 additions & 0 deletions mig/shared/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def fix_missing(config_file, verbose=True):
'trac_id_field': 'email',
'migserver_http_url': 'http://%%(server_fqdn)s',
'migserver_https_url': '',
'migserver_server_maxsize': -1,
'myfiles_py_location': '',
'mig_server_id': '%s.0' % fqdn,
'empty_job_name': 'no_suitable_job-',
Expand Down Expand Up @@ -596,6 +597,7 @@ class Configuration:
migserver_https_mig_oidc_url = ''
migserver_https_ext_oidc_url = ''
migserver_https_sid_url = ''
migserver_server_maxsize = -1
sleep_period_for_empty_jobs = ''
min_seconds_between_live_update_requests = 0
cputime_for_empty_jobs = 0
Expand Down
4 changes: 4 additions & 0 deletions mig/shared/returnvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@
USER_NOT_CREATED = (201, 'USER_NOT_CREATED')
OUTPUT_VALIDATION_ERROR = (202, 'The output the server '
+ 'has generated could not be validated')

# REQUEST ERRORS
# NOTE: REJECTED_ERROR is what serve_paths in migwsgi reports when the total
#       size of the requested paths exceeds migserver_server_maxsize.

REJECTED_ERROR = (422, 'REJECTED')
129 changes: 99 additions & 30 deletions mig/wsgi-bin/migwsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#

import cgi
import codecs
import importlib
import os
import sys
Expand All @@ -35,6 +36,7 @@
from mig.shared.bailout import bailout_helper, crash_helper, compact_string
from mig.shared.base import requested_backend, allow_script, \
is_default_str_coding, force_default_str_coding_rec
from mig.shared.compat import PY2
from mig.shared.defaults import download_block_size, default_fs_coding
from mig.shared.conf import get_configuration_object
from mig.shared.objecttypes import get_object_type_info
Expand All @@ -43,14 +45,53 @@
from mig.shared.scriptinput import fieldstorage_to_dict


if PY2:
    def _ensure_encoded_string(chunk):
        """Return chunk unchanged - nothing is encoded under python 2."""
        return chunk
else:
    def _ensure_encoded_string(chunk):
        """Return chunk as utf8 encoded bytes suitable for a WSGI response.

        Text is encoded with the utf8 codec. A chunk that already is bytes is
        passed through untouched, because codecs.encode with a text codec
        raises TypeError when handed bytes under python 3.
        """
        if isinstance(chunk, bytes):
            return chunk
        return codecs.encode(chunk, 'utf8')


def _import_backend(backend):
import_path = 'mig.shared.functionality.%s' % backend
module_handle = importlib.import_module(import_path)
return module_handle.main


def _returnvalue_to_status(returnvalue):
return ' '.join((str(item) for item in returnvalue))


def object_type_info(object_type):
    """Return the type information for object_type.

    Thin wrapper that hands the lookup straight to get_object_type_info.
    """
    type_info = get_object_type_info(object_type)
    return type_info


def stub(configuration, client_id, import_path, backend, user_arguments_dict,
environ):
def serve_paths(configuration, paths, start_response):
    """Generate the raw contents of paths as one binary WSGI response.

    The sizes of all paths are summed up front and compared against the
    migserver_server_maxsize configuration value, where -1 disables the limit.
    If the limit is exceeded the response is started with REJECTED_ERROR and
    only an empty body is produced. Otherwise the response is started with OK
    and the files are streamed in order, one block at a time.

    configuration: object providing the migserver_server_maxsize attribute.
    paths: iterable of file system paths to deliver.
    start_response: the WSGI start_response callable.

    Any OSError from os.stat or IOError from open is left to the caller and
    in case of os.stat happens before the response is started.
    """
    # NOTE: paths is walked twice (size check, then delivery) so make sure a
    #       one-shot iterable is not exhausted by the first pass.
    paths = list(paths)
    serve_maxsize = configuration.migserver_server_maxsize

    serve_paths_total_bytes = sum(os.stat(path).st_size for path in paths)

    if serve_maxsize > -1 and serve_paths_total_bytes > serve_maxsize:
        # NOTE: WSGI requires headers as a list of (name, value) tuples
        start_response(_returnvalue_to_status(returnvalues.REJECTED_ERROR), [])
        yield b''
        return

    # we are all good to respond.. do so
    # NOTE: hop-by-hop headers like Transfer-Encoding must not be set by a
    #       WSGI application, so response framing is left to the server.
    start_response(_returnvalue_to_status(returnvalues.OK), [
        ('Content-Type', 'application/octet-stream'),
    ])

    for path in paths:
        with open(path, 'rb') as path_handle:
            # NOTE: stream in blocks to avoid holding entire files in memory
            for block in iter(lambda: path_handle.read(download_block_size),
                              b''):
                yield block


def stub(configuration, client_id, user_arguments_dict, environ, _retrieve_handler):
"""Run backend on behalf of client_id with supplied user_arguments_dict.
I.e. import main from import_path and execute it with supplied arguments.
"""
Expand All @@ -61,6 +102,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
before_time = time.time()

output_objects = []
backend = 'UNKNOWN'
main = dummy_main

# _logger.debug("stub for backend %r" % backend)
Expand All @@ -69,10 +111,12 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
# NEVER print/output it verbatim before it is validated below.

try:
default_page = configuration.site_landing_page # TODO: avoid doing this work a second time
backend = requested_backend(environ, fallback=default_page)
valid_backend_name(backend)
except InputException as iex:
_logger.error("%s refused to import invalid backend %r (%s): %s" %
(_addr, backend, import_path, iex))
_logger.error("%s refused to import invalid backend %r: %s" %
(_addr, backend, iex))
bailout_helper(configuration, backend, output_objects,
header_text='User Error')
output_objects.extend([
Expand All @@ -81,41 +125,40 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
{'object_type': 'link', 'text': 'Go to default interface',
'destination': configuration.site_landing_page}
])
return (output_objects, returnvalues.CLIENT_ERROR)
return backend, (output_objects, returnvalues.CLIENT_ERROR)

try:
# Import main from backend module

# _logger.debug("import main from %r" % import_path)
# NOTE: dynamic module loading to find corresponding main function
module_handle = importlib.import_module(import_path)
main = module_handle.main
main = _retrieve_handler(backend)
except Exception as err:
_logger.error("%s could not import %r (%s): %s" %
(_addr, backend, import_path, err))
_logger.error("%s could not import %r: %s" %
(_addr, backend, err))
bailout_helper(configuration, backend, output_objects)
output_objects.extend([
{'object_type': 'error_text', 'text':
'Could not load backend: %s' % html_escape(backend)},
{'object_type': 'link', 'text': 'Go to default interface',
'destination': configuration.site_landing_page}
])
return (output_objects, returnvalues.SYSTEM_ERROR)
return backend, (output_objects, returnvalues.SYSTEM_ERROR)

# _logger.debug("imported main %s" % main)

# Now backend value is validated to be safe for output

if not isinstance(user_arguments_dict, dict):
_logger.error("%s invalid user args %s for %s" % (_addr,
_logger.error("%s invalid user args %s for backend %r" % (_addr,
user_arguments_dict,
import_path))
backend))
bailout_helper(configuration, backend, output_objects,
header_text='Input Error')
output_objects.append(
{'object_type': 'error_text', 'text':
'User input is not on expected format!'})
return (output_objects, returnvalues.INVALID_ARGUMENT)
return backend, (output_objects, returnvalues.INVALID_ARGUMENT)

try:
(output_objects, (ret_code, ret_msg)) = main(client_id,
Expand All @@ -125,7 +168,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
_logger.error("%s script crashed:\n%s" % (_addr,
traceback.format_exc()))
crash_helper(configuration, backend, output_objects)
return (output_objects, returnvalues.ERROR)
return backend, (output_objects, returnvalues.ERROR)

(val_ret, val_msg) = validate(output_objects)
if not val_ret:
Expand All @@ -138,7 +181,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict,
after_time = time.time()
output_objects.append({'object_type': 'timing_info', 'text':
"done in %.3fs" % (after_time - before_time)})
return (output_objects, (ret_code, ret_msg))
return backend, (output_objects, (ret_code, ret_msg))


def wrap_wsgi_errors(environ, configuration, max_line_len=100):
Expand Down Expand Up @@ -193,6 +236,14 @@ def application(environ, start_response):
*start_response* is a helper function used to deliver the client response.
"""

def _set_os_environ(value):
os.environ = value

return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)


def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):

# NOTE: pass app environ including apache and query args on to sub handlers
# through the usual 'os.environ' channel expected in functionality
# handlers. Special care is needed to avoid various sub-interpreter
Expand Down Expand Up @@ -235,18 +286,20 @@ def application(environ, start_response):
os_env_value))

# Assign updated environ to LOCAL os.environ for the rest of this session
os.environ = environ
_set_environ(environ)

# NOTE: redirect stdout to stderr in python 2 only. It breaks logger in 3
# and stdout redirection apparently is already handled there.
if sys.version_info[0] < 3:
sys.stdout = sys.stderr

configuration = get_configuration_object()
if configuration is None:
configuration = get_configuration_object(_config_file, _skip_log)

_logger = configuration.logger

# NOTE: replace default wsgi errors to apache error log with our own logs
wrap_wsgi_errors(environ, configuration)
_wrap_wsgi_errors(environ, configuration)

for line in env_sync_status:
_logger.debug(line)
Expand Down Expand Up @@ -298,22 +351,20 @@ def application(environ, start_response):
default_page = configuration.site_landing_page
script_name = requested_backend(environ, fallback=default_page,
strip_ext=False)
backend = requested_backend(environ, fallback=default_page)
# _logger.debug('DEBUG: wsgi found backend %s and script %s' %
# (backend, script_name))
fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
environ=environ)
user_arguments_dict = fieldstorage_to_dict(fieldstorage)
user_arguments_dict = _fieldstorage_to_dict(fieldstorage)
if 'output_format' in user_arguments_dict:
output_format = user_arguments_dict['output_format'][0]

module_path = 'mig.shared.functionality.%s' % backend
(allow, msg) = allow_script(configuration, script_name, client_id)
if allow:
# _logger.debug("wsgi handling script: %s" % script_name)
(output_objs, ret_val) = stub(configuration, client_id,
module_path, backend,
user_arguments_dict, environ)
backend, (output_objs, ret_val) = stub(configuration, client_id,
user_arguments_dict, environ,
_retrieve_handler)
else:
_logger.warning("wsgi handling refused script:%s" % script_name)
(output_objs, ret_val) = reject_main(client_id,
Expand All @@ -335,7 +386,7 @@ def application(environ, start_response):

if 'json' == output_format:
default_content = 'application/json'
elif 'file' == output_format:
elif 'file' == output_format or 'chunked' == output_format:
default_content = 'application/octet-stream'
elif 'html' != output_format:
default_content = 'text/plain'
Expand Down Expand Up @@ -363,7 +414,7 @@ def application(environ, start_response):
output_objs.append(wsgi_entry)

_logger.debug("call format %r output to %s" % (backend, output_format))
output = format_output(configuration, backend, ret_code, ret_msg,
output = _format_output(configuration, backend, ret_code, ret_msg,
output_objs, output_format)
# _logger.debug("formatted %s output to %s" % (backend, output_format))
# _logger.debug("output:\n%s" % [output])
Expand All @@ -372,7 +423,7 @@ def application(environ, start_response):
_logger.error(
"Formatted output is NOT on default str coding: %s" % [output[:100]])
err_mark = '__****__'
output = format_output(configuration, backend, ret_code, ret_msg,
output = _format_output(configuration, backend, ret_code, ret_msg,
force_default_str_coding_rec(
output_objs, highlight=err_mark),
output_format)
Expand All @@ -385,18 +436,33 @@ def application(environ, start_response):
if output is None:
_logger.error("WSGI %s output formatting failed" % output_format)
output = 'Error: output could not be correctly delivered!'
output_format = 'html'

if output_format == 'serve':
serve_obj = next((x for x in output_objs if x['object_type'] == 'serve_paths'), None)
for piece in serve_paths(configuration, serve_obj['paths'], start_response):
yield piece
return

content_length = len(output)
if not 'Content-Length' in dict(response_headers):
# _logger.debug("WSGI adding explicit content length %s" % content_length)
# adding explicit content length
response_headers.append(('Content-Length', "%d" % content_length))

_logger.debug("send %r response as %s to %s" %
(backend, output_format, client_id))
# NOTE: send response to client but don't crash e.g. on closed connection
try:
start_response(status, response_headers)
except IOError as ioe:
_logger.warning("WSGI %s for %s could not deliver output: %s" %
(backend, client_id, ioe))
except Exception as exc:
_logger.error("WSGI %s for %s crashed during response: %s" %
(backend, client_id, exc))

# serve response data with a known content type
try:
# NOTE: we consistently hit download error for archive files reaching ~2GB
# with showfreezefile.py on wsgi but the same on cgi does NOT suffer
# the problem for the exact same files. It seems wsgi has a limited
Expand All @@ -410,12 +476,15 @@ def application(environ, start_response):
_logger.info("WSGI %s yielding %d output parts (%db)" %
(backend, chunk_parts, content_length))
# _logger.debug("send chunked %r response to client" % backend)
for i in xrange(chunk_parts):
for i in list(range(chunk_parts)):
# _logger.debug("WSGI %s yielding part %d / %d output parts" %
# (backend, i+1, chunk_parts))
# end index may be after end of content - but no problem
part = output[i*download_block_size:(i+1)*download_block_size]
yield part
if output_format == 'file':
yield part
else:
yield _ensure_encoded_string(part)
if chunk_parts > 1:
_logger.info("WSGI %s finished yielding all %d output parts" %
(backend, chunk_parts))
Expand Down
1 change: 1 addition & 0 deletions tests/data/loading.gif
1 change: 1 addition & 0 deletions tests/fixture/mig_shared_configuration--new.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"migserver_https_url": "",
"migserver_public_alias_url": "",
"migserver_public_url": "",
"migserver_server_maxsize": -1,
"min_seconds_between_live_update_requests": 0,
"mrsl_files_dir": "",
"myfiles_py_location": "",
Expand Down
Loading
Loading