diff --git a/envhelp/makeconfig.py b/envhelp/makeconfig.py index 46de93e5d..f76664b0c 100644 --- a/envhelp/makeconfig.py +++ b/envhelp/makeconfig.py @@ -58,12 +58,17 @@ def write_testconfig(env_name, is_py2=False): 'destination': os.path.join(_ENVHELP_OUTPUT_DIR, confs_name), 'destination_suffix': "-py%s" % ('2' if is_py2 else '3',), } + if is_py2: - overrides.update(**{ - 'mig_code': '/usr/src/app/mig', - 'mig_certs': '/usr/src/app/envhelp/output/certs', - 'mig_state': '/usr/src/app/envhelp/output/state', - }) + conf_dir_path = '/usr/src/app' + else: + conf_dir_path = _ENVHELP_OUTPUT_DIR + overrides.update(**{ + 'mig_code': os.path.join(conf_dir_path, 'mig'), + 'mig_certs': os.path.join(conf_dir_path, 'certs'), + 'mig_state': os.path.join(conf_dir_path, 'state'), + }) + generate_confs(_ENVHELP_OUTPUT_DIR, **overrides) diff --git a/mig/shared/base.py b/mig/shared/base.py index 64f12b370..316c1cb11 100644 --- a/mig/shared/base.py +++ b/mig/shared/base.py @@ -516,8 +516,9 @@ def force_utf8_rec(input_obj, highlight=''): if isinstance(input_obj, dict): return {force_utf8_rec(i, highlight): force_utf8_rec(j, highlight) for (i, j) in input_obj.items()} - elif isinstance(input_obj, list): - return [force_utf8_rec(i, highlight) for i in input_obj] + elif isinstance(input_obj, (list, tuple)): + thetype = type(input_obj) + return thetype(force_utf8_rec(i, highlight) for i in input_obj) elif is_unicode(input_obj): return force_utf8(input_obj, highlight) else: @@ -544,8 +545,9 @@ def force_unicode_rec(input_obj, highlight=''): if isinstance(input_obj, dict): return {force_unicode_rec(i, highlight): force_unicode_rec(j, highlight) for (i, j) in input_obj.items()} - elif isinstance(input_obj, list): - return [force_unicode_rec(i, highlight) for i in input_obj] + elif isinstance(input_obj, (list, tuple)): + thetype = type(input_obj) + return thetype(force_unicode_rec(i, highlight) for i in input_obj) elif not is_unicode(input_obj): return force_unicode(input_obj, highlight) else: 
diff --git a/mig/shared/configuration.py b/mig/shared/configuration.py index bbf969ed5..e77103a2c 100644 --- a/mig/shared/configuration.py +++ b/mig/shared/configuration.py @@ -222,6 +222,7 @@ def fix_missing(config_file, verbose=True): 'trac_id_field': 'email', 'migserver_http_url': 'http://%%(server_fqdn)s', 'migserver_https_url': '', + 'migserver_server_maxsize': -1, 'myfiles_py_location': '', 'mig_server_id': '%s.0' % fqdn, 'empty_job_name': 'no_suitable_job-', @@ -596,6 +597,7 @@ class Configuration: migserver_https_mig_oidc_url = '' migserver_https_ext_oidc_url = '' migserver_https_sid_url = '' + migserver_server_maxsize = -1 sleep_period_for_empty_jobs = '' min_seconds_between_live_update_requests = 0 cputime_for_empty_jobs = 0 diff --git a/mig/shared/returnvalues.py b/mig/shared/returnvalues.py index 8c9d98363..f0ddcbe98 100644 --- a/mig/shared/returnvalues.py +++ b/mig/shared/returnvalues.py @@ -46,3 +46,7 @@ USER_NOT_CREATED = (201, 'USER_NOT_CREATED') OUTPUT_VALIDATION_ERROR = (202, 'The output the server ' + 'has generated could not be validated') + +# REQUEST ERRORS + +REJECTED_ERROR = (422, 'REJECTED') diff --git a/mig/wsgi-bin/migwsgi.py b/mig/wsgi-bin/migwsgi.py index 73987133e..11387f669 100755 --- a/mig/wsgi-bin/migwsgi.py +++ b/mig/wsgi-bin/migwsgi.py @@ -26,6 +26,7 @@ # import cgi +import codecs import importlib import os import sys @@ -35,6 +36,7 @@ from mig.shared.bailout import bailout_helper, crash_helper, compact_string from mig.shared.base import requested_backend, allow_script, \ is_default_str_coding, force_default_str_coding_rec +from mig.shared.compat import PY2 from mig.shared.defaults import download_block_size, default_fs_coding from mig.shared.conf import get_configuration_object from mig.shared.objecttypes import get_object_type_info @@ -43,14 +45,53 @@ from mig.shared.scriptinput import fieldstorage_to_dict +if PY2: + def _ensure_encoded_string(chunk): + return chunk +else: + def _ensure_encoded_string(chunk): + return 
codecs.encode(chunk, 'utf8') + + +def _import_backend(backend): + import_path = 'mig.shared.functionality.%s' % backend + module_handle = importlib.import_module(import_path) + return module_handle.main + + +def _returnvalue_to_status(returnvalue): + return ' '.join((str(item) for item in returnvalue)) + + def object_type_info(object_type): """Lookup object type""" return get_object_type_info(object_type) -def stub(configuration, client_id, import_path, backend, user_arguments_dict, - environ): +def serve_paths(configuration, paths, start_response): + serve_maxsize = configuration.migserver_server_maxsize + + serve_paths_stat_results = (os.stat(path) for path in paths) + serve_paths_total_bytes = sum(st.st_size for st in serve_paths_stat_results) + + if serve_maxsize > -1 and serve_paths_total_bytes > serve_maxsize: + start_response(_returnvalue_to_status(returnvalues.REJECTED_ERROR), []) + yield b'' + return + + # we are all good to respond.. do so + start_response(_returnvalue_to_status(returnvalues.OK), [ + ('Content-Type', 'application/octet-stream'), + ('Transfer-Encoding', 'chunked'), + ]) + + for path in paths: + with open(path, 'rb') as path_handle: + yield path_handle.read() + + +def stub(configuration, client_id, user_arguments_dict, environ, _retrieve_handler): """Run backend on behalf of client_id with supplied user_arguments_dict. I.e. import main from import_path and execute it with supplied arguments. """ @@ -61,6 +102,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, before_time = time.time() output_objects = [] + backend = 'UNKNOWN' main = dummy_main # _logger.debug("stub for backend %r" % backend) @@ -69,10 +111,12 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, # NEVER print/output it verbatim before it is validated below. 
try: + default_page = configuration.site_landing_page # TODO: avoid doing this work a second time + backend = requested_backend(environ, fallback=default_page) valid_backend_name(backend) except InputException as iex: - _logger.error("%s refused to import invalid backend %r (%s): %s" % - (_addr, backend, import_path, iex)) + _logger.error("%s refused to import invalid backend %r: %s" % + (_addr, backend, iex)) bailout_helper(configuration, backend, output_objects, header_text='User Error') output_objects.extend([ @@ -81,18 +125,17 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, {'object_type': 'link', 'text': 'Go to default interface', 'destination': configuration.site_landing_page} ]) - return (output_objects, returnvalues.CLIENT_ERROR) + return backend, (output_objects, returnvalues.CLIENT_ERROR) try: # Import main from backend module # _logger.debug("import main from %r" % import_path) # NOTE: dynamic module loading to find corresponding main function - module_handle = importlib.import_module(import_path) - main = module_handle.main + main = _retrieve_handler(backend) except Exception as err: - _logger.error("%s could not import %r (%s): %s" % - (_addr, backend, import_path, err)) + _logger.error("%s could not import %r: %s" % + (_addr, backend, err)) bailout_helper(configuration, backend, output_objects) output_objects.extend([ {'object_type': 'error_text', 'text': @@ -100,22 +143,22 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, {'object_type': 'link', 'text': 'Go to default interface', 'destination': configuration.site_landing_page} ]) - return (output_objects, returnvalues.SYSTEM_ERROR) + return backend, (output_objects, returnvalues.SYSTEM_ERROR) # _logger.debug("imported main %s" % main) # Now backend value is validated to be safe for output if not isinstance(user_arguments_dict, dict): - _logger.error("%s invalid user args %s for %s" % (_addr, + _logger.error("%s invalid user args %s for 
backend %r" % (_addr, user_arguments_dict, - import_path)) + backend)) bailout_helper(configuration, backend, output_objects, header_text='Input Error') output_objects.append( {'object_type': 'error_text', 'text': 'User input is not on expected format!'}) - return (output_objects, returnvalues.INVALID_ARGUMENT) + return backend, (output_objects, returnvalues.INVALID_ARGUMENT) try: (output_objects, (ret_code, ret_msg)) = main(client_id, @@ -125,7 +168,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, _logger.error("%s script crashed:\n%s" % (_addr, traceback.format_exc())) crash_helper(configuration, backend, output_objects) - return (output_objects, returnvalues.ERROR) + return backend, (output_objects, returnvalues.ERROR) (val_ret, val_msg) = validate(output_objects) if not val_ret: @@ -138,7 +181,7 @@ def stub(configuration, client_id, import_path, backend, user_arguments_dict, after_time = time.time() output_objects.append({'object_type': 'timing_info', 'text': "done in %.3fs" % (after_time - before_time)}) - return (output_objects, (ret_code, ret_msg)) + return backend, (output_objects, (ret_code, ret_msg)) def wrap_wsgi_errors(environ, configuration, max_line_len=100): @@ -193,6 +236,14 @@ def application(environ, start_response): *start_response* is a helper function used to deliver the client response. """ + def _set_os_environ(value): + os.environ = value + + return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors) + + +def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=wrap_wsgi_errors, _config_file=None, _skip_log=False): + # NOTE: pass app environ including apache and query args on to sub handlers # through the usual 'os.environ' channel expected in functionality # handlers. 
Special care is needed to avoid various sub-interpreter @@ -235,18 +286,20 @@ def application(environ, start_response): os_env_value)) # Assign updated environ to LOCAL os.environ for the rest of this session - os.environ = environ + _set_environ(environ) # NOTE: redirect stdout to stderr in python 2 only. It breaks logger in 3 # and stdout redirection apparently is already handled there. if sys.version_info[0] < 3: sys.stdout = sys.stderr - configuration = get_configuration_object() + if configuration is None: + configuration = get_configuration_object(_config_file, _skip_log) + _logger = configuration.logger # NOTE: replace default wsgi errors to apache error log with our own logs - wrap_wsgi_errors(environ, configuration) + _wrap_wsgi_errors(environ, configuration) for line in env_sync_status: _logger.debug(line) @@ -298,22 +351,20 @@ def application(environ, start_response): default_page = configuration.site_landing_page script_name = requested_backend(environ, fallback=default_page, strip_ext=False) - backend = requested_backend(environ, fallback=default_page) # _logger.debug('DEBUG: wsgi found backend %s and script %s' % # (backend, script_name)) fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ) - user_arguments_dict = fieldstorage_to_dict(fieldstorage) + user_arguments_dict = _fieldstorage_to_dict(fieldstorage) if 'output_format' in user_arguments_dict: output_format = user_arguments_dict['output_format'][0] - module_path = 'mig.shared.functionality.%s' % backend (allow, msg) = allow_script(configuration, script_name, client_id) if allow: # _logger.debug("wsgi handling script: %s" % script_name) - (output_objs, ret_val) = stub(configuration, client_id, - module_path, backend, - user_arguments_dict, environ) + backend, (output_objs, ret_val) = stub(configuration, client_id, + user_arguments_dict, environ, + _retrieve_handler) else: _logger.warning("wsgi handling refused script:%s" % script_name) (output_objs, ret_val) = 
reject_main(client_id, @@ -335,7 +386,7 @@ def application(environ, start_response): if 'json' == output_format: default_content = 'application/json' - elif 'file' == output_format: + elif 'file' == output_format or 'chunked' == output_format: default_content = 'application/octet-stream' elif 'html' != output_format: default_content = 'text/plain' @@ -363,7 +414,7 @@ def application(environ, start_response): output_objs.append(wsgi_entry) _logger.debug("call format %r output to %s" % (backend, output_format)) - output = format_output(configuration, backend, ret_code, ret_msg, + output = _format_output(configuration, backend, ret_code, ret_msg, output_objs, output_format) # _logger.debug("formatted %s output to %s" % (backend, output_format)) # _logger.debug("output:\n%s" % [output]) @@ -372,7 +423,7 @@ def application(environ, start_response): _logger.error( "Formatted output is NOT on default str coding: %s" % [output[:100]]) err_mark = '__****__' - output = format_output(configuration, backend, ret_code, ret_msg, + output = _format_output(configuration, backend, ret_code, ret_msg, force_default_str_coding_rec( output_objs, highlight=err_mark), output_format) @@ -385,10 +436,17 @@ def application(environ, start_response): if output is None: _logger.error("WSGI %s output formatting failed" % output_format) output = 'Error: output could not be correctly delivered!' 
+ output_format = 'html' + + if output_format == 'serve': + serve_obj = next((x for x in output_objs if x['object_type'] == 'serve_paths'), None) + for piece in serve_paths(configuration, serve_obj['paths'], start_response): + yield piece + return content_length = len(output) if not 'Content-Length' in dict(response_headers): - # _logger.debug("WSGI adding explicit content length %s" % content_length) + # adding explicit content length response_headers.append(('Content-Length', "%d" % content_length)) _logger.debug("send %r response as %s to %s" % @@ -396,7 +454,15 @@ def application(environ, start_response): # NOTE: send response to client but don't crash e.g. on closed connection try: start_response(status, response_headers) + except IOError as ioe: + _logger.warning("WSGI %s for %s could not deliver output: %s" % + (backend, client_id, ioe)) + except Exception as exc: + _logger.error("WSGI %s for %s crashed during response: %s" % + (backend, client_id, exc)) + # serve response data with a known content type + try: # NOTE: we consistently hit download error for archive files reaching ~2GB # with showfreezefile.py on wsgi but the same on cgi does NOT suffer # the problem for the exact same files. 
It seems wsgi has a limited @@ -410,12 +476,15 @@ def application(environ, start_response): _logger.info("WSGI %s yielding %d output parts (%db)" % (backend, chunk_parts, content_length)) # _logger.debug("send chunked %r response to client" % backend) - for i in xrange(chunk_parts): + for i in list(range(chunk_parts)): # _logger.debug("WSGI %s yielding part %d / %d output parts" % # (backend, i+1, chunk_parts)) # end index may be after end of content - but no problem part = output[i*download_block_size:(i+1)*download_block_size] - yield part + if output_format == 'file': + yield part + else: + yield _ensure_encoded_string(part) if chunk_parts > 1: _logger.info("WSGI %s finished yielding all %d output parts" % (backend, chunk_parts)) diff --git a/tests/data/loading.gif b/tests/data/loading.gif new file mode 120000 index 000000000..2ef2c9b77 --- /dev/null +++ b/tests/data/loading.gif @@ -0,0 +1 @@ +../../mig/images/loading.gif \ No newline at end of file diff --git a/tests/fixture/mig_shared_configuration--new.json b/tests/fixture/mig_shared_configuration--new.json index b73c4d842..9b61988a7 100644 --- a/tests/fixture/mig_shared_configuration--new.json +++ b/tests/fixture/mig_shared_configuration--new.json @@ -121,6 +121,7 @@ "migserver_https_url": "", "migserver_public_alias_url": "", "migserver_public_url": "", + "migserver_server_maxsize": -1, "min_seconds_between_live_update_requests": 0, "mrsl_files_dir": "", "myfiles_py_location": "", diff --git a/tests/support/__init__.py b/tests/support/__init__.py index 1b04f74f2..2445b2ac8 100644 --- a/tests/support/__init__.py +++ b/tests/support/__init__.py @@ -40,14 +40,29 @@ import sys from unittest import TestCase, main as testmain +from tests.support.configsupp import FakeConfiguration from tests.support.suppconst import MIG_BASE, TEST_BASE, TEST_FIXTURE_DIR, \ - TEST_OUTPUT_DIR + TEST_OUTPUT_DIR, TEST_DATA_DIR PY2 = (sys.version_info[0] == 2) # force defaults to a local environment os.environ['MIG_ENV'] = 'local' +# 
expose the configured environment as a constant +MIG_ENV = os.environ['MIG_ENV'] + +if MIG_ENV == 'local': + # force testconfig as the config file path + is_py2 = PY2 + _conf_dir_suffix = "-py%s" % ('2' if is_py2 else '3',) + _conf_dir = "testconfs%s" % (_conf_dir_suffix,) + _local_conf = os.path.join( + MIG_BASE, 'envhelp/output', _conf_dir, 'MiGserver.conf') + _config_file = os.getenv('MIG_CONF', None) + if _config_file is None: + os.environ['MIG_CONF'] = _local_conf + # All MiG related code will at some point include bits from the mig module # namespace. Rather than have this knowledge spread through every test file, # make the sole responsbility of test files to find the support file and @@ -67,8 +82,11 @@ from tests.support.assertover import AssertOver from tests.support.configsupp import FakeConfiguration +from tests.support.htmlsupp import HtmlAssertMixin from tests.support.loggersupp import FakeLogger from tests.support.serversupp import make_wrapped_server +from tests.support.wsgisupp import create_wsgi_environ, \ + create_wsgi_start_response, ServerAssertMixin # Basic global logging configuration for testing @@ -103,6 +121,7 @@ def __init__(self, *args): super(MigTestCase, self).__init__(*args) self._cleanup_checks = list() self._cleanup_paths = set() + self._configuration = None self._logger = None self._skip_logging = False @@ -153,6 +172,31 @@ def _reset_logging(self, stream): root_handler = root_logger.handlers[0] root_handler.stream = stream + # testcase defaults + + @staticmethod + def _make_configuration_instance(configuration_to_make): + if configuration_to_make == 'fakeconfig': + return FakeConfiguration() + elif configuration_to_make == 'testconfig': + from mig.shared.conf import get_configuration_object + return get_configuration_object(skip_log=True, disable_auth_log=True) + else: + raise AssertionError( + "MigTestCase: unknown configuration %r" % (configuration_to_make,)) + + def _provide_configuration(self): + return 'fakeconfig' + + 
@property + def configuration(self): + """Init a fake configuration if not already done""" + if self._configuration is None: + configuration_to_make = self._provide_configuration() + self._configuration = self._make_configuration_instance( + configuration_to_make) + return self._configuration + @property def logger(self): """Init a fake logger if not already done""" @@ -199,6 +243,10 @@ def assertPathExists(self, relative_path): assert not os.path.isabs( relative_path), "expected relative path within output folder" absolute_path = os.path.join(TEST_OUTPUT_DIR, relative_path) + return MigTestCase._absolute_path_kind(absolute_path) + + @staticmethod + def _absolute_path_kind(absolute_path): stat_result = os.lstat(absolute_path) if stat.S_ISLNK(stat_result.st_mode): return "symlink" @@ -295,12 +343,16 @@ def temppath(relative_path, test_case, ensure_dir=False, skip_clean=False): """Get absolute temp path for relative_path""" assert isinstance(test_case, MigTestCase) tmp_path = os.path.join(TEST_OUTPUT_DIR, relative_path) + return _temppath(tmp_path, test_case, ensure_dir=ensure_dir, skip_clean=skip_clean) + + +def _temppath(tmp_path, test_case, ensure_dir=False, skip_clean=False): if ensure_dir: try: os.mkdir(tmp_path) except FileExistsError: raise AssertionError( - "ABORT: use of unclean output path: %s" % relative_path) + "ABORT: use of unclean output path: %s" % tmp_path) if not skip_clean: test_case._cleanup_paths.add(tmp_path) return tmp_path diff --git a/tests/support/htmlsupp.py b/tests/support/htmlsupp.py new file mode 100644 index 000000000..61fcadbee --- /dev/null +++ b/tests/support/htmlsupp.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# --- BEGIN_HEADER --- +# +# htmlsupp - test support library for HTML +# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH +# +# This file is part of MiG. 
+# +# MiG is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# MiG is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# -- END_HEADER --- +# + +"""Test support library for HTML.""" + + +class HtmlAssertMixin: + """Custom assertions for HTML containing strings.""" + + def assertHtmlElement(self, value, tag_name): + """Check that an occurrence of the specified tag within an HTML input + string can be found. Returns the textual content of the first match. + """ + + self.assertIsValidHtmlDocument(value) + + # TODO: this is a definitively stop-gap way of finding a tag within the HTML + # and is used purely to keep this initial change to a reasonable size. + + tag_open = ''.join(['<', tag_name, '>']) + tag_open_index = value.index(tag_open) + tag_open_index_after = tag_open_index + len(tag_open) + + tag_close = ''.join(['</', tag_name, '>']) + tag_close_index = value.index(tag_close, tag_open_index_after) + + return value[tag_open_index_after:tag_close_index] + + def assertHtmlElementTextContent(self, value, tag_name, expected_text, trim_newlines=True): + """Check there is an occurrence of a tag within an HTML input string + and check the text it encloses equals exactly the expectation. + """ + + self.assertIsValidHtmlDocument(value) + + # TODO: this is a definitively stop-gap way of finding a tag within the HTML + # and is used purely to keep this initial change to a reasonable size. 
+ + actual_text = self.assertHtmlElement(value, tag_name) + if trim_newlines: + actual_text = actual_text.strip('\n') + self.assertEqual(actual_text, expected_text) + + def assertIsValidHtmlDocument(self, value): + """Check that the input string contains a valid HTML document. + """ + + assert isinstance(value, type(u"")), "input string was not utf8" + + error = None + try: + has_doctype = value.startswith("<!DOCTYPE html") + assert has_doctype, "no valid document opener" + end_html_tag_idx = value.rfind('</html>') + maybe_document_end = value[end_html_tag_idx:].rstrip() + assert maybe_document_end == '</html>', "no valid document closer" + except Exception as exc: + error = exc + if error: + raise AssertionError("failed to verify input string as HTML: %s" % (str(error),)) diff --git a/tests/support/suppconst.py b/tests/support/suppconst.py index fcf401290..15912e933 100644 --- a/tests/support/suppconst.py +++ b/tests/support/suppconst.py @@ -31,6 +31,7 @@ # Use abspath for __file__ on Py2 _SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_BASE = os.path.normpath(os.path.join(_SUPPORT_DIR, "..")) +TEST_DATA_DIR = os.path.join(TEST_BASE, "data") TEST_FIXTURE_DIR = os.path.join(TEST_BASE, "fixture") TEST_OUTPUT_DIR = os.path.join(TEST_BASE, "output") MIG_BASE = os.path.realpath(os.path.join(TEST_BASE, "..")) diff --git a/tests/support/wsgisupp.py b/tests/support/wsgisupp.py new file mode 100644 index 000000000..b3bdb0672 --- /dev/null +++ b/tests/support/wsgisupp.py @@ -0,0 +1,67 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# --- BEGIN_HEADER --- +# +# wsgisupp - test support library for WSGI +# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH +# +# This file is part of MiG. +# +# MiG is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# MiG is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# -- END_HEADER --- +# + +"""Test support library for WSGI.""" + + +def create_wsgi_environ(configuration, wsgi_variables): + environ = {} + environ['wsgi.input'] = () + environ['MIG_CONF'] = configuration.config_file + environ['HTTP_HOST'] = wsgi_variables.get('http_host') + environ['PATH_INFO'] = wsgi_variables.get('path_info') + environ['SCRIPT_URI'] = ''.join(('http://', environ['HTTP_HOST'], environ['PATH_INFO'])) + return environ + + +class FakeStartResponse: + def __init__(self): + self.calls = [] + + def __call__(self, status, headers, exc=None): + self.calls.append((status, headers, exc)) + + +def create_wsgi_start_response(): + return FakeStartResponse() + + +class ServerAssertMixin: + """Custom assertions for verifying server code executed under test.""" + + def assertWsgiResponseStatus(self, fake_start_response, expected_status_code): + assert isinstance(fake_start_response, FakeStartResponse) + + def called_once(fake): + assert hasattr(fake, 'calls') + return len(fake.calls) == 1 + + self.assertTrue(called_once(fake_start_response)) + thecall = fake_start_response.calls[0] + wsgi_status = thecall[0] + actual_status_code = int(wsgi_status[0:3]) + self.assertEqual(actual_status_code, expected_status_code) diff --git a/tests/test_mig_wsgi-bin_migwsgi.py b/tests/test_mig_wsgi-bin_migwsgi.py new file mode 100644 index 000000000..39129324b --- /dev/null +++ b/tests/test_mig_wsgi-bin_migwsgi.py @@ -0,0 +1,364 @@ +# -*- coding: utf-8 -*- +# +# --- BEGIN_HEADER --- +# +# Copyright (C) 2003-2024 The MiG Project 
by the Science HPC Center at UCPH +# +# This file is part of MiG. +# +# MiG is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# MiG is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. +# +# --- END_HEADER --- +# + +"""Unit tests for the MiG WSGI glue""" + +import codecs +from configparser import ConfigParser +import importlib +import os +import stat +import sys + +from tests.support import MIG_BASE, TEST_BASE, TEST_DATA_DIR, MigTestCase, testmain +from mig.shared.output import format_output +import mig.shared.returnvalues as returnvalues + + +from tests.support import PY2, is_path_within, \ + create_wsgi_environ, create_wsgi_start_response, \ + ServerAssertMixin, HtmlAssertMixin +from mig.shared.base import client_id_dir, client_dir_id, get_short_id, \ + invisible_path, allow_script, brief_list + + +_LOCAL_MIG_BASE = '/usr/src/app' if PY2 else MIG_BASE # account for execution in container +_PYTHON_MAJOR = '2' if PY2 else '3' +_TEST_CONF_DIR = os.path.join(MIG_BASE, "envhelp/output/testconfs-py%s" % (_PYTHON_MAJOR,)) +_TEST_CONF_FILE = os.path.join(_TEST_CONF_DIR, "MiGserver.conf") +_TEST_CONF_SYMLINK = os.path.join(MIG_BASE, "envhelp/output/testconfs") + + +# workaround for migwsgi being placed witin a non-module directory +def _import_migwsgi(): + sys.path.append(os.path.join(MIG_BASE, 'mig/wsgi-bin')) + migwsgi = importlib.import_module('migwsgi') + sys.path.pop(-1) + return migwsgi 
+migwsgi = _import_migwsgi() + + +def _is_return_value(return_value): + defined_return_values = returnvalues.__dict__.values() + return return_value in defined_return_values + + +def _trigger_and_unpack_result(application_result, result_kind='textual'): + assert result_kind in ('textual', 'binary') + + chunks = list(application_result) + + assert len(chunks) > 0, "invocation returned no output" + complete_value = b''.join(chunks) + if result_kind == 'binary': + decoded_value = complete_value + else: + decoded_value = codecs.decode(complete_value, 'utf8') + return decoded_value + + +def create_instrumented_fieldstorage_to_dict(): + def _instrumented_fieldstorage_to_dict(fieldstorage): + return _instrumented_fieldstorage_to_dict._result + + _instrumented_fieldstorage_to_dict._result = { + 'output_format': ('html',) + } + + def set_result(result): + _instrumented_fieldstorage_to_dict._result = result + + _instrumented_fieldstorage_to_dict.set_result = set_result + + return _instrumented_fieldstorage_to_dict + + +def create_instrumented_format_output(): + def _instrumented_format_output( + configuration, + backend, + ret_val, + ret_msg, + out_obj, + outputformat, + ): + # record the call args + call_args_out_obj = list(out_obj) # capture the original before altering it + call_args = (configuration, backend, ret_val, ret_msg, call_args_out_obj, outputformat,) + _instrumented_format_output.calls.append({ 'args': call_args }) + + if _instrumented_format_output._file: + return format_output( + configuration, + backend, + ret_val, + ret_msg, + out_obj, + outputformat, + ) + + # FIXME: the following is a workaround for a bug that exists between the WSGI wrapper + # and the output formatter - specifically, the latter adds default header and + # title if start does not exist, but the former ensures that start always exists + # meaning that a default response under WSGI is missing half the HTML. 
+ start_obj_idx = next((i for i, obj in enumerate(out_obj) if obj['object_type'] == 'start')) + insertion_idx = start_obj_idx + + # FIXME: format_output() is sensitive to ordering and MUST see a title object _before_ + # anything else otherwise the preamble ends up written above the header and thus + # an invalid HTML page is served. + insertion_idx += 1 + out_obj.insert(insertion_idx, { + 'object_type': 'title', + 'text': _instrumented_format_output.values['title_text'], + 'meta': '', + 'style': {}, + 'script': {}, + }) + + insertion_idx += 1 + out_obj.insert(insertion_idx, { + 'object_type': 'header', + 'text': _instrumented_format_output.values['header_text'] + }) + + return format_output( + configuration, + backend, + ret_val, + ret_msg, + out_obj, + outputformat, + ) + _instrumented_format_output.calls = [] + _instrumented_format_output._file = False + _instrumented_format_output.values = dict( + title_text='', + header_text='', + ) + + + def _set_file(is_enabled): + _instrumented_format_output._file = is_enabled + + setattr(_instrumented_format_output, 'set_file', _set_file) + + def _program_values(**kwargs): + _instrumented_format_output.values.update(kwargs) + + _instrumented_format_output.set_values = _program_values + + return _instrumented_format_output + + +def create_instrumented_retrieve_handler(): + def _simulated_action(*args): + return _simulated_action.returning or ([], returnvalues.ERROR) + _simulated_action.calls = [] + _simulated_action.returning = None + + def _program_response(output_objects=None, return_value=None): + assert _is_return_value(return_value), "return value must be present in returnvalues" + assert isinstance(output_objects, list) + _simulated_action.returning = (output_objects, return_value) + + def _instrumented_retrieve_handler(*args): + _instrumented_retrieve_handler.calls.append(tuple(args)) + return _simulated_action + _instrumented_retrieve_handler.calls = [] + + _instrumented_retrieve_handler.program = _program_response 
+ _instrumented_retrieve_handler.simulated = _simulated_action + + return _instrumented_retrieve_handler + + +def noop(*args): + pass + + +class MigWsgi_binMigwsgi(MigTestCase, ServerAssertMixin, HtmlAssertMixin): + def assertInstrumentation(self): + simulated_action = self.instrumented_retrieve_handler.simulated + self.assertIsNotNone(simulated_action.returning, "no response programmed") + + def was_called(fake): + assert hasattr(fake, 'calls') + return len(fake.calls) > 0 + + self.assertTrue(was_called(self.instrumented_format_output), "no output generated") + self.assertTrue(was_called(self.instrumented_retrieve_handler), "no output generated") + + def _provide_configuration(self): + return 'testconfig' + + def before_each(self): + # generic WSGI setup + self.fake_wsgi_environ = create_wsgi_environ(self.configuration, wsgi_variables=dict( + http_host='localhost', + path_info='/', + )) + self.fake_start_response = create_wsgi_start_response() + + # MiG WSGI wrapper specific setup + self.instrumented_fieldstorage_to_dict = create_instrumented_fieldstorage_to_dict() + self.instrumented_format_output = create_instrumented_format_output() + self.instrumented_retrieve_handler = create_instrumented_retrieve_handler() + + self.application_args = (self.configuration, self.fake_wsgi_environ, self.fake_start_response,) + self.application_kwargs = dict( + _wrap_wsgi_errors=noop, + _format_output=self.instrumented_format_output, + _fieldstorage_to_dict=self.instrumented_fieldstorage_to_dict, + _retrieve_handler=self.instrumented_retrieve_handler, + _set_environ=noop, + ) + + def test_return_value_ok_returns_status_200(self): + self.instrumented_retrieve_handler.program([], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + _trigger_and_unpack_result(application_result) + + self.assertInstrumentation() + self.assertWsgiResponseStatus(self.fake_start_response, 200) + + def 
test_return_value_ok_returns_valid_html_page(self): + self.instrumented_retrieve_handler.program([], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + output = _trigger_and_unpack_result(application_result) + + self.assertInstrumentation() + self.assertIsValidHtmlDocument(output) + + def test_return_value_ok_returns_expected_title(self): + self.instrumented_format_output.set_values(title_text='TEST') + self.instrumented_retrieve_handler.program([], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + output = _trigger_and_unpack_result(application_result) + + self.assertInstrumentation() + self.assertHtmlElementTextContent(output, 'title', 'TEST', trim_newlines=True) + + def test_return_value_ok_serving_a_binary_file(self): + test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif') + with open(test_binary_file, 'rb') as f: + test_binary_data = f.read() + + self.instrumented_fieldstorage_to_dict.set_result({ + 'output_format': ('file',) + }) + self.instrumented_format_output.set_file(True) + + file_obj = {'object_type': 'binary', 'data': test_binary_data} + self.instrumented_retrieve_handler.program([file_obj], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + output = _trigger_and_unpack_result(application_result, 'binary') + + self.assertInstrumentation() + self.assertEqual(output, test_binary_data) + + def test_serve_paths_single_file_at_limit(self): + test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif') + test_binary_file_size = os.stat(test_binary_file).st_size + with open(test_binary_file, 'rb') as fh_test_file: + test_binary_data = fh_test_file.read() + + self.configuration.migserver_server_maxsize = test_binary_file_size + + self.instrumented_fieldstorage_to_dict.set_result({ + 'output_format': ('serve',) + }) +
self.instrumented_format_output.set_file(True) + + output_obj = { + 'object_type': 'serve_paths', + 'paths': [test_binary_file] + } + self.instrumented_retrieve_handler.program([output_obj], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + output = _trigger_and_unpack_result(application_result, 'binary') + + self.assertInstrumentation() + self.assertEqual(output, test_binary_data) + + def test_serve_paths_single_file_over_limit(self): + test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif') + test_binary_file_size = os.stat(test_binary_file).st_size + with open(test_binary_file, 'rb') as fh_test_file: + test_binary_data = fh_test_file.read() + + self.configuration.migserver_server_maxsize = test_binary_file_size - 1 + + self.instrumented_fieldstorage_to_dict.set_result({ + 'output_format': ('serve',) + }) + self.instrumented_format_output.set_file(True) + + output_obj = { + 'object_type': 'serve_paths', + 'paths': [test_binary_file] + } + self.instrumented_retrieve_handler.program([output_obj], returnvalues.OK) + + application_result = migwsgi._application( + *self.application_args, + **self.application_kwargs + ) + + output = _trigger_and_unpack_result(application_result, 'binary') + + self.assertInstrumentation() + self.assertEqual(output, b'') + self.assertWsgiResponseStatus(self.fake_start_response, 422) + + +if __name__ == '__main__': + testmain() diff --git a/tests/test_support.py b/tests/test_support.py index 34fb61247..3ecc3e458 100644 --- a/tests/test_support.py +++ b/tests/test_support.py @@ -4,7 +4,10 @@ import unittest from tests.support import MigTestCase, PY2, testmain, temppath, \ - AssertOver + AssertOver, FakeConfiguration + +from mig.shared.conf import get_configuration_object +from mig.shared.configuration import Configuration class InstrumentedAssertOver(AssertOver): @@ -39,6 +42,17 @@ def _class_attribute(self, name, **kwargs): else: return getattr(cls, name,
None) + def test_provides_a_fake_configuration(self): + configuration = self.configuration + + self.assertIsInstance(configuration, FakeConfiguration) + + def test_provides_a_fake_configuration_for_the_duration_of_the_test(self): + c1 = self.configuration + c2 = self.configuration + + self.assertIs(c2, c1) + @unittest.skipIf(PY2, "Python 3 only") def test_unclosed_files_are_recorded(self): tmp_path = temppath("support-unclosed", self) @@ -88,5 +102,23 @@ def test_when_asserting_over_multiple_values_after(self): self.assertTrue(attempt_wrapper.was_check_callable_called()) +class SupportTestCase_overridden_configuration(MigTestCase): + def _provide_configuration(self): + return 'testconfig' + + def test_provides_the_test_configuration(self): + expected_last_dir = 'testconfs-py2' if PY2 else 'testconfs-py3' + + configuration = self.configuration + + # check we have a real config object + self.assertIsInstance(configuration, Configuration) + # check for having loaded a config file from a test config dir + config_file_path_parts = configuration.config_file.split(os.path.sep) + config_file_path_parts.pop() # discard file part + config_file_last_dir = config_file_path_parts.pop() + self.assertEqual(config_file_last_dir, expected_last_dir) + + if __name__ == '__main__': testmain()