Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix python file2stringarray #278

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,19 @@ Changes

Changes:
--------
- No change.
- | Modify problematic output location and execution methodology of ``file2string_array`` process so it does what
it actually advertises in its ``abstract`` description and doesn't result in error after execution.
|
| This modification actually changes the internal operation accomplished by ``file2string_array`` process
since it was attempting to directly create a CWL output of type ``File[]``. This is not yet supported
in `Weaver` (see issue `#25 <https://github.com/crim-ca/weaver/issues/25>`_) because `OGC API - Processes`
does not allow output multiplicity under the same output ID.

Fixes:
------
- No change.
- Fix invalid ``python`` reference location in ``file2string_array`` process CWL definition
(fixes `#275 <https://github.com/crim-ca/weaver/issues/275>`_).
- Fix missing ``version`` field definition for ``file2string_array`` process and set it as ``1.0``.

`3.3.0 <https://github.com/crim-ca/weaver/tree/3.3.0>`_ (2021-07-16)
========================================================================
Expand Down
79 changes: 79 additions & 0 deletions tests/functional/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,82 @@ def test_jsonarray2netcdf_execute(self):
assert isinstance(nc_path, str) and len(nc_path)
assert nc_path.startswith(wps_out)
assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1]

def test_file2string_array_describe(self):
    """
    Check the description returned for the builtin ``file2string_array`` process.

    The description must expose exactly one input named ``input`` and one output named ``output``,
    each with a single format, the output format being JSON.
    """
    resp = self.app.get("/processes/file2string_array", headers=self.json_headers)
    assert resp.status_code == 200
    assert resp.content_type in CONTENT_TYPE_APP_JSON

    # general process metadata
    process = resp.json["process"]
    assert process["id"] == "file2string_array"
    assert process["abstract"] not in ["", None]
    assert process["executeEndpoint"] == "https://localhost/processes/file2string_array/jobs"

    # single file input
    proc_inputs = process["inputs"]
    assert isinstance(proc_inputs, list)
    assert len(proc_inputs) == 1
    assert proc_inputs[0]["id"] == "input"
    input_formats = proc_inputs[0]["formats"]
    assert isinstance(input_formats, list)
    assert len(input_formats) == 1  # must exist for file, mime-type not important

    # single JSON output
    proc_outputs = process["outputs"]
    assert isinstance(proc_outputs, list)
    assert len(proc_outputs) == 1
    assert proc_outputs[0]["id"] == "output"
    output_formats = proc_outputs[0]["formats"]
    assert isinstance(output_formats, list)
    assert len(output_formats) == 1
    assert output_formats[0]["mimeType"] == CONTENT_TYPE_APP_JSON  # important here

def test_file2string_array_execute(self):
# Runs the builtin 'file2string_array' process against a temporary text file, then validates both the
# job results body and the job outputs response.
# NOTE(review): indentation is not visible in this view, so the exact extent of the 'with' block below
# (i.e. when the temporary file and the mocks are released) cannot be confirmed from here.
# NOTE(review): the file content asserted further down is {"output": [tmp_file]}; confirm that this is the
# structure actually written by the process script.
tmp_file = None
dirname = tempfile.gettempdir()
with contextlib.ExitStack() as stack_exec:
# temporary text file used as process input; registered on the exit stack for cleanup
tmp_text = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".txt")
tmp_text = stack_exec.enter_context(tmp_text) # noqa
tmp_text.write("Hello World!")
# presumably ensures the written text is flushed before the process reads the file - TODO confirm
tmp_text.seek(0)
tmp_file = tmp_text.name
# execution request: asynchronous mode, 'document' response, the temporary file as 'input',
# and 'output' requested by reference
data = {
"mode": "async",
"response": "document",
"inputs": [{"id": "input", "href": tmp_file}],
"outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}],
}

# enter every context manager yielded by mocked_execute_process() on the same exit stack
for mock_exec in mocked_execute_process():
stack_exec.enter_context(mock_exec)
path = "/processes/file2string_array/jobs"
# submit the job through mocked_sub_requests (only_local=True)
resp = mocked_sub_requests(self.app, "post_json", path,
data=data, headers=self.json_headers, only_local=True)

# the submission must be accepted (201) and provide the job location used for monitoring
assert resp.status_code == 201, "Error: {}".format(resp.json)
assert resp.content_type in CONTENT_TYPE_APP_JSON
job_url = resp.json["location"]
results = self.monitor_job(job_url)

# first validate format of OGC-API results
assert "output" in results, "Expected result ID 'output' in response body"
assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict"
assert "href" in results["output"]
assert "format" in results["output"]
fmt = results["output"]["format"] # type: JSON
assert isinstance(fmt, dict), "Result format should be provided with content details"
assert "mediaType" in fmt
assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition"
assert fmt["mediaType"] == CONTENT_TYPE_APP_JSON, "Result 'output' format expected to be JSON file"
out_path = results["output"]["href"]
assert isinstance(out_path, str) and len(out_path)
# map the result URL to a local path by swapping the '<weaver.url><weaver.wps_output_path>' prefix
# for the 'weaver.wps_output_dir' setting
settings = get_settings_from_testapp(self.app)
wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path"))
real_path = out_path.replace(wps_out, settings.get("weaver.wps_output_dir"))
assert out_path.startswith(wps_out)
assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
assert os.path.isfile(real_path)
# the referenced result file is parsed as JSON and compared against the expected structure
with open(real_path, "r") as f:
out_data = json.load(f)
assert out_data == {"output": [tmp_file]}

# if everything was valid for results, validate equivalent but differently formatted outputs response
output_url = job_url + "/outputs"
resp = self.app.get(output_url, headers=self.json_headers)
assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json)
outputs = resp.json
assert outputs["outputs"][0]["id"] == "output"
out_path = outputs["outputs"][0]["href"]
assert isinstance(out_path, str) and len(out_path)
assert out_path.startswith(wps_out)
# file name of the outputs reference must match the one already validated from the results
assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
5 changes: 3 additions & 2 deletions weaver/processes/builtin/file2string_array.cwl
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: ${WEAVER_ROOT_DIR}/bin/python
baseCommand: python
arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)]
inputs:
input:
Expand All @@ -15,5 +14,7 @@ outputs:
output:
type: File
format: iana:application/json
outputBinding:
glob: "output.json"
$namespaces:
iana: "https://www.iana.org/assignments/media-types/"
24 changes: 10 additions & 14 deletions weaver/processes/builtin/file2string_array.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
Transforms a file input into JSON file containing an array of file references as value.
Transforms a file input into JSON file containing a string array formed of the single input file reference as value.
"""
import argparse
import json
Expand All @@ -17,27 +17,23 @@
LOGGER.addHandler(logging.StreamHandler(sys.stdout))
LOGGER.setLevel(logging.INFO)

OUTPUT_CWL_JSON = "cwl.output.json"
# process details
__version__ = "1.0"
__title__ = "File to string array"
__abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative


def main(input_file, output_dir):
    # type: (str, str) -> None
    """
    Write a JSON file containing a one-item string array made of the input file reference.

    The input file itself is never opened or read; only its reference is stored.

    :param input_file: reference of the input file, stored as the single item of the array.
    :param output_dir: directory in which the ``output.json`` file is created.
    """
    LOGGER.info("Got arguments: input_file=%s output_dir=%s", input_file, output_dir)
    output_data = [input_file]
    # context manager guarantees the file is flushed and closed instead of relying on garbage collection
    with open(os.path.join(output_dir, "output.json"), "w") as output_file:
        json.dump(output_data, output_file)


if __name__ == "__main__":
LOGGER.info("Parsing inputs of '%s' process.", PACKAGE_NAME)
PARSER = argparse.ArgumentParser(description=__doc__)
PARSER.add_argument("-i", help="CWL File")
PARSER.add_argument(
"-o",
metavar="outdir",
required=True,
help="Output directory of the retrieved NetCDF files extracted by name from the JSON file.",
)
PARSER.add_argument("-i", required=True, help="Input file reference.")
PARSER.add_argument("-o", required=True, help="Output directory where to generate the JSON file with input file.")
ARGS = PARSER.parse_args()
sys.exit(main(ARGS.i, ARGS.o))
1 change: 0 additions & 1 deletion weaver/processes/builtin/jsonarray2netcdf.cwl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: python
arguments:
- "${WEAVER_ROOT_DIR}/weaver/processes/builtin/jsonarray2netcdf.py"
Expand Down
1 change: 0 additions & 1 deletion weaver/processes/builtin/metalink2netcdf.cwl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: python
arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/metalink2netcdf.py", "-o", $(runtime.outdir)]
inputs:
Expand Down
2 changes: 1 addition & 1 deletion weaver/processes/wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def setup_loggers(self, log_stdout_stderr=True):
log_file_handler.setFormatter(log_file_formatter)

# prepare package logger
self.logger = logging.getLogger("{}.{}".format(LOGGER.name, self.package_id))
self.logger = logging.getLogger("{}|{}".format(LOGGER.name, self.package_id))
self.logger.addHandler(log_file_handler)
self.logger.setLevel(self.log_level)

Expand Down