crim-ca · fmigneault · Jul 7, 2021 · Jul 7, 2021 · Jul 7, 2021 · Jul 16, 2021
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -10,11 +10,19 @@ Changes
 
 Changes:
 --------
-- No change.
+- | Modify the problematic output location and execution methodology of the ``file2string_array`` process so that it
+    does what its ``abstract`` description advertises and no longer results in an error after execution.
+  |
+  | This modification changes the internal operation performed by the ``file2string_array`` process,
+    since it previously attempted to directly create a CWL output of type ``File[]``. This is not yet supported
+    in `Weaver` (see issue `#25 <https://github.com/crim-ca/weaver/issues/25>`_) because `OGC API - Processes`
+    does not allow output multiplicity under the same output ID.
 
 Fixes:
 ------
-- No change.
+- Fix invalid ``python`` reference location in ``file2string_array`` process CWL definition
+  (fixes `#275 <https://github.com/crim-ca/weaver/issues/275>`_).
+- Fix missing ``version`` field definition for ``file2string_array`` process and set it as ``1.0``.
 
 `3.3.0 <https://github.com/crim-ca/weaver/tree/3.3.0>`_ (2021-07-16)
 ========================================================================

diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py
@@ -117,3 +117,82 @@ def test_jsonarray2netcdf_execute(self):
         assert isinstance(nc_path, str) and len(nc_path)
         assert nc_path.startswith(wps_out)
         assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1]
+
+    def test_file2string_array_describe(self):  # validate the JSON description of the builtin process
+        resp = self.app.get("/processes/file2string_array", headers=self.json_headers)
+        assert resp.status_code == 200
+        assert resp.content_type in CONTENT_TYPE_APP_JSON
+        assert resp.json["process"]["id"] == "file2string_array"
+        assert resp.json["process"]["abstract"] not in ["", None]  # abstract must be provided
+        assert resp.json["process"]["executeEndpoint"] == "https://localhost/processes/file2string_array/jobs"
+        assert isinstance(resp.json["process"]["inputs"], list)
+        assert len(resp.json["process"]["inputs"]) == 1  # exactly one input, named 'input'
+        assert resp.json["process"]["inputs"][0]["id"] == "input"
+        assert isinstance(resp.json["process"]["inputs"][0]["formats"], list)
+        assert len(resp.json["process"]["inputs"][0]["formats"]) == 1  # must exist for file, mime-type not important
+        assert isinstance(resp.json["process"]["outputs"], list)
+        assert len(resp.json["process"]["outputs"]) == 1  # exactly one output, named 'output'
+        assert resp.json["process"]["outputs"][0]["id"] == "output"
+        assert isinstance(resp.json["process"]["outputs"][0]["formats"], list)
+        assert len(resp.json["process"]["outputs"][0]["formats"]) == 1
+        assert resp.json["process"]["outputs"][0]["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON  # important here
+
+    def test_file2string_array_execute(self):  # run the process on a temporary text file and validate the results
+        tmp_file = None  # set within the context below, but also compared against after it exits
+        dirname = tempfile.gettempdir()
+        with contextlib.ExitStack() as stack_exec:
+            tmp_text = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".txt")
+            tmp_text = stack_exec.enter_context(tmp_text)  # noqa
+            tmp_text.write("Hello World!")
+            tmp_text.seek(0)  # rewind after writing the content
+            tmp_file = tmp_text.name
+            data = {
+                "mode": "async",
+                "response": "document",
+                "inputs": [{"id": "input", "href": tmp_file}],
+                "outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}],
+            }
+
+            for mock_exec in mocked_execute_process():  # keep every execution mock active while submitting
+                stack_exec.enter_context(mock_exec)
+            path = "/processes/file2string_array/jobs"
+            resp = mocked_sub_requests(self.app, "post_json", path,
+                                       data=data, headers=self.json_headers, only_local=True)
+
+        assert resp.status_code == 201, "Error: {}".format(resp.json)
+        assert resp.content_type in CONTENT_TYPE_APP_JSON
+        job_url = resp.json["location"]
+        results = self.monitor_job(job_url)
+
+        # first validate format of OGC-API results (mapping of output ID to its 'href' and 'format' details)
+        assert "output" in results, "Expected result ID 'output' in response body"
+        assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict"
+        assert "href" in results["output"]
+        assert "format" in results["output"]
+        fmt = results["output"]["format"]  # type: JSON
+        assert isinstance(fmt, dict), "Result format should be provided with content details"
+        assert "mediaType" in fmt
+        assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition"
+        assert fmt["mediaType"] == CONTENT_TYPE_APP_JSON, "Result 'output' format expected to be JSON file"
+        out_path = results["output"]["href"]
+        assert isinstance(out_path, str) and len(out_path)
+        settings = get_settings_from_testapp(self.app)
+        wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path"))
+        real_path = out_path.replace(wps_out, settings.get("weaver.wps_output_dir"))  # URL prefix -> local directory
+        assert out_path.startswith(wps_out)
+        assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
+        assert os.path.isfile(real_path)
+        with open(real_path, "r") as f:
+            out_data = json.load(f)
+        assert out_data == {"output": [tmp_file]}  # NOTE(review): script dumps a bare list, confirm expected shape
+
+        # if everything was valid for results, validate equivalent but differently formatted outputs response
+        output_url = job_url + "/outputs"
+        resp = self.app.get(output_url, headers=self.json_headers)
+        assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json)
+        outputs = resp.json
+        assert outputs["outputs"][0]["id"] == "output"  # here outputs are a list of items carrying their 'id'
+        out_path = outputs["outputs"][0]["href"]
+        assert isinstance(out_path, str) and len(out_path)
+        assert out_path.startswith(wps_out)
+        assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
diff --git a/weaver/processes/builtin/file2string_array.cwl b/weaver/processes/builtin/file2string_array.cwl
@@ -1,8 +1,7 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
-baseCommand: ${WEAVER_ROOT_DIR}/bin/python
+baseCommand: python
 arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)]
 inputs:
   input:
@@ -15,5 +14,7 @@ outputs:
   output:
     type: File
     format: iana:application/json
+    outputBinding:
+      glob: "output.json"
 $namespaces:
   iana: "https://www.iana.org/assignments/media-types/"
diff --git a/weaver/processes/builtin/file2string_array.py b/weaver/processes/builtin/file2string_array.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
-Transforms a file input into JSON file containing an array of file references as value.
+Transforms a file input into JSON file containing a string array formed of the single input file reference as value.
 """
 import argparse
 import json
@@ -17,27 +17,31 @@
 LOGGER.addHandler(logging.StreamHandler(sys.stdout))
 LOGGER.setLevel(logging.INFO)
 
-OUTPUT_CWL_JSON = "cwl.output.json"
+# process details
+__version__ = "1.0"
+__title__ = "File to string array"
+__abstract__ = __doc__  # NOTE: '__doc__' is fetched directly, this is mostly to be informative
 
 
 def main(input_file, output_dir):
-    # type: (argparse.FileType, str) -> None
-    LOGGER.info(
-        "Got arguments: input_file=%s output_dir=%s", input_file, output_dir
-    )
-    output_data = {"output": [input_file]}
-    json.dump(output_data, open(os.path.join(output_dir, OUTPUT_CWL_JSON), "w"))
+    # type: (str, str) -> None
+    """
+    Write the input file reference as a single-item JSON array to ``output.json`` under the output directory.
+
+    :param input_file: value received from the ``-i`` argument, stored as is in the array.
+    :param output_dir: directory in which ``output.json`` is created.
+    """
+    LOGGER.info("Got arguments: input_file=%s output_dir=%s", input_file, output_dir)
+    output_data = [input_file]
+    # context manager ensures the file handle is closed (and its content flushed) once writing completes
+    with open(os.path.join(output_dir, "output.json"), "w") as out_file:
+        json.dump(output_data, out_file)
 
 
 if __name__ == "__main__":
     LOGGER.info("Parsing inputs of '%s' process.", PACKAGE_NAME)
     PARSER = argparse.ArgumentParser(description=__doc__)
-    PARSER.add_argument("-i", help="CWL File")
-    PARSER.add_argument(
-        "-o",
-        metavar="outdir",
-        required=True,
-        help="Output directory of the retrieved NetCDF files extracted by name from the JSON file.",
-    )
+    PARSER.add_argument("-i", required=True, help="Input file reference.")
+    PARSER.add_argument("-o", required=True, help="Output directory where to generate the JSON file with input file.")
     ARGS = PARSER.parse_args()
     sys.exit(main(ARGS.i, ARGS.o))
diff --git a/weaver/processes/builtin/jsonarray2netcdf.cwl b/weaver/processes/builtin/jsonarray2netcdf.cwl
@@ -1,7 +1,6 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
 baseCommand: python
 arguments:
   - "${WEAVER_ROOT_DIR}/weaver/processes/builtin/jsonarray2netcdf.py"

diff --git a/weaver/processes/builtin/metalink2netcdf.cwl b/weaver/processes/builtin/metalink2netcdf.cwl
@@ -1,7 +1,6 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
 baseCommand: python
 arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/metalink2netcdf.py", "-o", $(runtime.outdir)]
 inputs:

diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py
@@ -720,7 +720,7 @@ def setup_loggers(self, log_stdout_stderr=True):
         log_file_handler.setFormatter(log_file_formatter)
 
         # prepare package logger
-        self.logger = logging.getLogger("{}.{}".format(LOGGER.name, self.package_id))
+        self.logger = logging.getLogger("{}|{}".format(LOGGER.name, self.package_id))
         self.logger.addHandler(log_file_handler)
         self.logger.setLevel(self.log_level)