69 changes: 59 additions & 10 deletions mapping_cli.py
@@ -3,11 +3,13 @@
import logging
import os
from sys import exit
from pathlib import Path

from src.IO.MappingAbortionError import MappingAbortionError
from src.IO.sem.InputReader import InputReader as InputReader_SEM
from src.IO.tomo.InputReader import InputReader as InputReader_TOMO
from src.IO.tomo.OutputWriter import OutputWriter
from src.IO.sem.OutputWriter import OutputWriter as OutputWriter_SEM
from src.IO.tomo.OutputWriter import OutputWriter as OutputWriter_TOMO
from src.resources.maps.parsing import map_from_flag

# make log level configurable from ENV, defaults to info level
@@ -87,8 +89,8 @@ def run_tomo_mapper(args):
        #si = setup_infos if len(setup_infos) >= 1 else None
        #ri = run_infos if len(run_infos) >= 1 else None

        output = OutputWriter.stitch_together(setup_infos, run_infos, imgs)
        OutputWriter.writeOutput(output, OUTPUT_PATH)
        output = OutputWriter_TOMO.stitch_together(setup_infos, run_infos, imgs)
        OutputWriter_TOMO.writeOutput(output, OUTPUT_PATH)
    except MappingAbortionError as e:
        reader.clean_up()
        exit(e)
@@ -105,16 +107,63 @@ def run_sem_mapper(args):

    reader = None  # ensure 'reader' is defined for the finally block below
    try:
        reader = InputReader_SEM(MAP_SOURCE, INPUT_SOURCE)
        tmpdir = reader.temp_dir_path

        if tmpdir:
            # The case of a zipped input file
            list_of_file_names = []
            success_count = 0

            for file_path in Path(tmpdir).rglob('*'):
                if not file_path.is_file():
                    continue
                if '__MACOSX' in str(file_path):
                    #logging.debug(f"Skipping macOS metadata file: {file_path}")
                    continue

                logging.info(f"Processing extracted file: {file_path.name}")
                try:
                    reader_ = InputReader_SEM(MAP_SOURCE, file_path)
                    img_info = reader_.retrieve_image_info(file_path)
                    logging.debug(f"IMAGE_INFO: {img_info}")

                    if not img_info:
                        raise MappingAbortionError(f"Could not retrieve image information for {file_path.name}.")

                    file_name = file_path.with_suffix('').name + ".json"
                    OutputWriter_SEM.save_the_file(img_info, file_name)
                    list_of_file_names.append(file_name)
                    success_count += 1

                except MappingAbortionError as e:
                    logging.warning(f"Skipping file {file_path.name} due to mapping error: {e}")
                except Exception as e:
                    logging.exception(f"Unexpected error processing file {file_path.name}")

            if success_count > 0:
                logging.info(f"In total {success_count} file(s) were successfully processed.")
                OutputWriter_SEM.save_to_zip(list_of_file_names, OUTPUT_PATH)
            else:
                raise MappingAbortionError("No files could be processed successfully. Aborting.")

        else:
            # The case of a single input file
            logging.info("Processing input as single file.")
            img_info = reader.retrieve_image_info(INPUT_SOURCE)
            if not img_info:
                raise MappingAbortionError("Could not retrieve image information. Aborting.")

            OutputWriter_SEM.save_the_file(img_info, OUTPUT_PATH)

            #with open(OUTPUT_PATH, 'w', encoding="utf-8") as f:
            #    json.dump(img_info, f, indent=4, ensure_ascii=False)

        img_info = reader.retrieve_image_info(INPUT_SOURCE)
        if not img_info:
            logging.error('Could not retrieve image information due to unknown error. Aborting.')
            exit(1)
        with open(OUTPUT_PATH, 'w', encoding="utf-8") as f:
            json.dump(img_info, f, indent=4, ensure_ascii=False)
    except MappingAbortionError as e:
        #logging.error(f"MappingAbortionError: {e}")
        exit(e)
    finally:
Contributor
Nice, I was not even aware that 'finally' would run even on an exit call. TIL :)
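For reference: `sys.exit()` just raises `SystemExit`, so it unwinds the stack like any other exception and `finally` blocks still execute on the way out. A minimal standalone sketch:

```python
import sys

def main():
    try:
        sys.exit("aborting")  # raises SystemExit; it does not terminate immediately
    finally:
        # still executes while SystemExit propagates up the stack
        print("cleanup runs even on sys.exit()")

main()  # prints the cleanup line, then exits (the string goes to stderr, exit code 1)
```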

        if reader:
            reader.clean_up()
Contributor
You need to implement the clean_up function.
Since it is likely identical for tomo and sem, it should ideally be inherited from a common base class.

Contributor Author
For a first quick solution, a separate clean_up function has been added in IO/sem/InputReader. A common base class will be implemented to unify the cleanup logic across reader types.

Contributor
I was thinking more in the direction of an InputReader base class that either provides the interface for the clean_up method or (more likely) even implements it, since it likely always treats the working_dir used by InputReaders in the same way. Maybe there is even more overlap, especially with regard to parser handling.
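A rough sketch of what such a shared base class could look like — names and details are hypothetical, not part of this PR:

```python
import logging
import os
import shutil


class InputReaderBase:
    """Hypothetical common base for the sem/tomo InputReaders."""

    temp_dir_path: str = None

    def clean_up(self):
        # Shared cleanup: remove the temp working dir if one was created.
        if self.temp_dir_path:
            shutil.rmtree(self.temp_dir_path, ignore_errors=True)
            logging.debug("Removed temp dir {} (still exists: {})".format(
                self.temp_dir_path, os.path.exists(self.temp_dir_path)))
        else:
            logging.debug("No temp folder used, nothing to clean up.")
```

The concrete readers would then only add their own parser selection and metadata retrieval on top.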

Contributor Author
The InputReader base class has been implemented in a separate branch, dev_inputreader_base_class.


if __name__ == '__main__':
    run_cli()
@@ -70,12 +70,12 @@ public String uri() {

    @Override
    public MimeType[] inputTypes() {
        return new MimeType[]{MimeTypeUtils.parseMimeType("image/tiff"), MimeTypeUtils.parseMimeType("text/plain")};
        return new MimeType[]{MimeTypeUtils.parseMimeType("image/tiff"), MimeTypeUtils.parseMimeType("text/plain"), MimeTypeUtils.parseMimeType("application/zip")};
    }

    @Override
    public MimeType[] outputTypes() {
        return new MimeType[]{MimeTypeUtils.APPLICATION_JSON};
        return new MimeType[]{MimeTypeUtils.APPLICATION_JSON, MimeTypeUtils.parseMimeType("application/zip")};
    }

    @Override
30 changes: 21 additions & 9 deletions src/IO/sem/InputReader.py
@@ -1,17 +1,19 @@
import logging
import mimetypes
import os
import shutil

from src.IO.MappingAbortionError import MappingAbortionError
from src.parser.ImageParser import ParserMode
from src.parser.ParserFactory import ParserFactory
from src.util import load_json, get_filetype_with_magica, robust_textfile_read
from src.util import is_zipfile, extract_zip_file, load_json, get_filetype_with_magica, robust_textfile_read


class InputReader:

    mapping = None
    parser_names = None
    temp_dir_path: str = None

    def __init__(self, map_path, input_path):
        logging.info("Preparing parsers based on parsing map file and input.")
@@ -20,15 +22,18 @@ def __init__(self, map_path, input_path):
        if not os.path.exists(input_path):
            logging.error("Input file {} does not exist. Aborting".format(input_path))
            raise MappingAbortionError("Input file loading failed.")

        if is_zipfile(input_path):
            self.temp_dir_path = extract_zip_file(input_path)
        else:
            self.parser_names = self.get_applicable_parsers(input_path)

        self.parser_names = self.get_applicable_parsers(input_path)

        if not self.parser_names:
            logging.error("No applicable parsers found for input {}".format(input_path))
            mimetype_set = list(set([v.expected_input_format() for v in ParserFactory.available_img_parsers.values()]))
            logging.info("Supported mimetypes: {}".format(mimetype_set))
            raise MappingAbortionError("Input file parsing aborted.")
        logging.info("Applicable parsers: {}".format(", ".join(self.parser_names)))
            if not self.parser_names:
                logging.error("No applicable parsers found for input {}".format(input_path))
                mimetype_set = list(set([v.expected_input_format() for v in ParserFactory.available_img_parsers.values()]))
                logging.info("Supported mimetypes: {}".format(mimetype_set))
                raise MappingAbortionError("Input file parsing aborted.")
            logging.info("Applicable parsers: {}".format(", ".join(self.parser_names)))


    @staticmethod
@@ -82,3 +87,10 @@ def retrieve_image_info(self, input_path):
        if result and result.image_metadata:
            output_dict = result.image_metadata.to_schema_dict()
            return output_dict

    def clean_up(self):
        if self.temp_dir_path:
            shutil.rmtree(self.temp_dir_path)
            logging.debug("Temp folder deletion: {} - still exists: {}".format(self.temp_dir_path, os.path.exists(self.temp_dir_path)))
        else:
            logging.debug("No temp folder used, nothing to clean up.")
45 changes: 45 additions & 0 deletions src/IO/sem/OutputWriter.py
@@ -0,0 +1,45 @@
import os
import json
import logging
import zipfile

from src.IO.MappingAbortionError import MappingAbortionError


class OutputWriter:

    @staticmethod
    def save_the_file(mapped_metadata, file_path):
        try:
            with open(file_path, 'w', encoding="utf-8") as json_file:
                json.dump(mapped_metadata, json_file, indent=4, ensure_ascii=False)
            logging.info("The output document has been created successfully!")
        except (FileNotFoundError, PermissionError, IsADirectoryError, OSError, TypeError, ValueError) as e:
            logging.error(f"Unable to save {file_path}: {e}")
            raise MappingAbortionError(f"Failed to save {file_path}.")

    @staticmethod
    def save_to_zip(file_path_list, zip_file_path):
        try:
            with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                # ZIP_DEFLATED is lossless DEFLATE compression, so no data is lost.
                for file_path in file_path_list:
                    try:
                        zf.write(file_path, os.path.basename(file_path))
                        logging.debug(f"Added {file_path} to zip.")
                    except (FileNotFoundError, PermissionError, IsADirectoryError, OSError, zipfile.BadZipFile) as e:
                        logging.error(f"Adding {file_path} to zip was not successful: {e}")
                        raise MappingAbortionError(f"Failed to add {file_path} to zip.")
            logging.info(f"Files have been zipped into {zip_file_path} successfully!")
        except MappingAbortionError as e:
            logging.error(f"Failed to create zip file at {zip_file_path}: {e}")
            raise MappingAbortionError("Failed to save to zip.")

        # Delete the original files after zipping
        for file_path in file_path_list:
            try:
                os.remove(file_path)
                logging.info(f"{file_path} has been deleted.")
            except (FileNotFoundError, PermissionError, IsADirectoryError, OSError) as e:
                logging.warning(f"{file_path} was not deleted after zipping: {e}")
                raise MappingAbortionError(f"Failed to delete file {file_path} after zipping.")
3 changes: 2 additions & 1 deletion src/parser/impl/TxtParser.py
@@ -3,6 +3,7 @@

from PIL import Image

from src.IO.MappingAbortionError import MappingAbortionError
from src.Preprocessor import Preprocessor
from src.model.ImageMD import ImageMD
from src.parser.ImageParser import ImageParser, ParserMode
@@ -36,7 +37,7 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]:

        if not mapping and not self.internal_mapping:
            logging.error("No mapping provided for image parsing. Aborting")
            exit(1)
            raise MappingAbortionError("Image parsing failed.")
        mapping_dict = mapping if mapping else self.internal_mapping
        image_md = map_a_dict(input_md, mapping_dict)
        #print("image_md: ", image_md)