69 changes: 59 additions & 10 deletions mapping_cli.py
@@ -3,11 +3,13 @@
import logging
import os
from sys import exit
from pathlib import Path

from src.IO.MappingAbortionError import MappingAbortionError
from src.IO.sem.InputReader import InputReader as InputReader_SEM
from src.IO.tomo.InputReader import InputReader as InputReader_TOMO
from src.IO.tomo.OutputWriter import OutputWriter
from src.IO.sem.OutputWriter import OutputWriter as OutputWriter_SEM
from src.IO.tomo.OutputWriter import OutputWriter as OutputWriter_TOMO
from src.resources.maps.parsing import map_from_flag

# make log level configurable from ENV, defaults to info level
@@ -87,8 +89,8 @@ def run_tomo_mapper(args):
        #si = setup_infos if len(setup_infos) >= 1 else None
        #ri = run_infos if len(run_infos) >= 1 else None

        output = OutputWriter.stitch_together(setup_infos, run_infos, imgs)
        OutputWriter.writeOutput(output, OUTPUT_PATH)
        output = OutputWriter_TOMO.stitch_together(setup_infos, run_infos, imgs)
        OutputWriter_TOMO.writeOutput(output, OUTPUT_PATH)
    except MappingAbortionError as e:
        reader.clean_up()
        exit(e)
@@ -105,16 +107,63 @@ def run_sem_mapper(args):

    reader = None  # ensure 'reader' is defined for the finally block below
    try:
        reader = InputReader_SEM(MAP_SOURCE, INPUT_SOURCE)
        tmpdir = reader.temp_dir_path

        if tmpdir:
            # The case of a zipped input file
            list_of_file_names = []
            success_count = 0

            for file_path in Path(tmpdir).rglob('*'):
                if not file_path.is_file():
                    continue
                if '__MACOSX' in str(file_path):
                    #logging.debug(f"Skipping macOS metadata file: {file_path}")
                    continue

                logging.info(f"Processing extracted file: {file_path.name}")
                try:
                    reader_ = InputReader_SEM(MAP_SOURCE, file_path)
                    img_info = reader_.retrieve_image_info(file_path)
                    logging.debug(f"IMAGE_INFO: {img_info}")

                    if not img_info:
                        raise MappingAbortionError(f"Could not retrieve image information for {file_path.name}.")

                    file_name = file_path.with_suffix('').name + ".json"
                    OutputWriter_SEM.save_the_file(img_info, file_name)
                    list_of_file_names.append(file_name)
                    success_count += 1

                except MappingAbortionError as e:
                    logging.warning(f"Skipping file {file_path.name} due to mapping error: {e}")
                except Exception as e:
                    logging.exception(f"Unexpected error processing file {file_path.name}")

            if success_count > 0:
                logging.info(f"In total {success_count} file(s) were successfully processed.")
                OutputWriter_SEM.save_to_zip(list_of_file_names, OUTPUT_PATH)
            else:
                raise MappingAbortionError("No files could be processed successfully. Aborting.")

        else:
            # The case of a single input file
            logging.info("Processing input as single file.")
            img_info = reader.retrieve_image_info(INPUT_SOURCE)
            if not img_info:
                raise MappingAbortionError("Could not retrieve image information. Aborting.")

            OutputWriter_SEM.save_the_file(img_info, OUTPUT_PATH)

            #with open(OUTPUT_PATH, 'w', encoding="utf-8") as f:
            #    json.dump(img_info, f, indent=4, ensure_ascii=False)

        img_info = reader.retrieve_image_info(INPUT_SOURCE)
        if not img_info:
            logging.error('Could not retrieve image information due to unknown error. Aborting.')
            exit(1)
        with open(OUTPUT_PATH, 'w', encoding="utf-8") as f:
            json.dump(img_info, f, indent=4, ensure_ascii=False)
    except MappingAbortionError as e:
        #logging.error(f"MappingAbortionError: {e}")
        exit(e)
    finally:
Contributor
Nice, I was not even aware that 'finally' would run even on an exit call. TIL :)
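For reference: `sys.exit()` just raises `SystemExit`, so it unwinds the stack like any other exception and `finally` blocks still execute on the way out. A minimal standalone sketch:

```python
import sys

def main():
    try:
        sys.exit("aborting")  # raises SystemExit; it does not terminate immediately
    finally:
        # still executes while SystemExit propagates up the stack
        print("cleanup runs even on sys.exit()")

main()  # prints the cleanup line, then exits (the string goes to stderr, exit code 1)
```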

        if reader:
            reader.clean_up()
Contributor
You need to implement the clean_up function.
Since it is likely identical for tomo and sem, it should ideally be inherited from a common base class.

Contributor Author
For a first quick solution, a separate clean_up function has been added in IO/sem/InputReader. A common base class will be implemented to unify the cleanup logic across reader types.

Contributor
I was thinking more in the direction of an InputReader base class that either provides the interface for the clean_up method or (more likely) even implements it, since it likely always treats the working_dir used by InputReaders in the same way. Maybe there is even more overlap, especially with regard to parser handling.
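A rough sketch of what such a shared base class could look like — names and details are hypothetical, not part of this PR:

```python
import logging
import os
import shutil


class InputReaderBase:
    """Hypothetical common base for the sem/tomo InputReaders."""

    temp_dir_path: str = None

    def clean_up(self):
        # Shared cleanup: remove the temp working dir if one was created.
        if self.temp_dir_path:
            shutil.rmtree(self.temp_dir_path, ignore_errors=True)
            logging.debug("Removed temp dir {} (still exists: {})".format(
                self.temp_dir_path, os.path.exists(self.temp_dir_path)))
        else:
            logging.debug("No temp folder used, nothing to clean up.")
```

The concrete readers would then only add their own parser selection and metadata retrieval on top.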

Contributor Author
The InputReader base class has been implemented in a separate branch, dev_inputreader_base_class.


if __name__ == '__main__':
    run_cli()
@@ -70,12 +70,12 @@ public String uri() {

    @Override
    public MimeType[] inputTypes() {
        return new MimeType[]{MimeTypeUtils.parseMimeType("image/tiff"), MimeTypeUtils.parseMimeType("text/plain")};
        return new MimeType[]{MimeTypeUtils.parseMimeType("image/tiff"), MimeTypeUtils.parseMimeType("text/plain"), MimeTypeUtils.parseMimeType("application/zip")};
    }

    @Override
    public MimeType[] outputTypes() {
        return new MimeType[]{MimeTypeUtils.APPLICATION_JSON};
        return new MimeType[]{MimeTypeUtils.APPLICATION_JSON, MimeTypeUtils.parseMimeType("application/zip")};
    }

    @Override
30 changes: 21 additions & 9 deletions src/IO/sem/InputReader.py
@@ -1,17 +1,19 @@
import logging
import mimetypes
import os
import shutil

from src.IO.MappingAbortionError import MappingAbortionError
from src.parser.ImageParser import ParserMode
from src.parser.ParserFactory import ParserFactory
from src.util import load_json, get_filetype_with_magica, robust_textfile_read
from src.util import is_zipfile, extract_zip_file, load_json, get_filetype_with_magica, robust_textfile_read


class InputReader:

    mapping = None
    parser_names = None
    temp_dir_path: str = None

    def __init__(self, map_path, input_path):
        logging.info("Preparing parsers based on parsing map file and input.")
@@ -20,15 +22,18 @@ def __init__(self, map_path, input_path):
        if not os.path.exists(input_path):
            logging.error("Input file {} does not exist. Aborting".format(input_path))
            raise MappingAbortionError("Input file loading failed.")

        if is_zipfile(input_path):
            self.temp_dir_path = extract_zip_file(input_path)
        else:
            self.parser_names = self.get_applicable_parsers(input_path)

        self.parser_names = self.get_applicable_parsers(input_path)

        if not self.parser_names:
            logging.error("No applicable parsers found for input {}".format(input_path))
            mimetype_set = list(set([v.expected_input_format() for v in ParserFactory.available_img_parsers.values()]))
            logging.info("Supported mimetypes: {}".format(mimetype_set))
            raise MappingAbortionError("Input file parsing aborted.")
        logging.info("Applicable parsers: {}".format(", ".join(self.parser_names)))
            if not self.parser_names:
                logging.error("No applicable parsers found for input {}".format(input_path))
                mimetype_set = list(set([v.expected_input_format() for v in ParserFactory.available_img_parsers.values()]))
                logging.info("Supported mimetypes: {}".format(mimetype_set))
                raise MappingAbortionError("Input file parsing aborted.")
            logging.info("Applicable parsers: {}".format(", ".join(self.parser_names)))


    @staticmethod
@@ -82,3 +87,10 @@ def retrieve_image_info(self, input_path):
        if result and result.image_metadata:
            output_dict = result.image_metadata.to_schema_dict()
            return output_dict

    def clean_up(self):
        if self.temp_dir_path:
            shutil.rmtree(self.temp_dir_path)
            logging.debug("Temp folder deletion: {} - still exists: {}".format(self.temp_dir_path, os.path.exists(self.temp_dir_path)))
        else:
            logging.debug("No temp folder used, nothing to clean up.")
45 changes: 45 additions & 0 deletions src/IO/sem/OutputWriter.py
@@ -0,0 +1,45 @@
import os
import json
import logging
import zipfile

from src.IO.MappingAbortionError import MappingAbortionError


class OutputWriter:

    @staticmethod
    def save_the_file(mapped_metadata, file_path):
        try:
            with open(file_path, 'w', encoding="utf-8") as json_file:
                json.dump(mapped_metadata, json_file, indent=4, ensure_ascii=False)
            logging.info("The output document has been created successfully!")
        except (FileNotFoundError, PermissionError, IsADirectoryError, OSError, TypeError, ValueError) as e:
            logging.error(f"Unable to save {file_path}: {e}")
            raise MappingAbortionError(f"Failed to save {file_path}.")

    @staticmethod
    def save_to_zip(file_path_list, zip_file_path):
        try:
            with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                # ZIP_DEFLATED is lossless DEFLATE compression, so no data is lost.
                for file_path in file_path_list:
                    try:
                        zf.write(file_path, os.path.basename(file_path))
                        logging.debug(f"Added {file_path} to zip.")
                    except (FileNotFoundError, PermissionError, IsADirectoryError, OSError, zipfile.BadZipFile) as e:
                        logging.error(f"Adding {file_path} to zip was not successful: {e}")
                        raise MappingAbortionError(f"Failed to add {file_path} to zip.")
            logging.info(f"Files have been zipped into {zip_file_path} successfully!")
        except MappingAbortionError as e:
            logging.error(f"Failed to create zip file at {zip_file_path}: {e}")
            raise MappingAbortionError("Failed to save to zip.")

        # Delete the original files after zipping
        for file_path in file_path_list:
            try:
                os.remove(file_path)
                logging.info(f"{file_path} has been deleted.")
            except (FileNotFoundError, PermissionError, IsADirectoryError, OSError) as e:
                logging.warning(f"{file_path} was not deleted after zipping: {e}")
                raise MappingAbortionError(f"Failed to delete file {file_path} after zipping.")
3 changes: 2 additions & 1 deletion src/parser/impl/TxtParser.py
@@ -3,6 +3,7 @@

from PIL import Image

from src.IO.MappingAbortionError import MappingAbortionError
from src.Preprocessor import Preprocessor
from src.model.ImageMD import ImageMD
from src.parser.ImageParser import ImageParser, ParserMode
@@ -36,7 +37,7 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]:

        if not mapping and not self.internal_mapping:
            logging.error("No mapping provided for image parsing. Aborting")
            exit(1)
            raise MappingAbortionError("Image parsing failed.")
        mapping_dict = mapping if mapping else self.internal_mapping
        image_md = map_a_dict(input_md, mapping_dict)
        #print("image_md: ", image_md)