diff --git a/python/magika/__init__.py b/python/magika/__init__.py
index 047a2b71..45299ba0 100644
--- a/python/magika/__init__.py
+++ b/python/magika/__init__.py
@@ -15,7 +15,8 @@
 
 import dotenv
 
-from magika import magika, prediction_mode
+from magika import magika
+from magika.types import prediction_mode
 
 Magika = magika.Magika
 MagikaError = magika.MagikaError
diff --git a/python/magika/cli/magika.py b/python/magika/cli/magika_python_module_tester.py
similarity index 57%
rename from python/magika/cli/magika.py
rename to python/magika/cli/magika_python_module_tester.py
index a7112534..5b85c87a 100755
--- a/python/magika/cli/magika.py
+++ b/python/magika/cli/magika_python_module_tester.py
@@ -14,25 +14,23 @@
 # limitations under the License.
 
 
-import copy
 import dataclasses
-import hashlib
+import importlib.metadata
 import json
 import logging
 import os
 import sys
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import click
-from tabulate import tabulate
 
 from magika import Magika, MagikaError, PredictionMode, colors
-from magika.content_types import ContentTypesManager
 from magika.logger import get_logger
-from magika.types import FeedbackReport, MagikaResult
+from magika.types import ContentTypeLabel, MagikaResult, Status, StatusOr
 
-VERSION = "0.5.2-dev"
+# TODO: the version should be migrated to the magika module, or somewhere else in python/
+VERSION = importlib.metadata.version("magika")
 
 CONTACT_EMAIL = "magika-dev@google.com"
 
@@ -115,12 +113,6 @@
 )
 @click.option("-v", "--verbose", is_flag=True, help="Enable more verbose output.")
 @click.option("-vv", "--debug", is_flag=True, help="Enable debug logging.")
-@click.option(
-    "--generate-report",
-    "generate_report_flag",
-    is_flag=True,
-    help="Generate report useful when reporting feedback.",
-)
 @click.option(
     "--dump-performance-stats",
     "dump_performance_stats_flag",
@@ -130,12 +122,6 @@
 @click.option(
     "--version", "output_version", is_flag=True, help="Print the version and exit."
 )
-@click.option(
-    "--list-output-content-types",
-    "list_output_content_types",
-    is_flag=True,
-    help="Show a list of supported content types.",
-)
 @click.option(
     "--model-dir",
     type=click.Path(
@@ -158,10 +144,8 @@ def main(
     with_colors: bool,
     verbose: bool,
     debug: bool,
-    generate_report_flag: bool,
     dump_performance_stats_flag: bool,
     output_version: bool,
-    list_output_content_types: bool,
     model_dir: Optional[Path],
 ) -> None:
     """
@@ -178,6 +162,9 @@ def main(
         with_colors = False
 
     _l = get_logger(use_colors=with_colors)
+    _l.warning(
+        "This CLI is deprecated and only used for testing the python module! Use the Rust CLI instead."
+    )
 
     if verbose:
         _l.setLevel(logging.INFO)
@@ -189,14 +176,6 @@ def main(
         _l.raw_print_to_stdout(f"Default model: {Magika.get_default_model_name()}")
         sys.exit(0)
 
-    # check CLI arguments and options
-    if list_output_content_types:
-        if len(files_paths) > 0:
-            _l.error("You cannot pass any path when using the -l / --list option.")
-            sys.exit(1)
-        print_output_content_types_list()
-        sys.exit(0)
-
     if len(files_paths) == 0:
         _l.error("You need to pass at least one path, or - to read from stdin.")
         sys.exit(1)
@@ -244,7 +223,7 @@ def main(
                 _l.error(f'File or directory "{str(p)}" does not exist.')
                 sys.exit(1)
         # the resulting list may still include some directories; thus, we filter them out.
-        files_paths = list(filter(lambda x: not x.is_dir(), expanded_paths))
+        files_paths: List[Path] = list(filter(lambda x: not x.is_dir(), expanded_paths))  # type: ignore[no-redef]
 
     _l.info(f"Considering {len(files_paths)} files")
     _l.debug(f"Files: {files_paths}")
@@ -284,84 +263,85 @@ def main(
     }
 
     # updated only when we need to output in JSON format
-    all_predictions: List[MagikaResult] = []
-
-    # used only when the user decides to generate a feedback report
-    report_entries: List[FeedbackReport] = []
+    all_predictions: List[Tuple[Path, StatusOr[MagikaResult]]] = []
 
     batches_num = len(files_paths) // batch_size
     if len(files_paths) % batch_size != 0:
         batches_num += 1
     for batch_idx in range(batches_num):
-        files_ = files_paths[batch_idx * batch_size : (batch_idx + 1) * batch_size]
+        batch_files_paths = files_paths[
+            batch_idx * batch_size : (batch_idx + 1) * batch_size
+        ]
 
         if should_read_from_stdin(files_paths):
             batch_predictions = [get_magika_result_from_stdin(magika)]
         else:
-            batch_predictions = magika.identify_paths(files_)
+            batch_predictions = magika.identify_paths(batch_files_paths)
 
         if json_output:
             # we do not stream the output for JSON output
-            all_predictions.extend(batch_predictions)
+            all_predictions.extend(zip(batch_files_paths, batch_predictions))
         elif jsonl_output:
-            for magika_result in batch_predictions:
-                _l.raw_print_to_stdout(json.dumps(dataclasses.asdict(magika_result)))
+            for file_path, result in zip(batch_files_paths, batch_predictions):
+                _l.raw_print_to_stdout(
+                    json.dumps(path_and_result_to_dict(file_path, result))
+                )
         else:
-            for magika_result in batch_predictions:
-                path = magika_result.path
-                output_ct_label = magika_result.output.ct_label
-                output_ct_description = magika_result.output.description
-                output_ct_group = magika_result.output.group
-
-                if mime_output:
-                    # If the user requested the MIME type, we use the mime type
-                    # regardless of the compatibility mode.
-                    output = magika_result.output.mime_type
-                elif label_output:
-                    output = magika_result.output.ct_label
-                elif magic_compatibility_mode:
-                    output = magika_result.output.magic
-                else:  # human-readable description
-                    dl_ct_label = magika_result.dl.ct_label
-
-                    output = f"{output_ct_description} ({output_ct_group})"
-
-                    if dl_ct_label is not None and dl_ct_label != output_ct_label:
-                        # it seems that we had a too-low confidence prediction
-                        # from the model. Let's warn the user about our best
-                        # bet.
-                        assert magika_result.dl.score is not None
-                        dl_description = magika_result.dl.description
-                        dl_group = magika_result.dl.group
-                        dl_score = int(magika_result.dl.score * 100)
-                        output += f" [Low-confidence model best-guess: {dl_description} ({dl_group}), score={dl_score}]"
-
-                if with_colors:
-                    start_color = color_by_group.get(output_ct_group, colors.WHITE)
-                    end_color = colors.RESET
-
-                if output_score:
-                    score = int(magika_result.output.score * 100)
+            for file_path, result in zip(batch_files_paths, batch_predictions):
+                if result.ok:
+                    if mime_output:
+                        # If the user requested the MIME type, we use the mime type
+                        # regardless of the compatibility mode.
+                        output = result.value.output.mime_type
+                    elif label_output:
+                        output = str(result.value.output.label)
+                    else:  # human-readable description
+                        output = f"{result.value.output.description} ({result.value.output.group})"
+
+                        if (
+                            result.value.dl.label != ContentTypeLabel.UNDEFINED
+                            and result.value.dl.label != result.value.output.label
+                        ):
+                            # it seems that we had a too-low confidence prediction
+                            # from the model. Let's warn the user about our best
+                            # bet, with the score as an integer percentage.
+                            output += (
+                                " [Low-confidence model best-guess: "
+                                f"{result.value.dl.description} ({result.value.dl.group}), "
+                                f"score={int(result.value.score * 100)}]"
+                            )
+
+                    if with_colors:
+                        start_color = color_by_group.get(
+                            result.value.output.group, colors.WHITE
+                        )
+                        end_color = colors.RESET
+                else:
+                    output = result.status
+                    start_color = ""
+                    end_color = ""
+
+                if output_score and result.ok:
+                    score = int(result.value.score * 100)
                     _l.raw_print_to_stdout(
-                        f"{start_color}{path}: {output} {score}%{end_color}"
+                        f"{start_color}{file_path}: {output} {score}%{end_color}"
                     )
                 else:
-                    _l.raw_print_to_stdout(f"{start_color}{path}: {output}{end_color}")
-
-        if generate_report_flag:
-            for file_path, magika_result in zip(files_, batch_predictions):
-                report_entries.append(
-                    generate_feedback_report(magika, file_path, magika_result)
-                )
+                    _l.raw_print_to_stdout(
+                        f"{start_color}{file_path}: {output}{end_color}"
+                    )
 
     if json_output:
         _l.raw_print_to_stdout(
-            json.dumps([dataclasses.asdict(res) for res in all_predictions], indent=4)
+            json.dumps(
+                [
+                    path_and_result_to_dict(file_path, result)
+                    for file_path, result in all_predictions
+                ],
+                indent=4,
+            )
         )
 
-    if generate_report_flag:
-        print_feedback_report(magika=magika, reports=report_entries)
-
     if dump_performance_stats_flag:
         magika.dump_performance_stats()
 
@@ -370,92 +350,21 @@ def should_read_from_stdin(files_paths: List[Path]) -> bool:
     return len(files_paths) == 1 and str(files_paths[0]) == "-"
 
 
-def get_magika_result_from_stdin(magika: Magika) -> MagikaResult:
+def get_magika_result_from_stdin(magika: Magika) -> StatusOr[MagikaResult]:
     content = sys.stdin.buffer.read()
     result = magika.identify_bytes(content)
     return result
 
 
-def generate_feedback_report(
-    magika: Magika, file_path: Path, magika_result: MagikaResult
-) -> FeedbackReport:
-    magika_result_copy = copy.copy(magika_result)
-    magika_result_copy.path = "<REMOVED>"  # avoid PII
-    features = Magika._extract_features_from_path(
-        file_path,
-        beg_size=magika._input_sizes["beg"],
-        mid_size=magika._input_sizes["mid"],
-        end_size=magika._input_sizes["end"],
-        padding_token=magika._padding_token,
-        block_size=magika._block_size,
-    )
-    return FeedbackReport(
-        hash=hashlib.sha256(file_path.read_bytes()).hexdigest(),
-        features=features,
-        result=magika_result_copy,
-    )
-
-
-def print_feedback_report(magika: Magika, reports: List[FeedbackReport]) -> None:
-    _l = get_logger()
-
-    processed_reports = [
-        {
-            "hash": report.hash,
-            "features": json.dumps(dataclasses.asdict(report.features)).replace(
-                " ", ""
-            ),
-            "result": dataclasses.asdict(report.result),
+def path_and_result_to_dict(file_path: Path, result: StatusOr[MagikaResult]) -> dict:
+    if result.ok:
+        out = {
+            "path": str(file_path),
+            "result": {"status": Status.OK, "value": dataclasses.asdict(result.value)},
         }
-        for report in reports
-    ]
-
-    full_report = {
-        "version": VERSION,
-        "model_dir_name": magika.get_model_name(),
-        "python_version": sys.version,
-        "reports": processed_reports,
-    }
-    report_header = "REPORT"
-    report_header_full_len = 40
-    _l.raw_print("#" * report_header_full_len)
-    _l.raw_print(
-        "###"
-        + (" " * ((report_header_full_len - 6 - len(report_header)) // 2))
-        + report_header
-        + (" " * ((report_header_full_len - 6 - len(report_header)) // 2))
-        + "###",
-    )
-    _l.raw_print("#" * report_header_full_len)
-    _l.raw_print(json.dumps(full_report))
-    _l.raw_print("#" * report_header_full_len)
-    _l.raw_print(
-        f"Please copy/paste the above as a description of your issue. Open a GitHub issue or reach out at {CONTACT_EMAIL}.",
-    )
-    _l.raw_print(
-        "Please include as many details as possible, e.g., what was the expected content type.",
-    )
-    _l.raw_print(
-        "IMPORTANT: do NOT submit private information or PII! The extracted features include many bytes of the tested files!",
-    )
-
-
-def print_output_content_types_list() -> None:
-    _l = get_logger()
-
-    ctm = ContentTypesManager()
-    content_types = ctm.get_output_content_types()
-
-    headers = ["#", "Content Type Label", "Description"]
-    rows = []
-    for ct_idx, ct in enumerate(content_types):
-        row = [
-            ct_idx + 1,
-            ct.name,
-            "" if ct.description is None else ct.description,
-        ]
-        rows.append(row)
-    _l.raw_print_to_stdout(tabulate(rows, headers=headers))
+    else:
+        out = {"path": str(file_path), "result": {"status": result.status}}
+    return out
 
 
 if __name__ == "__main__":
diff --git a/python/magika/config/content_types_config.json b/python/magika/config/content_types_config.json
deleted file mode 100644
index 429905d9..00000000
--- a/python/magika/config/content_types_config.json
+++ /dev/null
@@ -1,4760 +0,0 @@
-{
-    "3gp": {
-        "name": "3gp",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ace": {
-        "name": "ace",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "aff": {
-        "name": "aff",
-        "extensions": [
-            "aff"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ai": {
-        "name": "ai",
-        "extensions": [
-            "ai"
-        ],
-        "mime_type": "application/pdf",
-        "group": "document",
-        "magic": "PDF document",
-        "description": "Adobe Illustrator Artwork",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "ai",
-        "target_label": "ai",
-        "correct_labels": [
-            "ai",
-            "pdf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "algol68": {
-        "name": "algol68",
-        "extensions": [
-            "a68"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "apk": {
-        "name": "apk",
-        "extensions": [
-            "apk"
-        ],
-        "mime_type": "application/vnd.android.package-archive",
-        "group": "executable",
-        "magic": "Java archive data",
-        "description": "Android package",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "apk",
-        "correct_labels": [
-            "apk"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "appleplist": {
-        "name": "appleplist",
-        "extensions": [
-            "bplist",
-            "plist"
-        ],
-        "mime_type": "application/x-plist",
-        "group": "application",
-        "magic": "Apple binary property list",
-        "description": "Apple property list",
-        "vt_type": "appleplist",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "text"
-        ],
-        "model_target_label": "appleplist",
-        "target_label": "appleplist",
-        "correct_labels": [
-            "appleplist"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "arj": {
-        "name": "arj",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "asm": {
-        "name": "asm",
-        "extensions": [
-            "S",
-            "asm"
-        ],
-        "mime_type": "text/x-asm",
-        "group": "code",
-        "magic": "assembler source",
-        "description": "Assembly",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "asm",
-        "target_label": "asm",
-        "correct_labels": [
-            "asm"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "asp": {
-        "name": "asp",
-        "extensions": [
-            "aspx",
-            "asp"
-        ],
-        "mime_type": "text/html",
-        "group": "code",
-        "magic": "HTML document",
-        "description": "ASP source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "asp",
-        "target_label": "asp",
-        "correct_labels": [
-            "asp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "avi": {
-        "name": "avi",
-        "extensions": [
-            "avi"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ax": {
-        "name": "ax",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "batch": {
-        "name": "batch",
-        "extensions": [
-            "bat"
-        ],
-        "mime_type": "text/x-msdos-batch",
-        "group": "code",
-        "magic": "DOS batch file",
-        "description": "DOS batch file",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "batch",
-        "target_label": "batch",
-        "correct_labels": [
-            "batch"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "bcad": {
-        "name": "bcad",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "bib": {
-        "name": "bib",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "bmp": {
-        "name": "bmp",
-        "extensions": [
-            "bmp"
-        ],
-        "mime_type": "image/bmp",
-        "group": "image",
-        "magic": "PC bitmap",
-        "description": "BMP image data",
-        "vt_type": "bmp",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "bmp",
-        "target_label": "bmp",
-        "correct_labels": [
-            "bmp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "bpl": {
-        "name": "bpl",
-        "extensions": [
-            "bpl"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "brainfuck": {
-        "name": "brainfuck",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "bzip": {
-        "name": "bzip",
-        "extensions": [
-            "bz2",
-            "tbz2",
-            "tar.bz2"
-        ],
-        "mime_type": "application/x-bzip2",
-        "group": "archive",
-        "magic": "bzip2 compressed data",
-        "description": "bzip2 compressed data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "bzip",
-        "target_label": "bzip",
-        "correct_labels": [
-            "bzip"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "c": {
-        "name": "c",
-        "extensions": [
-            "c",
-            "cpp",
-            "h",
-            "hpp",
-            "cc"
-        ],
-        "mime_type": "text/x-c",
-        "group": "code",
-        "magic": "C source",
-        "description": "C source",
-        "vt_type": "c,cpp",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "c",
-        "target_label": "c",
-        "correct_labels": [
-            "c",
-            "cpp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "cab": {
-        "name": "cab",
-        "extensions": [
-            "cab"
-        ],
-        "mime_type": "application/vnd.ms-cab-compressed",
-        "group": "archive",
-        "magic": "Microsoft Cabinet archive data",
-        "description": "Microsoft Cabinet archive data",
-        "vt_type": "cab",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "cab",
-        "target_label": "cab",
-        "correct_labels": [
-            "cab"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "cad": {
-        "name": "cad",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "cat": {
-        "name": "cat",
-        "extensions": [
-            "cat"
-        ],
-        "mime_type": "application/octet-stream",
-        "group": "application",
-        "magic": "data",
-        "description": "Windows Catalog file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "cat",
-        "target_label": "cat",
-        "correct_labels": [
-            "cat",
-            "ctl"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "cdf": {
-        "name": "cdf",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "chm": {
-        "name": "chm",
-        "extensions": [
-            "chm"
-        ],
-        "mime_type": "application/chm",
-        "group": "application",
-        "magic": "MS Windows HtmlHelp Data",
-        "description": "MS Windows HtmlHelp Data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "chm",
-        "target_label": "chm",
-        "correct_labels": [
-            "chm"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "clojure": {
-        "name": "clojure",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "cmake": {
-        "name": "cmake",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "cobol": {
-        "name": "cobol",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "coff": {
-        "name": "coff",
-        "extensions": [],
-        "mime_type": "application/x-coff",
-        "group": "executable",
-        "magic": "Intel 80386 COFF",
-        "description": "Intel 80386 COFF",
-        "vt_type": "coff",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "coff",
-        "target_label": "coff",
-        "correct_labels": [
-            "coff",
-            "exp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "coffee": {
-        "name": "coffee",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "com": {
-        "name": "com",
-        "extensions": [],
-        "mime_type": "application/x-dosexec",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": "com",
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "cpl": {
-        "name": "cpl",
-        "extensions": [
-            "cpl"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "PE32 executable",
-        "description": "PE Windows executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "cpl"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "cpp": {
-        "name": "cpp",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "crx": {
-        "name": "crx",
-        "extensions": [
-            "crx"
-        ],
-        "mime_type": "application/x-chrome-extension",
-        "group": "executable",
-        "magic": "Google Chrome extension",
-        "description": "Google Chrome extension",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "crx",
-        "target_label": "crx",
-        "correct_labels": [
-            "crx"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "cs": {
-        "name": "cs",
-        "extensions": [
-            "cs"
-        ],
-        "mime_type": "text/plain",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "C# source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "cs",
-        "target_label": "cs",
-        "correct_labels": [
-            "cs"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "css": {
-        "name": "css",
-        "extensions": [
-            "css"
-        ],
-        "mime_type": "text/css",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "CSS source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "css",
-        "target_label": "css",
-        "correct_labels": [
-            "css"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "csv": {
-        "name": "csv",
-        "extensions": [
-            "csv"
-        ],
-        "mime_type": "text/csv",
-        "group": "code",
-        "magic": "CSV text",
-        "description": "CSV document",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "csv",
-        "target_label": "csv",
-        "correct_labels": [
-            "csv"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ctl": {
-        "name": "ctl",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "dart": {
-        "name": "dart",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "deb": {
-        "name": "deb",
-        "extensions": [
-            "deb"
-        ],
-        "mime_type": "application/vnd.debian.binary-package",
-        "group": "archive",
-        "magic": "Debian binary package",
-        "description": "Debian binary package",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "deb",
-        "target_label": "deb",
-        "correct_labels": [
-            "deb"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "dex": {
-        "name": "dex",
-        "extensions": [
-            "dex"
-        ],
-        "mime_type": "application/x-android-dex",
-        "group": "executable",
-        "magic": "Dalvik dex file",
-        "description": "Dalvik dex file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "dex",
-        "target_label": "dex",
-        "correct_labels": [
-            "dex"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "dey": {
-        "name": "dey",
-        "extensions": [],
-        "mime_type": "application/x-android-dey",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "diff": {
-        "name": "diff",
-        "extensions": [
-            "diff"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "directory": {
-        "name": "directory",
-        "extensions": [],
-        "mime_type": "inode/directory",
-        "group": "inode",
-        "magic": "directory",
-        "description": "A directory",
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": "directory",
-        "correct_labels": [
-            "directory"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": false
-    },
-    "dll": {
-        "name": "dll",
-        "extensions": [
-            "dll"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "PE Windows executable",
-        "description": "PE Windows executable",
-        "vt_type": "pedll",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "dll"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "dm": {
-        "name": "dm",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "dmg": {
-        "name": "dmg",
-        "extensions": [
-            "dmg"
-        ],
-        "mime_type": "application/x-apple-diskimage",
-        "group": "archive",
-        "magic": "Apple disk image",
-        "description": "Apple disk image",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "dmg",
-        "target_label": "dmg",
-        "correct_labels": [
-            "dmg"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "doc": {
-        "name": "doc",
-        "extensions": [
-            "doc"
-        ],
-        "mime_type": "application/msword",
-        "group": "document",
-        "magic": "Composite Document File",
-        "description": "Microsoft Word CDF document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": "cdf",
-        "target_label": "doc",
-        "correct_labels": [
-            "doc"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "dockerfile": {
-        "name": "dockerfile",
-        "extensions": [
-            "=Dockerfile"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "docx": {
-        "name": "docx",
-        "extensions": [
-            "docx",
-            "docm"
-        ],
-        "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        "group": "document",
-        "magic": "Microsoft Word 2007+",
-        "description": "Microsoft Word 2007+ document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "ooxml",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "docx",
-        "correct_labels": [
-            "docx",
-            "tmdx"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "dosmbr": {
-        "name": "dosmbr",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "dylib": {
-        "name": "dylib",
-        "extensions": [
-            "dylib"
-        ],
-        "mime_type": "application/x-mach-o",
-        "group": "executable",
-        "magic": "Mach-O executable",
-        "description": "Mach-O executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "macho"
-        ],
-        "model_target_label": "macho",
-        "target_label": "macho",
-        "correct_labels": [
-            "macho",
-            "dylib"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "elf": {
-        "name": "elf",
-        "extensions": [
-            "elf",
-            "so"
-        ],
-        "mime_type": "application/x-executable-elf",
-        "group": "executable",
-        "magic": "ELF executable",
-        "description": "ELF executable",
-        "vt_type": "elf",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "elf"
-        ],
-        "model_target_label": "elf",
-        "target_label": "elf",
-        "correct_labels": [
-            "elf",
-            "so"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "elixir": {
-        "name": "elixir",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "emf": {
-        "name": "emf",
-        "extensions": [
-            "emf"
-        ],
-        "mime_type": "application/octet-stream",
-        "group": "application",
-        "magic": "Windows Enhanced Metafile",
-        "description": "Windows Enhanced Metafile image data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "emf",
-        "target_label": "emf",
-        "correct_labels": [
-            "emf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "eml": {
-        "name": "eml",
-        "extensions": [
-            "eml"
-        ],
-        "mime_type": "message/rfc822",
-        "group": "text",
-        "magic": "RFC 822 mail",
-        "description": "RFC 822 mail",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "eml",
-        "target_label": "eml",
-        "correct_labels": [
-            "eml"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "empty": {
-        "name": "empty",
-        "extensions": [],
-        "mime_type": "inode/x-empty",
-        "group": "inode",
-        "magic": "empty",
-        "description": "Empty file",
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": "empty",
-        "correct_labels": [
-            "empty"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": false
-    },
-    "epub": {
-        "name": "epub",
-        "extensions": [
-            "epub"
-        ],
-        "mime_type": "application/epub+zip",
-        "group": "document",
-        "magic": "EPUB document",
-        "description": "EPUB document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "epub",
-        "correct_labels": [
-            "epub"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "erlang": {
-        "name": "erlang",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ese": {
-        "name": "ese",
-        "extensions": [],
-        "mime_type": "application/x-ms-ese",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "exe": {
-        "name": "exe",
-        "extensions": [
-            "exe"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "ELF executable",
-        "description": "ELF executable",
-        "vt_type": "peexe",
-        "datasets": [
-            "vt-ext",
-            "vt-ext-malicious"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "exe"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "exp": {
-        "name": "exp",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "flac": {
-        "name": "flac",
-        "extensions": [
-            "flac"
-        ],
-        "mime_type": "audio/flac",
-        "group": "audio",
-        "magic": "FLAC audio bitstream data",
-        "description": "FLAC audio bitstream data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "flac",
-        "target_label": "flac",
-        "correct_labels": [
-            "flac"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "fortran": {
-        "name": "fortran",
-        "extensions": [
-            "f90",
-            "f95",
-            "f03"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "fpx": {
-        "name": "fpx",
-        "extensions": [
-            "fpx"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": "fpx",
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "gif": {
-        "name": "gif",
-        "extensions": [
-            "gif"
-        ],
-        "mime_type": "image/gif",
-        "group": "image",
-        "magic": "GIF image data",
-        "description": "GIF image data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "gif",
-        "target_label": "gif",
-        "correct_labels": [
-            "gif"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "go": {
-        "name": "go",
-        "extensions": [
-            "go"
-        ],
-        "mime_type": "text/x-golang",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "Golang source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "go",
-        "target_label": "go",
-        "correct_labels": [
-            "go"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "gpx": {
-        "name": "gpx",
-        "extensions": [
-            "gpx"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "groovy": {
-        "name": "groovy",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "gzip": {
-        "name": "gzip",
-        "extensions": [
-            "gz",
-            "gzip",
-            "tgz",
-            "tar.gz"
-        ],
-        "mime_type": "application/gzip",
-        "group": "archive",
-        "magic": "gzip compressed data",
-        "description": "gzip compressed data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "gzip",
-        "target_label": "gzip",
-        "correct_labels": [
-            "gzip"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "haskell": {
-        "name": "haskell",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "hfs": {
-        "name": "hfs",
-        "extensions": [
-            "hfs"
-        ],
-        "mime_type": "application/x-hfs",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "hlp": {
-        "name": "hlp",
-        "extensions": [
-            "hlp"
-        ],
-        "mime_type": "application/winhlp",
-        "group": "application",
-        "magic": "MS Windows help",
-        "description": "MS Windows help",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "hlp",
-        "target_label": "hlp",
-        "correct_labels": [
-            "hlp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "hta": {
-        "name": "hta",
-        "extensions": [
-            "hta"
-        ],
-        "mime_type": "application/hta",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "html": {
-        "name": "html",
-        "extensions": [
-            "html",
-            "htm",
-            "xhtml",
-            "xht"
-        ],
-        "mime_type": "text/html",
-        "group": "code",
-        "magic": "HTML document",
-        "description": "HTML document",
-        "vt_type": "html",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "html",
-        "target_label": "html",
-        "correct_labels": [
-            "html"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "hve": {
-        "name": "hve",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ico": {
-        "name": "ico",
-        "extensions": [
-            "ico"
-        ],
-        "mime_type": "image/vnd.microsoft.icon",
-        "group": "image",
-        "magic": "MS Windows icon resource",
-        "description": "MS Windows icon resource",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "ico",
-        "target_label": "ico",
-        "correct_labels": [
-            "ico"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "img": {
-        "name": "img",
-        "extensions": [
-            "img"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ini": {
-        "name": "ini",
-        "extensions": [
-            "ini"
-        ],
-        "mime_type": "text/plain",
-        "group": "text",
-        "magic": "Generic INItialization configuration",
-        "description": "INI configuration file",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "ini",
-        "target_label": "ini",
-        "correct_labels": [
-            "ini"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "internetshortcut": {
-        "name": "internetshortcut",
-        "extensions": [
-            "url"
-        ],
-        "mime_type": "application/x-mswinurl",
-        "group": "application",
-        "magic": "MS Windows 95 Internet shortcut",
-        "description": "MS Windows Internet shortcut",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "internetshortcut",
-        "target_label": "internetshortcut",
-        "correct_labels": [
-            "internetshortcut"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "iosapp": {
-        "name": "iosapp",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "iso": {
-        "name": "iso",
-        "extensions": [
-            "iso"
-        ],
-        "mime_type": "application/x-iso9660-image",
-        "group": "archive",
-        "magic": "ISO 9660 CD-ROM filesystem data",
-        "description": "ISO 9660 CD-ROM filesystem data",
-        "vt_type": "isoimage",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "iso",
-        "target_label": "iso",
-        "correct_labels": [
-            "iso",
-            "udf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "jar": {
-        "name": "jar",
-        "extensions": [
-            "jar"
-        ],
-        "mime_type": "application/java-archive",
-        "group": "archive",
-        "magic": "Java archive data (JAR)",
-        "description": "Java archive data (JAR)",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "jar",
-        "target_label": "jar",
-        "correct_labels": [
-            "jar"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "java": {
-        "name": "java",
-        "extensions": [
-            "java"
-        ],
-        "mime_type": "text/x-java",
-        "group": "code",
-        "magic": "Java source",
-        "description": "Java source",
-        "vt_type": "java",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "java",
-        "target_label": "java",
-        "correct_labels": [
-            "java"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "javabytecode": {
-        "name": "javabytecode",
-        "extensions": [
-            "class"
-        ],
-        "mime_type": "application/x-java-applet",
-        "group": "executable",
-        "magic": "compiled Java class data",
-        "description": "Java compiled bytecode",
-        "vt_type": "class",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "javabytecode",
-        "target_label": "javabytecode",
-        "correct_labels": [
-            "javabytecode"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "javascript": {
-        "name": "javascript",
-        "extensions": [
-            "js"
-        ],
-        "mime_type": "application/javascript",
-        "group": "code",
-        "magic": "JavaScript source",
-        "description": "JavaScript source",
-        "vt_type": "javascript",
-        "datasets": [
-            "github",
-            "vt-ext",
-            "vt-ext-malicious"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "javascript",
-        "target_label": "javascript",
-        "correct_labels": [
-            "javascript",
-            "typescript"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "jpeg": {
-        "name": "jpeg",
-        "extensions": [
-            "jpg",
-            "jpeg"
-        ],
-        "mime_type": "image/jpeg",
-        "group": "image",
-        "magic": "JPEG image data",
-        "description": "JPEG image data",
-        "vt_type": "jpeg",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "jpeg",
-        "target_label": "jpeg",
-        "correct_labels": [
-            "jpeg"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "json": {
-        "name": "json",
-        "extensions": [
-            "json"
-        ],
-        "mime_type": "application/json",
-        "group": "code",
-        "magic": "JSON data",
-        "description": "JSON document",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "json",
-        "target_label": "json",
-        "correct_labels": [
-            "json"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "julia": {
-        "name": "julia",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ko": {
-        "name": "ko",
-        "extensions": [
-            "ko"
-        ],
-        "mime_type": "application/x-executable-elf",
-        "group": "executable",
-        "magic": "ELF executable",
-        "description": "ELF executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "elf"
-        ],
-        "model_target_label": "elf",
-        "target_label": "elf",
-        "correct_labels": [
-            "elf",
-            "ko"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "kotlin": {
-        "name": "kotlin",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "latex": {
-        "name": "latex",
-        "extensions": [
-            "tex"
-        ],
-        "mime_type": "text/x-tex",
-        "group": "text",
-        "magic": "LaTeX document",
-        "description": "LaTeX document",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "latex",
-        "target_label": "latex",
-        "correct_labels": [
-            "latex"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "lisp": {
-        "name": "lisp",
-        "extensions": [
-            "lisp"
-        ],
-        "mime_type": "text/x-lisp",
-        "group": "code",
-        "magic": "Lisp/Scheme program",
-        "description": "Lisp source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "lisp",
-        "target_label": "lisp",
-        "correct_labels": [
-            "lisp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "lnk": {
-        "name": "lnk",
-        "extensions": [
-            "lnk"
-        ],
-        "mime_type": "application/x-ms-shortcut",
-        "group": "application",
-        "magic": "MS Windows shortcut",
-        "description": "MS Windows shortcut",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "lnk",
-        "target_label": "lnk",
-        "correct_labels": [
-            "lnk"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "lua": {
-        "name": "lua",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "m3u": {
-        "name": "m3u",
-        "extensions": [
-            "m3u8",
-            "m3u"
-        ],
-        "mime_type": "text/plain",
-        "group": "application",
-        "magic": "M3U playlist",
-        "description": "M3U playlist",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "m3u",
-        "target_label": "m3u",
-        "correct_labels": [
-            "m3u"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "macho": {
-        "name": "macho",
-        "extensions": [],
-        "mime_type": "application/x-mach-o",
-        "group": "executable",
-        "magic": "Mach-O executable",
-        "description": "Mach-O executable",
-        "vt_type": "macho",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "macho"
-        ],
-        "model_target_label": "macho",
-        "target_label": "macho",
-        "correct_labels": [
-            "macho",
-            "dylib"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "maff": {
-        "name": "maff",
-        "extensions": [
-            "maff"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "makefile": {
-        "name": "makefile",
-        "extensions": [
-            "=Makefile"
-        ],
-        "mime_type": "text/x-makefile",
-        "group": "code",
-        "magic": "makefile script",
-        "description": "Makefile source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "makefile",
-        "target_label": "makefile",
-        "correct_labels": [
-            "makefile"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "markdown": {
-        "name": "markdown",
-        "extensions": [
-            "md"
-        ],
-        "mime_type": "text/markdown",
-        "group": "text",
-        "magic": "ASCII text",
-        "description": "Markdown document",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "markdown",
-        "target_label": "markdown",
-        "correct_labels": [
-            "markdown"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "matlab": {
-        "name": "matlab",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "mht": {
-        "name": "mht",
-        "extensions": [
-            "mht"
-        ],
-        "mime_type": "application/x-mimearchive",
-        "group": "code",
-        "magic": "HTML document",
-        "description": "MHTML document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "mht",
-        "target_label": "mht",
-        "correct_labels": [
-            "mht"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mkv": {
-        "name": "mkv",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "mov": {
-        "name": "mov",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "mp3": {
-        "name": "mp3",
-        "extensions": [
-            "mp3"
-        ],
-        "mime_type": "audio/mpeg",
-        "group": "audio",
-        "magic": "Audio file with ID3",
-        "description": "MP3 media file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "mp3",
-        "target_label": "mp3",
-        "correct_labels": [
-            "mp3"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mp4": {
-        "name": "mp4",
-        "extensions": [
-            "mov",
-            "mp4"
-        ],
-        "mime_type": "video/mp4",
-        "group": "video",
-        "magic": "ISO Media",
-        "description": "MP4 media file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "mp4",
-        "target_label": "mp4",
-        "correct_labels": [
-            "mp4"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mscompress": {
-        "name": "mscompress",
-        "extensions": [],
-        "mime_type": "application/x-ms-compress-szdd",
-        "group": "archive",
-        "magic": "MS Compress archive data",
-        "description": "MS Compress archive data",
-        "vt_type": "mscompress",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "mscompress",
-        "target_label": "mscompress",
-        "correct_labels": [
-            "mscompress"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "msi": {
-        "name": "msi",
-        "extensions": [
-            "msi"
-        ],
-        "mime_type": "application/x-msi",
-        "group": "archive",
-        "magic": "Composite Document File",
-        "description": "Microsoft Installer file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": "msi",
-        "target_label": "msi",
-        "correct_labels": [
-            "msi"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mst": {
-        "name": "mst",
-        "extensions": [
-            "mst"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "msvisio": {
-        "name": "msvisio",
-        "extensions": [],
-        "mime_type": "application/vnd.ms-visio.drawing.main+xml",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "mui": {
-        "name": "mui",
-        "extensions": [
-            "mui"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "application",
-        "magic": "PE Windows executable",
-        "description": "PE Windows executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "mui"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mum": {
-        "name": "mum",
-        "extensions": [
-            "mum"
-        ],
-        "mime_type": "text/xml",
-        "group": "application",
-        "magic": "XML document",
-        "description": "Windows Update Package file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "mum",
-        "target_label": "mum",
-        "correct_labels": [
-            "mum"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "mun": {
-        "name": "mun",
-        "extensions": [
-            "mun"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "nim": {
-        "name": "nim",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "null": {
-        "name": "null",
-        "extensions": [
-            "null"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "object": {
-        "name": "object",
-        "extensions": [
-            "o"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "objectivec": {
-        "name": "objectivec",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ocaml": {
-        "name": "ocaml",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ocx": {
-        "name": "ocx",
-        "extensions": [
-            "ocx"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "PE Windows executable",
-        "description": "PE Windows executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "ax",
-            "ocx"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "odex": {
-        "name": "odex",
-        "extensions": [
-            "odex"
-        ],
-        "mime_type": "application/x-executable-elf",
-        "group": "executable",
-        "magic": "ELF executable",
-        "description": "ODEX ELF executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "elf"
-        ],
-        "model_target_label": "odex",
-        "target_label": "odex",
-        "correct_labels": [
-            "odex",
-            "elf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "odp": {
-        "name": "odp",
-        "extensions": [
-            "odp"
-        ],
-        "mime_type": "application/vnd.oasis.opendocument.presentation",
-        "group": "document",
-        "magic": "OpenDocument Presentation",
-        "description": "OpenDocument Presentation",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive"
-        ],
-        "model_target_label": "odp",
-        "target_label": "odp",
-        "correct_labels": [
-            "odp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ods": {
-        "name": "ods",
-        "extensions": [
-            "ods"
-        ],
-        "mime_type": "application/vnd.oasis.opendocument.spreadsheet",
-        "group": "document",
-        "magic": "OpenDocument Spreadsheet",
-        "description": "OpenDocument Spreadsheet",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive"
-        ],
-        "model_target_label": "ods",
-        "target_label": "ods",
-        "correct_labels": [
-            "ods"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "odt": {
-        "name": "odt",
-        "extensions": [
-            "odt"
-        ],
-        "mime_type": "application/vnd.oasis.opendocument.text",
-        "group": "document",
-        "magic": "OpenDocument Text",
-        "description": "OpenDocument Text",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive"
-        ],
-        "model_target_label": "odt",
-        "target_label": "odt",
-        "correct_labels": [
-            "odt"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ogg": {
-        "name": "ogg",
-        "extensions": [
-            "ogg"
-        ],
-        "mime_type": "audio/ogg",
-        "group": "audio",
-        "magic": "Ogg data",
-        "description": "Ogg data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "ogg",
-        "target_label": "ogg",
-        "correct_labels": [
-            "ogg"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ole": {
-        "name": "ole",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ooxml": {
-        "name": "ooxml",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "outlook": {
-        "name": "outlook",
-        "extensions": [],
-        "mime_type": "application/vnd.ms-outlook",
-        "group": "application",
-        "magic": "CDFV2 Microsoft Outlook Message",
-        "description": "MS Outlook Message",
-        "vt_type": "outlook",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": "cdf",
-        "target_label": "outlook",
-        "correct_labels": [
-            "outlook"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "palmos": {
-        "name": "palmos",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": "palmos",
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "pascal": {
-        "name": "pascal",
-        "extensions": [
-            "pascal"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "pbm": {
-        "name": "pbm",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "pcap": {
-        "name": "pcap",
-        "extensions": [
-            "pcap",
-            "pcapng"
-        ],
-        "mime_type": "application/vnd.tcpdump.pcap",
-        "group": "application",
-        "magic": "pcap capture file",
-        "description": "pcap capture file",
-        "vt_type": "pcap",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "pcap",
-        "target_label": "pcap",
-        "correct_labels": [
-            "pcap"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pdf": {
-        "name": "pdf",
-        "extensions": [
-            "pdf"
-        ],
-        "mime_type": "application/pdf",
-        "group": "document",
-        "magic": "PDF document",
-        "description": "PDF document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "pdf",
-        "target_label": "pdf",
-        "correct_labels": [
-            "pdf",
-            "ai"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pebin": {
-        "name": "pebin",
-        "extensions": [
-            "exe",
-            "dll",
-            "sys"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "PE executable",
-        "description": "PE executable",
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": false
-    },
-    "pem": {
-        "name": "pem",
-        "extensions": [
-            "pem",
-            "pub"
-        ],
-        "mime_type": "application/x-pem-file",
-        "group": "application",
-        "magic": "PEM certificate",
-        "description": "PEM certificate",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "pem",
-        "target_label": "pem",
-        "correct_labels": [
-            "pem",
-            "pgpkey"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "perl": {
-        "name": "perl",
-        "extensions": [
-            "pl"
-        ],
-        "mime_type": "text/x-perl",
-        "group": "code",
-        "magic": "Perl script text executable",
-        "description": "Perl source",
-        "vt_type": "perl",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "perl",
-        "target_label": "perl",
-        "correct_labels": [
-            "perl"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pgpkey": {
-        "name": "pgpkey",
-        "extensions": [],
-        "mime_type": "application/pgp-keys",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "php": {
-        "name": "php",
-        "extensions": [
-            "php"
-        ],
-        "mime_type": "text/x-php",
-        "group": "code",
-        "magic": "PHP script",
-        "description": "PHP source",
-        "vt_type": "php",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "php",
-        "target_label": "php",
-        "correct_labels": [
-            "php"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "png": {
-        "name": "png",
-        "extensions": [
-            "png"
-        ],
-        "mime_type": "image/png",
-        "group": "image",
-        "magic": "PNG image data",
-        "description": "PNG image data",
-        "vt_type": "png",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "png",
-        "target_label": "png",
-        "correct_labels": [
-            "png"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "postscript": {
-        "name": "postscript",
-        "extensions": [
-            "ps"
-        ],
-        "mime_type": "application/postscript",
-        "group": "document",
-        "magic": "PostScript document text",
-        "description": "PostScript document",
-        "vt_type": "postscript",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "postscript",
-        "target_label": "postscript",
-        "correct_labels": [
-            "postscript"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "powershell": {
-        "name": "powershell",
-        "extensions": [
-            "ps1"
-        ],
-        "mime_type": "application/x-powershell",
-        "group": "code",
-        "magic": "a powershell script",
-        "description": "Powershell source",
-        "vt_type": "ps",
-        "datasets": [
-            "github",
-            "vt-ext",
-            "vt-ext-malicious"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "powershell",
-        "target_label": "powershell",
-        "correct_labels": [
-            "powershell"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ppt": {
-        "name": "ppt",
-        "extensions": [
-            "ppt"
-        ],
-        "mime_type": "application/vnd.ms-powerpoint",
-        "group": "document",
-        "magic": "Composite Document File",
-        "description": "Microsoft PowerPoint CDF document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": "cdf",
-        "target_label": "ppt",
-        "correct_labels": [
-            "ppt"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pptx": {
-        "name": "pptx",
-        "extensions": [
-            "pptx",
-            "pptm"
-        ],
-        "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-        "group": "document",
-        "magic": "Microsoft PowerPoint 2007+",
-        "description": "Microsoft PowerPoint 2007+ document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "ooxml",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "pptx",
-        "correct_labels": [
-            "pptx"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "printfox": {
-        "name": "printfox",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "prolog": {
-        "name": "prolog",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "pub": {
-        "name": "pub",
-        "extensions": [
-            "pub"
-        ],
-        "mime_type": "application/x-mspublisher",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "python": {
-        "name": "python",
-        "extensions": [
-            "py"
-        ],
-        "mime_type": "text/x-python",
-        "group": "code",
-        "magic": "Python script",
-        "description": "Python source",
-        "vt_type": "python",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "python",
-        "target_label": "python",
-        "correct_labels": [
-            "python"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pythonbytecode": {
-        "name": "pythonbytecode",
-        "extensions": [
-            "pyc",
-            "pyo"
-        ],
-        "mime_type": "application/x-bytecode.python",
-        "group": "executable",
-        "magic": "python byte-compiled",
-        "description": "Python compiled bytecode",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "pythonbytecode",
-        "target_label": "pythonbytecode",
-        "correct_labels": [
-            "pythonbytecode"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "pythonpar": {
-        "name": "pythonpar",
-        "extensions": [
-            "par"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "r": {
-        "name": "r",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "randombytes": {
-        "name": "randombytes",
-        "extensions": [],
-        "mime_type": "application/octet-stream",
-        "group": "unknown",
-        "magic": "data",
-        "description": "Random bytes",
-        "vt_type": null,
-        "datasets": [
-            "synthetic"
-        ],
-        "parent": null,
-        "tags": [],
-        "model_target_label": "unknown",
-        "target_label": "unknown",
-        "correct_labels": [
-            "unknown"
-        ],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": true
-    },
-    "rar": {
-        "name": "rar",
-        "extensions": [
-            "rar"
-        ],
-        "mime_type": "application/x-rar",
-        "group": "archive",
-        "magic": "RAR archive data",
-        "description": "RAR archive data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "rar",
-        "target_label": "rar",
-        "correct_labels": [
-            "rar"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "rdf": {
-        "name": "rdf",
-        "extensions": [
-            "rdf"
-        ],
-        "mime_type": "application/rdf+xml",
-        "group": "text",
-        "magic": "XML document",
-        "description": "Resource Description Framework document (RDF)",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "rdf",
-        "target_label": "rdf",
-        "correct_labels": [
-            "rdf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "rll": {
-        "name": "rll",
-        "extensions": [
-            "rll"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "rpm": {
-        "name": "rpm",
-        "extensions": [
-            "rpm"
-        ],
-        "mime_type": "application/x-rpm",
-        "group": "archive",
-        "magic": "RPM",
-        "description": "RedHat Package Manager archive (RPM)",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "rpm",
-        "target_label": "rpm",
-        "correct_labels": [
-            "rpm"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "rst": {
-        "name": "rst",
-        "extensions": [
-            "rst"
-        ],
-        "mime_type": "text/x-rst",
-        "group": "text",
-        "magic": "ReStructuredText file",
-        "description": "ReStructuredText document",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "rst",
-        "target_label": "rst",
-        "correct_labels": [
-            "rst"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "rtf": {
-        "name": "rtf",
-        "extensions": [
-            "rtf"
-        ],
-        "mime_type": "text/rtf",
-        "group": "text",
-        "magic": "Rich Text Format data",
-        "description": "Rich Text Format document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "rtf",
-        "target_label": "rtf",
-        "correct_labels": [
-            "rtf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "ruby": {
-        "name": "ruby",
-        "extensions": [
-            "rb"
-        ],
-        "mime_type": "application/x-ruby",
-        "group": "code",
-        "magic": "Ruby script",
-        "description": "Ruby source",
-        "vt_type": "ruby",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "ruby",
-        "target_label": "ruby",
-        "correct_labels": [
-            "ruby"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "rust": {
-        "name": "rust",
-        "extensions": [
-            "rs"
-        ],
-        "mime_type": "application/x-rust",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "Rust source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "rust",
-        "target_label": "rust",
-        "correct_labels": [
-            "rust"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "s": {
-        "name": "s",
-        "extensions": [
-            "s"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "scala": {
-        "name": "scala",
-        "extensions": [
-            "scala"
-        ],
-        "mime_type": "application/x-scala",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "Scala source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "scala",
-        "target_label": "scala",
-        "correct_labels": [
-            "scala"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "scr": {
-        "name": "scr",
-        "extensions": [
-            "scr"
-        ],
-        "mime_type": "application/x-dosexec",
-        "group": "executable",
-        "magic": "PE Windows executable",
-        "description": "PE Windows executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "scr"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "scriptwsf": {
-        "name": "scriptwsf",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "sevenzip": {
-        "name": "sevenzip",
-        "extensions": [
-            "7z"
-        ],
-        "mime_type": "application/x-7z-compressed",
-        "group": "archive",
-        "magic": "7-zip archive data",
-        "description": "7-zip archive data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "sevenzip",
-        "target_label": "sevenzip",
-        "correct_labels": [
-            "sevenzip"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "sgml": {
-        "name": "sgml",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "sh3d": {
-        "name": "sh3d",
-        "extensions": [
-            "sh3d"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "shell": {
-        "name": "shell",
-        "extensions": [
-            "sh"
-        ],
-        "mime_type": "text/x-shellscript",
-        "group": "code",
-        "magic": "shell script",
-        "description": "Shell script",
-        "vt_type": "shell",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "shell",
-        "target_label": "shell",
-        "correct_labels": [
-            "shell"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "smali": {
-        "name": "smali",
-        "extensions": [
-            "smali"
-        ],
-        "mime_type": "application/x-smali",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "Smali source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "smali",
-        "target_label": "smali",
-        "correct_labels": [
-            "smali"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "so": {
-        "name": "so",
-        "extensions": [
-            "so"
-        ],
-        "mime_type": "application/x-executable-elf",
-        "group": "executable",
-        "magic": "ELF executable",
-        "description": "ELF executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "elf"
-        ],
-        "model_target_label": "elf",
-        "target_label": "elf",
-        "correct_labels": [
-            "elf",
-            "so"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "sql": {
-        "name": "sql",
-        "extensions": [
-            "sql"
-        ],
-        "mime_type": "application/x-sql",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "SQL source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "sql",
-        "target_label": "sql",
-        "correct_labels": [
-            "sql"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "squashfs": {
-        "name": "squashfs",
-        "extensions": [],
-        "mime_type": "application/octet-stream",
-        "group": "archive",
-        "magic": "Squashfs filesystem",
-        "description": "Squash filesystem",
-        "vt_type": "squashfs",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "squashfs",
-        "target_label": "squashfs",
-        "correct_labels": [
-            "squashfs"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "svd": {
-        "name": "svd",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "svg": {
-        "name": "svg",
-        "extensions": [
-            "svg"
-        ],
-        "mime_type": "image/svg+xml",
-        "group": "image",
-        "magic": "SVG Scalable Vector Graphics image",
-        "description": "SVG Scalable Vector Graphics image data",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "svg",
-        "target_label": "svg",
-        "correct_labels": [
-            "svg"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "swf": {
-        "name": "swf",
-        "extensions": [
-            "swf"
-        ],
-        "mime_type": "application/x-shockwave-flash",
-        "group": "executable",
-        "magic": "Macromedia Flash data",
-        "description": "Macromedia Flash data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "swf",
-        "target_label": "swf",
-        "correct_labels": [
-            "swf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "swift": {
-        "name": "swift",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "symlinktext": {
-        "name": "symlinktext",
-        "extensions": [],
-        "mime_type": "text/plain",
-        "group": "application",
-        "magic": "ASCII text",
-        "description": "Symbolic link (textual representation)",
-        "vt_type": null,
-        "datasets": [
-            "synthetic"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "symlinktext",
-        "target_label": "symlinktext",
-        "correct_labels": [
-            "symlinktext",
-            "txt"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "symlink": {
-        "name": "symlink",
-        "extensions": [],
-        "mime_type": "inode/symlink",
-        "group": "inode",
-        "magic": "symbolic link to <path>",
-        "description": "Symbolic link to <path>",
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": "symlink",
-        "correct_labels": [
-            "symlink"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": false
-    },
-    "sys": {
-        "name": "sys",
-        "extensions": [
-            "sys"
-        ],
-        "mime_type": "application/x-windows-driver",
-        "group": "executable",
-        "magic": "PE Windows executable",
-        "description": "PE Windows executable",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "pebin"
-        ],
-        "model_target_label": "pebin",
-        "target_label": "pebin",
-        "correct_labels": [
-            "pebin",
-            "sys"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "tar": {
-        "name": "tar",
-        "extensions": [
-            "tar"
-        ],
-        "mime_type": "application/x-tar",
-        "group": "archive",
-        "magic": "POSIX tar archive",
-        "description": "POSIX tar archive",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "tar",
-        "target_label": "tar",
-        "correct_labels": [
-            "tar"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "tga": {
-        "name": "tga",
-        "extensions": [
-            "tga"
-        ],
-        "mime_type": "image/x-tga",
-        "group": "image",
-        "magic": "Targa image data",
-        "description": "Targa image data",
-        "vt_type": "targa",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "tga",
-        "target_label": "tga",
-        "correct_labels": [
-            "tga"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "tiff": {
-        "name": "tiff",
-        "extensions": [
-            "tiff",
-            "tif"
-        ],
-        "mime_type": "image/tiff",
-        "group": "image",
-        "magic": "TIFF image data",
-        "description": "TIFF image data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "tiff",
-        "target_label": "tiff",
-        "correct_labels": [
-            "tiff"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "tmdx": {
-        "name": "tmdx",
-        "extensions": [
-            "tmdx",
-            "tmvx"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "toml": {
-        "name": "toml",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "torrent": {
-        "name": "torrent",
-        "extensions": [
-            "torrent"
-        ],
-        "mime_type": "application/x-bittorrent",
-        "group": "application",
-        "magic": "BitTorrent file",
-        "description": "BitTorrent file",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "torrent",
-        "target_label": "torrent",
-        "correct_labels": [
-            "torrent"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "troff": {
-        "name": "troff",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "ttf": {
-        "name": "ttf",
-        "extensions": [
-            "ttf"
-        ],
-        "mime_type": "font/sfnt",
-        "group": "font",
-        "magic": "TrueType Font data",
-        "description": "TrueType Font data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "ttf",
-        "target_label": "ttf",
-        "correct_labels": [
-            "ttf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "txt": {
-        "name": "txt",
-        "extensions": [
-            "txt"
-        ],
-        "mime_type": "text/plain",
-        "group": "text",
-        "magic": "ASCII text",
-        "description": "Generic text document",
-        "vt_type": null,
-        "datasets": [
-            "github",
-            "synthetic"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "txt",
-        "target_label": "txt",
-        "correct_labels": [
-            "txt"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "typescript": {
-        "name": "typescript",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "udf": {
-        "name": "udf",
-        "extensions": [],
-        "mime_type": "application/x-udf-image",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "unixcompress": {
-        "name": "unixcompress",
-        "extensions": [
-            "z"
-        ],
-        "mime_type": "application/x-compress",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "unknown": {
-        "name": "unknown",
-        "extensions": [],
-        "mime_type": "application/octet-stream",
-        "group": "unknown",
-        "magic": "data",
-        "description": "Unknown binary data",
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "unknown",
-        "target_label": "unknown",
-        "correct_labels": [
-            "unknown"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": false
-    },
-    "vba": {
-        "name": "vba",
-        "extensions": [
-            "vbs"
-        ],
-        "mime_type": "text/vbscript",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "MS Visual Basic source (VBA)",
-        "vt_type": "vba",
-        "datasets": [
-            "vt-ext",
-            "vt-ext-malicious"
-        ],
-        "parent": null,
-        "tags": [
-            "text"
-        ],
-        "model_target_label": "vba",
-        "target_label": "vba",
-        "correct_labels": [
-            "vba"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "verilog": {
-        "name": "verilog",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "vhd": {
-        "name": "vhd",
-        "extensions": [],
-        "mime_type": "application/x-vhd",
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "wasm": {
-        "name": "wasm",
-        "extensions": [
-            "wasm"
-        ],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "wav": {
-        "name": "wav",
-        "extensions": [
-            "wav"
-        ],
-        "mime_type": "audio/x-wav",
-        "group": "audio",
-        "magic": "RIFF data",
-        "description": "Waveform Audio file (WAV)",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "wav",
-        "target_label": "wav",
-        "correct_labels": [
-            "wav"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "webm": {
-        "name": "webm",
-        "extensions": [
-            "webm"
-        ],
-        "mime_type": "video/webm",
-        "group": "video",
-        "magic": "WebM",
-        "description": "WebM data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "webm",
-        "target_label": "webm",
-        "correct_labels": [
-            "webm"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "webp": {
-        "name": "webp",
-        "extensions": [
-            "webp"
-        ],
-        "mime_type": "image/webp",
-        "group": "image",
-        "magic": "RIFF data",
-        "description": "WebP data",
-        "vt_type": "webp",
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "media"
-        ],
-        "model_target_label": "webp",
-        "target_label": "webp",
-        "correct_labels": [
-            "webp"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "winregistry": {
-        "name": "winregistry",
-        "extensions": [
-            "reg"
-        ],
-        "mime_type": "text/x-ms-regedit",
-        "group": "application",
-        "magic": "Windows Registry text",
-        "description": "Windows Registry text",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "winregistry",
-        "target_label": "winregistry",
-        "correct_labels": [
-            "winregistry"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "wmf": {
-        "name": "wmf",
-        "extensions": [
-            "wmf"
-        ],
-        "mime_type": "image/wmf",
-        "group": "image",
-        "magic": "Windows metafile",
-        "description": "Windows metafile",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "wmf",
-        "target_label": "wmf",
-        "correct_labels": [
-            "wmf"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "woff": {
-        "name": "woff",
-        "extensions": [],
-        "mime_type": null,
-        "group": null,
-        "magic": null,
-        "description": null,
-        "vt_type": null,
-        "datasets": [],
-        "parent": null,
-        "tags": [],
-        "model_target_label": null,
-        "target_label": null,
-        "correct_labels": [],
-        "in_scope_for_output_content_type": false,
-        "in_scope_for_training": false
-    },
-    "xar": {
-        "name": "xar",
-        "extensions": [
-            "pkg",
-            "xar"
-        ],
-        "mime_type": "application/x-xar",
-        "group": "archive",
-        "magic": "xar archive compressed",
-        "description": "XAR archive compressed data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "xar",
-        "target_label": "xar",
-        "correct_labels": [
-            "xar"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xls": {
-        "name": "xls",
-        "extensions": [
-            "xls"
-        ],
-        "mime_type": "application/vnd.ms-excel",
-        "group": "document",
-        "magic": "Composite Document File",
-        "description": "Microsoft Excel CDF document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "cdf"
-        ],
-        "model_target_label": "cdf",
-        "target_label": "xls",
-        "correct_labels": [
-            "xls"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xlsb": {
-        "name": "xlsb",
-        "extensions": [
-            "xlsb"
-        ],
-        "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-        "group": "document",
-        "magic": "Microsoft Excel 2007+",
-        "description": "Microsoft Excel 2007+ document (binary format)",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "ooxml",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "xlsb",
-        "correct_labels": [
-            "xlsb",
-            "xlsx"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xlsx": {
-        "name": "xlsx",
-        "extensions": [
-            "xlsx",
-            "xlsm"
-        ],
-        "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-        "group": "document",
-        "magic": "Microsoft Excel 2007+",
-        "description": "Microsoft Excel 2007+ document",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "ooxml",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "xlsx",
-        "correct_labels": [
-            "xlsx",
-            "xlsb"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xml": {
-        "name": "xml",
-        "extensions": [
-            "xml"
-        ],
-        "mime_type": "text/xml",
-        "group": "code",
-        "magic": "XML document",
-        "description": "XML document",
-        "vt_type": "xml",
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "xml",
-        "target_label": "xml",
-        "correct_labels": [
-            "xml"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xpi": {
-        "name": "xpi",
-        "extensions": [
-            "xpi"
-        ],
-        "mime_type": "application/zip",
-        "group": "archive",
-        "magic": "Zip archive data",
-        "description": "Compressed installation archive (XPI)",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "xpi",
-        "correct_labels": [
-            "xpi"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "xz": {
-        "name": "xz",
-        "extensions": [
-            "xz"
-        ],
-        "mime_type": "application/x-xz",
-        "group": "archive",
-        "magic": "XZ compressed data",
-        "description": "XZ compressed data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "archive"
-        ],
-        "model_target_label": "xz",
-        "target_label": "xz",
-        "correct_labels": [
-            "xz"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "yaml": {
-        "name": "yaml",
-        "extensions": [
-            "yml",
-            "yaml"
-        ],
-        "mime_type": "application/x-yaml",
-        "group": "code",
-        "magic": "ASCII text",
-        "description": "YAML source",
-        "vt_type": null,
-        "datasets": [
-            "github"
-        ],
-        "parent": null,
-        "tags": [
-            "text",
-            "dl_target"
-        ],
-        "model_target_label": "yaml",
-        "target_label": "yaml",
-        "correct_labels": [
-            "yaml"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "zip": {
-        "name": "zip",
-        "extensions": [
-            "zip"
-        ],
-        "mime_type": "application/zip",
-        "group": "archive",
-        "magic": "Zip archive data",
-        "description": "Zip archive data",
-        "vt_type": null,
-        "datasets": [
-            "vt-ext"
-        ],
-        "parent": null,
-        "tags": [
-            "binary",
-            "zip_archive",
-            "archive"
-        ],
-        "model_target_label": "zip",
-        "target_label": "zip",
-        "correct_labels": [
-            "zip"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    },
-    "zlibstream": {
-        "name": "zlibstream",
-        "extensions": [],
-        "mime_type": "application/zlib",
-        "group": "application",
-        "magic": "zlib compressed data",
-        "description": "zlib compressed data",
-        "vt_type": "zlib",
-        "datasets": [
-            "vt-type"
-        ],
-        "parent": null,
-        "tags": [
-            "binary"
-        ],
-        "model_target_label": "zlibstream",
-        "target_label": "zlibstream",
-        "correct_labels": [
-            "zlibstream"
-        ],
-        "in_scope_for_output_content_type": true,
-        "in_scope_for_training": true
-    }
-}
diff --git a/python/magika/config/content_types_kb.min.json b/python/magika/config/content_types_kb.min.json
new file mode 100644
index 00000000..fa959868
--- /dev/null
+++ b/python/magika/config/content_types_kb.min.json
@@ -0,0 +1 @@
+{"3gp": {"mime_type": "video/3gpp", "group": "video", "description": null, "extensions": ["3gp"], "is_text": false}, "3ds": {"mime_type": "application/octet-stream", "group": "unknown", "description": "Nintendo 3DS roms", "extensions": ["3ds"], "is_text": false}, "3dsx": {"mime_type": "application/octet-stream", "group": "unknown", "description": "Nintendo 3DS homebrew", "extensions": ["3dsx"], "is_text": false}, "3dsm": {"mime_type": "application/x-3ds", "group": "image", "description": "3D studio Max", "extensions": ["3ds"], "is_text": false}, "3mf": {"mime_type": "application/vnd.ms-package.3dmanufacturing-3dmodel+xml", "group": "image", "description": "3D Manufacturing Format", "extensions": ["3mf"], "is_text": false}, "abnf": {"mime_type": null, "group": null, "description": "augmented Backus\u2013Naur form", "extensions": ["abnf"], "is_text": false}, "ace": {"mime_type": "application/x-ace-compressed", "group": null, "description": "ACE", "extensions": ["ace"], "is_text": false}, "ada": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "aff": {"mime_type": null, "group": null, "description": "Hunspell Affix", "extensions": ["aff"], "is_text": true}, "ai": {"mime_type": "application/pdf", "group": "document", "description": "Adobe Illustrator Artwork", "extensions": ["ai"], "is_text": false}, "aidl": {"mime_type": null, "group": null, "description": "Android Interface Definition Language", "extensions": ["aidl"], "is_text": true}, "algol68": {"mime_type": null, "group": null, "description": null, "extensions": ["a68"], "is_text": false}, "ani": {"mime_type": "application/x-navi-animation", "group": null, "description": "Animated cursor", "extensions": ["ani"], "is_text": false}, "apk": {"mime_type": "application/vnd.android.package-archive", "group": "executable", "description": "Android package", "extensions": ["apk"], "is_text": false}, "applebplist": {"mime_type": "application/x-bplist", "group": "application", 
"description": "Apple binary property list", "extensions": ["bplist", "plist"], "is_text": false}, "appledouble": {"mime_type": "multipart/appledouble", "group": "unknown", "description": "AppleDouble", "extensions": [], "is_text": false}, "appleplist": {"mime_type": "application/x-plist", "group": "application", "description": "Apple property list", "extensions": ["plist"], "is_text": true}, "applesingle": {"mime_type": "application/applefile", "group": "unknown", "description": "AppleSingle", "extensions": [], "is_text": false}, "ar": {"mime_type": "application/x-archive", "group": null, "description": null, "extensions": [], "is_text": false}, "arc": {"mime_type": "application/x-arc", "group": "archive", "description": "Arc", "extensions": ["arc"], "is_text": false}, "arj": {"mime_type": "application/arj", "group": "archive", "description": "Arj", "extensions": [], "is_text": false}, "arrow": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "asc": {"mime_type": "application/pgp-signature", "group": "text", "description": "PGP", "extensions": ["asc"], "is_text": true}, "asd": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "asf": {"mime_type": "video/x-ms-wma", "group": null, "description": "Microsoft Advanced Systems Format", "extensions": ["asf"], "is_text": false}, "asm": {"mime_type": "text/x-asm", "group": "code", "description": "Assembly", "extensions": ["s", "S", "asm"], "is_text": true}, "asp": {"mime_type": "text/html", "group": "code", "description": "ASP source", "extensions": ["aspx", "asp"], "is_text": true}, "autohotkey": {"mime_type": "text/plain", "group": null, "description": "AutoHotKey", "extensions": [], "is_text": true}, "autoit": {"mime_type": "text/plain", "group": "text", "description": "AutoIt", "extensions": ["au3"], "is_text": true}, "avi": {"mime_type": "video/x-msvideo", "group": "video", "description": "Audio Video Interleave", "extensions": 
["avi"], "is_text": false}, "avif": {"mime_type": "image/avif", "group": "video", "description": "AV1 Image File Format", "extensions": ["avif", "avifs"], "is_text": false}, "avro": {"mime_type": "application/x-avro-binary", "group": null, "description": "Apache Avro binary", "extensions": ["avro"], "is_text": false}, "awk": {"mime_type": "text/plain", "group": "text", "description": "Awk", "extensions": ["awk"], "is_text": true}, "ax": {"mime_type": "application/x-dosexec", "group": "executable", "description": "Directshow filter", "extensions": ["ax"], "is_text": false}, "batch": {"mime_type": "text/x-msdos-batch", "group": "code", "description": "DOS batch file", "extensions": ["bat"], "is_text": true}, "bazel": {"mime_type": null, "group": null, "description": null, "extensions": ["bzl"], "is_text": true}, "bcad": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "bib": {"mime_type": "text/x-bibtex", "group": "text", "description": "BibTeX", "extensions": ["bib"], "is_text": true}, "bmp": {"mime_type": "image/bmp", "group": "image", "description": "BMP image data", "extensions": ["bmp"], "is_text": false}, "bpg": {"mime_type": "image/bpg", "group": "image", "description": "BPG", "extensions": ["bpg"], "is_text": false}, "bpl": {"mime_type": null, "group": null, "description": null, "extensions": ["bpl"], "is_text": false}, "brainfuck": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "brf": {"mime_type": "text/plain", "group": "text", "description": "Braille Ready Format", "extensions": ["brf", "bfm"], "is_text": false}, "bzip": {"mime_type": "application/x-bzip2", "group": "archive", "description": "bzip2 compressed data", "extensions": ["bz2", "tbz2", "tar.bz2"], "is_text": false}, "bzip3": {"mime_type": null, "group": null, "description": "bzip3", "extensions": ["bz3"], "is_text": false}, "c": {"mime_type": "text/x-c", "group": "code", "description": "C source", 
"extensions": ["c"], "is_text": true}, "cab": {"mime_type": "application/vnd.ms-cab-compressed", "group": "archive", "description": "Microsoft Cabinet archive data", "extensions": ["cab"], "is_text": false}, "cad": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "cat": {"mime_type": "application/octet-stream", "group": "application", "description": "Windows Catalog file", "extensions": ["cat"], "is_text": false}, "cdf": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "chm": {"mime_type": "application/chm", "group": "application", "description": "MS Windows HtmlHelp Data", "extensions": ["chm"], "is_text": false}, "clojure": {"mime_type": "text/x-clojure", "group": "code", "description": "Clojure", "extensions": ["clj", "cljs", "cljc", "cljr"], "is_text": true}, "cmake": {"mime_type": null, "group": null, "description": null, "extensions": ["cmake"], "is_text": true}, "cobol": {"mime_type": "text/x-cobol", "group": "code", "description": "Cobol", "extensions": ["cbl", "cob", "cpy", "CBL", "COB", "CPY"], "is_text": true}, "coff": {"mime_type": "application/x-coff", "group": "executable", "description": "Intel 80386 COFF", "extensions": ["obj", "o"], "is_text": false}, "coffeescript": {"mime_type": "text/coffeescript", "group": "code", "description": "CoffeeScript", "extensions": ["coffee"], "is_text": true}, "com": {"mime_type": "application/x-dosexec", "group": null, "description": null, "extensions": [], "is_text": false}, "cpl": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE Windows executable", "extensions": ["cpl"], "is_text": false}, "cpp": {"mime_type": "text/x-c", "group": "code", "description": "C++ source", "extensions": ["cc", "cpp", "cxx", "c++", "cppm", "ixx"], "is_text": true}, "crt": {"mime_type": null, "group": "text", "description": "Certificates (binary format)", "extensions": ["der", "cer", "crt"], "is_text": false}, "crx": 
{"mime_type": "application/x-chrome-extension", "group": "executable", "description": "Google Chrome extension", "extensions": ["crx"], "is_text": false}, "cs": {"mime_type": "text/plain", "group": "code", "description": "C# source", "extensions": ["cs", "csx"], "is_text": true}, "csproj": {"mime_type": null, "group": null, "description": null, "extensions": ["csproj"], "is_text": true}, "css": {"mime_type": "text/css", "group": "code", "description": "CSS source", "extensions": ["css"], "is_text": true}, "csv": {"mime_type": "text/csv", "group": "code", "description": "CSV document", "extensions": ["csv"], "is_text": true}, "ctl": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "dart": {"mime_type": "text/plain", "group": "code", "description": null, "extensions": ["dart"], "is_text": true}, "deb": {"mime_type": "application/vnd.debian.binary-package", "group": "archive", "description": "Debian binary package", "extensions": ["deb"], "is_text": false}, "dex": {"mime_type": "application/x-android-dex", "group": "executable", "description": "Dalvik dex file", "extensions": ["dex"], "is_text": false}, "dey": {"mime_type": "application/x-android-dey", "group": null, "description": null, "extensions": [], "is_text": false}, "dicom": {"mime_type": "application/dicom", "group": "image", "description": "DICOM", "extensions": ["dcm"], "is_text": false}, "diff": {"mime_type": null, "group": null, "description": null, "extensions": ["diff", "patch"], "is_text": true}, "directory": {"mime_type": "inode/directory", "group": "inode", "description": "A directory", "extensions": [], "is_text": false}, "django": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "dll": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE Windows executable", "extensions": ["dll"], "is_text": false}, "dm": {"mime_type": null, "group": "text", "description": "Dream Maker", "extensions": 
["dm"], "is_text": true}, "dmigd": {"mime_type": null, "group": "text", "description": "Dominion Mods", "extensions": ["dm"], "is_text": true}, "dmg": {"mime_type": "application/x-apple-diskimage", "group": "archive", "description": "Apple disk image", "extensions": ["dmg"], "is_text": false}, "dmscript": {"mime_type": null, "group": "code", "description": "Digital Micrograph Script", "extensions": ["s"], "is_text": true}, "doc": {"mime_type": "application/msword", "group": "document", "description": "Microsoft Word CDF document", "extensions": ["doc"], "is_text": false}, "dockerfile": {"mime_type": "text/x-dockerfile", "group": "code", "description": "Dockerfile", "extensions": [], "is_text": true}, "docx": {"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "group": "document", "description": "Microsoft Word 2007+ document", "extensions": ["docx", "docm"], "is_text": false}, "dosmbr": {"mime_type": null, "group": null, "description": "Master boot record", "extensions": [], "is_text": false}, "dotx": {"mime_type": null, "group": null, "description": "Office Word 2007 template", "extensions": ["dotx"], "is_text": false}, "dsstore": {"mime_type": "application/octet-stream", "group": "unknown", "description": "Application Desktop Services Store", "extensions": [], "is_text": false}, "dwg": {"mime_type": "image/x-dwg", "group": "image", "description": "Autocad Drawing", "extensions": ["dwg"], "is_text": false}, "dxf": {"mime_type": "image/vnd.dxf", "group": "image", "description": "Audocad Drawing Exchange Format", "extensions": ["dxf"], "is_text": true}, "dylib": {"mime_type": "application/x-mach-o", "group": "executable", "description": "Mach-O executable", "extensions": ["dylib"], "is_text": false}, "ebml": {"mime_type": null, "group": null, "description": "Extensible Binary Meta Language", "extensions": [], "is_text": false}, "elf": {"mime_type": "application/x-executable-elf", "group": "executable", "description": "ELF 
executable", "extensions": ["elf"], "is_text": false}, "elixir": {"mime_type": "text/plain", "group": null, "description": "Elixir script", "extensions": ["exs"], "is_text": true}, "emf": {"mime_type": "application/octet-stream", "group": "application", "description": "Windows Enhanced Metafile image data", "extensions": ["emf"], "is_text": false}, "eml": {"mime_type": "message/rfc822", "group": "text", "description": "RFC 822 mail", "extensions": ["eml"], "is_text": true}, "empty": {"mime_type": "inode/x-empty", "group": "inode", "description": "Empty file", "extensions": [], "is_text": false}, "epub": {"mime_type": "application/epub+zip", "group": "document", "description": "EPUB document", "extensions": ["epub"], "is_text": false}, "erb": {"mime_type": null, "group": null, "description": null, "extensions": ["erb"], "is_text": true}, "erlang": {"mime_type": "text/x-erlang", "group": "code", "description": null, "extensions": ["erl", "hrl"], "is_text": true}, "ese": {"mime_type": "application/x-ms-ese", "group": null, "description": "ESE Db", "extensions": ["dat"], "is_text": false}, "exe": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE executable", "extensions": ["exe"], "is_text": false}, "exp": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "flac": {"mime_type": "audio/flac", "group": "audio", "description": "FLAC audio bitstream data", "extensions": ["flac"], "is_text": false}, "flutter": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "flv": {"mime_type": "video/x-flv", "group": "video", "description": "Flash Video", "extensions": ["flv"], "is_text": false}, "fortran": {"mime_type": "text/x-fortran", "group": "document", "description": "Fortran", "extensions": ["f90", "f95", "f03", "F90"], "is_text": true}, "fpx": {"mime_type": null, "group": "image", "description": "Flashpix", "extensions": ["fpx"], "is_text": false}, "gemfile": 
{"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "gemspec": {"mime_type": null, "group": null, "description": null, "extensions": ["gemspec"], "is_text": true}, "gif": {"mime_type": "image/gif", "group": "image", "description": "GIF image data", "extensions": ["gif"], "is_text": false}, "gitattributes": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "gitmodules": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "gleam": {"mime_type": null, "group": "code", "description": "Gleam", "extensions": ["gleam"], "is_text": true}, "go": {"mime_type": "text/x-golang", "group": "code", "description": "Golang source", "extensions": ["go"], "is_text": true}, "gpx": {"mime_type": null, "group": null, "description": "XML document", "extensions": ["gpx"], "is_text": false}, "gradle": {"mime_type": null, "group": null, "description": null, "extensions": ["gradle"], "is_text": true}, "groovy": {"mime_type": null, "group": null, "description": null, "extensions": ["groovy"], "is_text": true}, "gzip": {"mime_type": "application/gzip", "group": "archive", "description": "gzip compressed data", "extensions": ["gz", "gzip", "tgz", "tar.gz"], "is_text": false}, "h": {"mime_type": null, "group": null, "description": null, "extensions": ["h"], "is_text": true}, "h5": {"mime_type": "application/x-hdf5", "group": null, "description": "Hierarchical Data Format v5", "extensions": ["h5", "hdf5"], "is_text": false}, "handlebars": {"mime_type": null, "group": null, "description": null, "extensions": ["hbs", "handlebars"], "is_text": true}, "haskell": {"mime_type": "text/plain", "group": null, "description": "Haskell", "extensions": ["hs", "lhs"], "is_text": true}, "hcl": {"mime_type": null, "group": null, "description": "HashiCorp configuration language.", "extensions": ["hcl"], "is_text": true}, "heif": {"mime_type": "image/heic", "group": "image", "description": 
"High Efficiency Image File", "extensions": ["heif", "heifs", "heic", "heics"], "is_text": false}, "hfs": {"mime_type": "application/x-hfs", "group": null, "description": null, "extensions": ["hfs"], "is_text": false}, "hlp": {"mime_type": "application/winhlp", "group": "application", "description": "MS Windows help", "extensions": ["hlp"], "is_text": false}, "hpp": {"mime_type": "text/x-h", "group": "code", "description": null, "extensions": ["hh", "hpp", "hxx", "h++"], "is_text": true}, "hta": {"mime_type": "application/hta", "group": "code", "description": "HTML Application", "extensions": ["hta"], "is_text": false}, "htaccess": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "html": {"mime_type": "text/html", "group": "code", "description": "HTML document", "extensions": ["html", "htm", "xhtml", "xht"], "is_text": true}, "hve": {"mime_type": null, "group": "unknown", "description": null, "extensions": [], "is_text": false}, "hwp": {"mime_type": "application/x-hwp", "group": "document", "description": "Hangul Word Processor", "extensions": ["hwp"], "is_text": false}, "icc": {"mime_type": "application/vnd.iccprofile", "group": null, "description": "ICC profile", "extensions": ["icc"], "is_text": false}, "icns": {"mime_type": null, "group": null, "description": "Mac OS X icon", "extensions": ["icns"], "is_text": false}, "ico": {"mime_type": "image/vnd.microsoft.icon", "group": "image", "description": "MS Windows icon resource", "extensions": ["ico"], "is_text": false}, "ics": {"mime_type": "text/calendar", "group": null, "description": "Internet Calendaring and Scheduling", "extensions": ["ics"], "is_text": true}, "ignorefile": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "img": {"mime_type": null, "group": null, "description": null, "extensions": ["img"], "is_text": false}, "ini": {"mime_type": "text/plain", "group": "text", "description": "INI configuration file", 
"extensions": ["ini"], "is_text": true}, "internetshortcut": {"mime_type": "application/x-mswinurl", "group": "application", "description": "MS Windows Internet shortcut", "extensions": ["url"], "is_text": true}, "iosapp": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "ipynb": {"mime_type": null, "group": null, "description": null, "extensions": ["ipynb"], "is_text": true}, "iso": {"mime_type": "application/x-iso9660-image", "group": "archive", "description": "ISO 9660 CD-ROM filesystem data", "extensions": ["iso"], "is_text": false}, "jar": {"mime_type": "application/java-archive", "group": "archive", "description": "Java archive data (JAR)", "extensions": ["jar", "klib"], "is_text": false}, "java": {"mime_type": "text/x-java", "group": "code", "description": "Java source", "extensions": ["java"], "is_text": true}, "javabytecode": {"mime_type": "application/x-java-applet", "group": "executable", "description": "Java compiled bytecode", "extensions": ["class"], "is_text": false}, "javascript": {"mime_type": "application/javascript", "group": "code", "description": "JavaScript source", "extensions": ["js", "mjs", "cjs"], "is_text": true}, "jinja": {"mime_type": null, "group": null, "description": "Jinja Template", "extensions": ["jinja", "jinja2", "j2"], "is_text": true}, "jng": {"mime_type": "image/jng", "group": "image", "description": "JPEG network graphics", "extensions": ["jng"], "is_text": false}, "jnlp": {"mime_type": "application/x-java-jnlp-file", "group": "code", "description": "Java Network Launch Protocol", "extensions": ["jnlp"], "is_text": true}, "jp2": {"mime_type": "image/jpeg2000", "group": "image", "description": "jpeg2000", "extensions": ["jp2"], "is_text": false}, "jpeg": {"mime_type": "image/jpeg", "group": "image", "description": "JPEG image data", "extensions": ["jpg", "jpeg"], "is_text": false}, "json": {"mime_type": "application/json", "group": "code", "description": "JSON document", 
"extensions": ["json"], "is_text": true}, "jsonc": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "jsonl": {"mime_type": null, "group": null, "description": null, "extensions": ["jsonl", "jsonld"], "is_text": true}, "jsx": {"mime_type": null, "group": null, "description": null, "extensions": ["jsx", "mjsx", "cjsx"], "is_text": true}, "julia": {"mime_type": "text/x-julia", "group": "document", "description": "Julia", "extensions": ["jl"], "is_text": true}, "jxl": {"mime_type": "image/jxl", "group": "image", "description": "JPEG XL", "extensions": ["jxl"], "is_text": false}, "ko": {"mime_type": "application/x-executable-elf", "group": "executable", "description": "ELF executable, kernel object", "extensions": ["ko"], "is_text": false}, "kotlin": {"mime_type": "text/plain", "group": "code", "description": "Kotlin", "extensions": ["kt", "kts"], "is_text": true}, "ks": {"mime_type": null, "group": null, "description": "Tyrano", "extensions": ["ks"], "is_text": true}, "latex": {"mime_type": "text/x-tex", "group": "text", "description": "LaTeX document", "extensions": ["tex", "sty"], "is_text": true}, "latexaux": {"mime_type": null, "group": null, "description": null, "extensions": ["aux"], "is_text": false}, "less": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "lha": {"mime_type": "application/x-lha", "group": null, "description": "LHarc", "extensions": ["lha", "lzh"], "is_text": false}, "license": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": true}, "lisp": {"mime_type": "text/x-lisp", "group": "code", "description": "Lisp source", "extensions": ["lisp", "lsp", "l", "cl"], "is_text": true}, "litcs": {"mime_type": null, "group": null, "description": "Literate CS", "extensions": ["litcoffee"], "is_text": false}, "lnk": {"mime_type": "application/x-ms-shortcut", "group": "application", "description": "MS Windows shortcut", "extensions": 
["lnk"], "is_text": false}, "lock": {"mime_type": null, "group": null, "description": null, "extensions": ["lock"], "is_text": true}, "lrz": {"mime_type": "application/x-lrzip", "group": null, "description": "LRZip", "extensions": ["lrz"], "is_text": false}, "lua": {"mime_type": "text/plain", "group": "text", "description": "Lua", "extensions": ["lua"], "is_text": true}, "lz": {"mime_type": "application/x-lzip", "group": "archive", "description": "LZip", "extensions": ["lz"], "is_text": false}, "lz4": {"mime_type": "application/x-lz4", "group": "archive", "description": "LZ4", "extensions": ["lz4"], "is_text": false}, "lzx": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "m3u": {"mime_type": "text/plain", "group": "application", "description": "M3U playlist", "extensions": ["m3u8", "m3u"], "is_text": true}, "m4": {"mime_type": "text/plain", "group": "code", "description": "GNU Macro", "extensions": ["m4"], "is_text": true}, "macho": {"mime_type": "application/x-mach-o", "group": "executable", "description": "Mach-O executable", "extensions": [], "is_text": false}, "maff": {"mime_type": "application/x-maff", "group": null, "description": null, "extensions": ["maff"], "is_text": false}, "makefile": {"mime_type": "text/x-makefile", "group": "code", "description": "Makefile source", "extensions": [], "is_text": true}, "markdown": {"mime_type": "text/markdown", "group": "text", "description": "Markdown document", "extensions": ["md", "markdown"], "is_text": true}, "matlab": {"mime_type": null, "group": null, "description": "Matlab Source", "extensions": ["m", "matlab"], "is_text": true}, "mht": {"mime_type": "application/x-mimearchive", "group": "code", "description": "MHTML document", "extensions": ["mht"], "is_text": true}, "midi": {"mime_type": "audio/midi", "group": "audio", "description": "Midi", "extensions": ["mid"], "is_text": false}, "mkv": {"mime_type": "video/x-matroska", "group": "video", "description": 
"Matroska", "extensions": ["mkv"], "is_text": false}, "mp2": {"mime_type": null, "group": null, "description": "MP2 stream", "extensions": ["mp2"], "is_text": false}, "mp3": {"mime_type": "audio/mpeg", "group": "audio", "description": "MP3 media file", "extensions": ["mp3"], "is_text": false}, "mp4": {"mime_type": "video/mp4", "group": "video", "description": "MP4 media file", "extensions": ["mp4"], "is_text": false}, "mpegts": {"mime_type": "video/MP2T", "group": "video", "description": "MPEG Transport stream", "extensions": ["ts", "tsv", "tsa", "m2t"], "is_text": false}, "mscompress": {"mime_type": "application/x-ms-compress-szdd", "group": "archive", "description": "MS Compress archive data", "extensions": [], "is_text": false}, "msi": {"mime_type": "application/x-msi", "group": "archive", "description": "Microsoft Installer file", "extensions": ["msi"], "is_text": false}, "msix": {"mime_type": null, "group": null, "description": "Windows app package", "extensions": ["msix"], "is_text": false}, "mst": {"mime_type": null, "group": null, "description": null, "extensions": ["mst"], "is_text": false}, "mui": {"mime_type": "application/x-dosexec", "group": "application", "description": "PE Windows executable", "extensions": ["mui"], "is_text": false}, "mum": {"mime_type": "text/xml", "group": "application", "description": "Windows Update Package file", "extensions": ["mum"], "is_text": true}, "mun": {"mime_type": null, "group": null, "description": null, "extensions": ["mun"], "is_text": false}, "nim": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "npy": {"mime_type": null, "group": null, "description": "Numpy Array", "extensions": ["npy"], "is_text": false}, "npz": {"mime_type": null, "group": null, "description": "Numpy Arrays Archive", "extensions": ["npz"], "is_text": false}, "null": {"mime_type": null, "group": null, "description": null, "extensions": ["null"], "is_text": false}, "nupkg": {"mime_type": null, 
"group": null, "description": "NuGet Package", "extensions": ["nupkg"], "is_text": false}, "object": {"mime_type": null, "group": null, "description": null, "extensions": ["o"], "is_text": false}, "objectivec": {"mime_type": "text/x-objcsrc", "group": null, "description": "ObjectiveC", "extensions": ["m", "mm"], "is_text": true}, "ocaml": {"mime_type": "text-ocaml", "group": "text", "description": "OCaml", "extensions": ["ml", "mli"], "is_text": true}, "ocx": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE Windows executable", "extensions": ["ocx"], "is_text": false}, "odex": {"mime_type": "application/x-executable-elf", "group": "executable", "description": "ODEX ELF executable", "extensions": ["odex"], "is_text": false}, "odin": {"mime_type": null, "group": "code", "description": "Odin", "extensions": ["odin"], "is_text": true}, "odp": {"mime_type": "application/vnd.oasis.opendocument.presentation", "group": "document", "description": "OpenDocument Presentation", "extensions": ["odp"], "is_text": false}, "ods": {"mime_type": "application/vnd.oasis.opendocument.spreadsheet", "group": "document", "description": "OpenDocument Spreadsheet", "extensions": ["ods"], "is_text": false}, "odt": {"mime_type": "application/vnd.oasis.opendocument.text", "group": "document", "description": "OpenDocument Text", "extensions": ["odt"], "is_text": false}, "ogg": {"mime_type": "audio/ogg", "group": "audio", "description": "Ogg data", "extensions": ["ogg"], "is_text": false}, "ole": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "one": {"mime_type": "application/msonenote", "group": "document", "description": "One Note", "extensions": ["one"], "is_text": false}, "onnx": {"mime_type": null, "group": null, "description": "Open Neural Network Exchange", "extensions": ["onnx"], "is_text": false}, "ooxml": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "otf": 
{"mime_type": "font/otf", "group": "font", "description": "OpenType font", "extensions": ["otf"], "is_text": false}, "outlook": {"mime_type": "application/vnd.ms-outlook", "group": "application", "description": "MS Outlook Message", "extensions": [], "is_text": false}, "palmos": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "parquet": {"mime_type": "application/vnd.apache.parquet", "group": "unknown", "description": "Apache Parquet", "extensions": ["pqt", "parquet"], "is_text": false}, "pascal": {"mime_type": "text/x-pascal", "group": "code", "description": null, "extensions": ["pas", "pp"], "is_text": true}, "pbm": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "pcap": {"mime_type": "application/vnd.tcpdump.pcap", "group": "application", "description": "pcap capture file", "extensions": ["pcap", "pcapng"], "is_text": false}, "pdb": {"mime_type": null, "group": null, "description": "Windows Program Database", "extensions": ["pdb"], "is_text": false}, "pdf": {"mime_type": "application/pdf", "group": "document", "description": "PDF document", "extensions": ["pdf"], "is_text": false}, "pebin": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE Windows executable", "extensions": ["exe", "dll"], "is_text": false}, "pem": {"mime_type": "application/x-pem-file", "group": "application", "description": "PEM certificate", "extensions": ["pem", "pub", "gpg"], "is_text": true}, "perl": {"mime_type": "text/x-perl", "group": "code", "description": "Perl source", "extensions": ["pl"], "is_text": true}, "pgp": {"mime_type": "application/pgp-keys", "group": null, "description": "PGP", "extensions": ["gpg", "pgp"], "is_text": false}, "php": {"mime_type": "text/x-php", "group": "code", "description": "PHP source", "extensions": ["php"], "is_text": true}, "pickle": {"mime_type": null, "group": null, "description": "Python pickle", "extensions": ["pickle", "pkl"], 
"is_text": false}, "png": {"mime_type": "image/png", "group": "image", "description": "PNG image", "extensions": ["png"], "is_text": false}, "po": {"mime_type": null, "group": null, "description": "Portable Object (PO) for i18n", "extensions": ["po"], "is_text": true}, "postscript": {"mime_type": "application/postscript", "group": "document", "description": "PostScript document", "extensions": ["ps"], "is_text": false}, "powershell": {"mime_type": "application/x-powershell", "group": "code", "description": "Powershell source", "extensions": ["ps1"], "is_text": true}, "ppt": {"mime_type": "application/vnd.ms-powerpoint", "group": "document", "description": "Microsoft PowerPoint CDF document", "extensions": ["ppt"], "is_text": false}, "pptx": {"mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "group": "document", "description": "Microsoft PowerPoint 2007+ document", "extensions": ["pptx", "pptm"], "is_text": false}, "printfox": {"mime_type": null, "group": null, "description": "c64", "extensions": [], "is_text": false}, "prolog": {"mime_type": "text/x-prolog", "group": "code", "description": null, "extensions": ["pl", "pro", "P"], "is_text": true}, "proteindb": {"mime_type": null, "group": null, "description": "Protein DB", "extensions": ["pdb"], "is_text": true}, "proto": {"mime_type": null, "group": null, "description": null, "extensions": ["proto"], "is_text": true}, "protobuf": {"mime_type": "application/protobuf", "group": "unknown", "description": "Protocol buffers", "extensions": ["protobuf", "pb"], "is_text": false}, "psd": {"mime_type": "image/vnd.adobe.photoshop", "group": "image", "description": "Adobe Photoshop", "extensions": ["psd"], "is_text": false}, "pytorch": {"mime_type": null, "group": null, "description": "Pytorch storage file", "extensions": ["pt", "pth"], "is_text": false}, "pub": {"mime_type": "application/x-mspublisher", "group": null, "description": null, "extensions": ["pub"], "is_text": false}, 
"python": {"mime_type": "text/x-python", "group": "code", "description": "Python source", "extensions": ["py", "pyi"], "is_text": true}, "pythonbytecode": {"mime_type": "application/x-bytecode.python", "group": "executable", "description": "Python compiled bytecode", "extensions": ["pyc", "pyo"], "is_text": false}, "pythonpar": {"mime_type": null, "group": null, "description": null, "extensions": ["par"], "is_text": false}, "qoi": {"mime_type": "image/x-qoi", "group": "image", "description": "Quite Ok Image", "extensions": ["qoi"], "is_text": false}, "qt": {"mime_type": "video/quicktime", "group": "video", "description": "QuickTime", "extensions": ["mov"], "is_text": false}, "r": {"mime_type": "text/x-R", "group": "code", "description": "R (language)", "extensions": ["R"], "is_text": true}, "randomascii": {"mime_type": "text/plain", "group": "text", "description": "Random ASCII characters", "extensions": [], "is_text": true}, "randombytes": {"mime_type": "application/octet-stream", "group": "unknown", "description": "Random bytes", "extensions": [], "is_text": false}, "rar": {"mime_type": "application/x-rar", "group": "archive", "description": "RAR archive data", "extensions": ["rar"], "is_text": false}, "rdf": {"mime_type": "application/rdf+xml", "group": "text", "description": "Resource Description Framework document (RDF)", "extensions": ["rdf"], "is_text": true}, "riff": {"mime_type": "application/x-riff", "group": null, "description": null, "extensions": [], "is_text": false}, "rlib": {"mime_type": "application/x-archive", "group": "archive", "description": "rust library", "extensions": ["rlib"], "is_text": false}, "rll": {"mime_type": null, "group": "executable", "description": "Resource Library", "extensions": ["rll"], "is_text": false}, "rpm": {"mime_type": "application/x-rpm", "group": "archive", "description": "RedHat Package Manager archive (RPM)", "extensions": ["rpm"], "is_text": false}, "rst": {"mime_type": "text/x-rst", "group": "text", 
"description": "ReStructuredText document", "extensions": ["rst"], "is_text": true}, "rtf": {"mime_type": "text/rtf", "group": "text", "description": "Rich Text Format document", "extensions": ["rtf"], "is_text": true}, "ruby": {"mime_type": "application/x-ruby", "group": "code", "description": "Ruby source", "extensions": ["rb"], "is_text": true}, "rust": {"mime_type": "application/x-rust", "group": "code", "description": "Rust source", "extensions": ["rs"], "is_text": true}, "rzip": {"mime_type": null, "group": null, "description": "Rzip", "extensions": ["rz"], "is_text": false}, "scala": {"mime_type": "application/x-scala", "group": "code", "description": "Scala source", "extensions": ["scala"], "is_text": true}, "scheme": {"mime_type": "text/x-scheme", "group": "code", "description": null, "extensions": ["scm", "ss"], "is_text": false}, "scr": {"mime_type": "application/x-dosexec", "group": "executable", "description": "PE Windows executable", "extensions": ["scr"], "is_text": false}, "scriptwsf": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "scss": {"mime_type": null, "group": null, "description": null, "extensions": ["scss"], "is_text": true}, "sevenzip": {"mime_type": "application/x-7z-compressed", "group": "archive", "description": "7-zip archive data", "extensions": ["7z"], "is_text": false}, "sgml": {"mime_type": "application/sgml", "group": "text", "description": "sgml", "extensions": ["sgml"], "is_text": true}, "sh3d": {"mime_type": null, "group": null, "description": null, "extensions": ["sh3d"], "is_text": false}, "shell": {"mime_type": "text/x-shellscript", "group": "code", "description": "Shell script", "extensions": ["sh"], "is_text": true}, "smali": {"mime_type": "application/x-smali", "group": "code", "description": "Smali source", "extensions": ["smali"], "is_text": true}, "snap": {"mime_type": null, "group": null, "description": null, "extensions": ["snap"], "is_text": false}, "so": {"mime_type": 
"application/x-executable-elf", "group": "executable", "description": "ELF executable, shared library", "extensions": ["so"], "is_text": false}, "solidity": {"mime_type": null, "group": null, "description": null, "extensions": ["sol"], "is_text": true}, "sql": {"mime_type": "application/x-sql", "group": "code", "description": "SQL source", "extensions": ["sql"], "is_text": true}, "sqlite": {"mime_type": null, "group": null, "description": "SQLITE database", "extensions": ["sqlite", "sqlite3"], "is_text": false}, "squashfs": {"mime_type": "application/octet-stream", "group": "archive", "description": "Squash filesystem", "extensions": [], "is_text": false}, "srt": {"mime_type": null, "group": null, "description": "SubRip Text Format", "extensions": ["srt"], "is_text": true}, "stlbinary": {"mime_type": "application/sla", "group": "image", "description": "Stereolithography CAD (binary)", "extensions": ["stl"], "is_text": false}, "stltext": {"mime_type": "application/sla", "group": "image", "description": "Stereolithography CAD (text)", "extensions": ["stl"], "is_text": true}, "sum": {"mime_type": null, "group": null, "description": null, "extensions": ["sum"], "is_text": true}, "svd": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "svg": {"mime_type": "image/svg+xml", "group": "image", "description": "SVG Scalable Vector Graphics image data", "extensions": ["svg"], "is_text": true}, "swf": {"mime_type": "application/x-shockwave-flash", "group": "executable", "description": "Small Web File", "extensions": ["swf"], "is_text": false}, "swift": {"mime_type": "text/x-swift", "group": "code", "description": "Swift", "extensions": ["swift"], "is_text": true}, "symlink": {"mime_type": "inode/symlink", "group": "inode", "description": "Symbolic link", "extensions": [], "is_text": false}, "symlinktext": {"mime_type": "text/plain", "group": "application", "description": "Symbolic link (textual representation)", "extensions": [], 
"is_text": true}, "sys": {"mime_type": "application/x-windows-driver", "group": "executable", "description": "PE Windows executable", "extensions": ["sys"], "is_text": false}, "tar": {"mime_type": "application/x-tar", "group": "archive", "description": "POSIX tar archive", "extensions": ["tar"], "is_text": false}, "tcl": {"mime_type": "application/x-tcl", "group": "text", "description": "Tickle", "extensions": ["tcl"], "is_text": true}, "textproto": {"mime_type": null, "group": null, "description": null, "extensions": ["textproto", "textpb", "pbtxt"], "is_text": true}, "tga": {"mime_type": "image/x-tga", "group": "image", "description": "Targa image data", "extensions": ["tga"], "is_text": false}, "thumbsdb": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "tiff": {"mime_type": "image/tiff", "group": "image", "description": "TIFF image data", "extensions": ["tiff", "tif"], "is_text": false}, "tmdx": {"mime_type": null, "group": null, "description": null, "extensions": ["tmdx", "tmvx"], "is_text": false}, "toml": {"mime_type": null, "group": "text", "description": null, "extensions": ["toml"], "is_text": true}, "torrent": {"mime_type": "application/x-bittorrent", "group": "application", "description": "BitTorrent file", "extensions": ["torrent"], "is_text": false}, "troff": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "tsv": {"mime_type": "text/tsv", "group": "code", "description": "TSV document", "extensions": ["tsv"], "is_text": true}, "tsx": {"mime_type": null, "group": null, "description": null, "extensions": ["tsx", "mtsx", "ctsx"], "is_text": true}, "ttf": {"mime_type": "font/sfnt", "group": "font", "description": "TrueType Font data", "extensions": ["ttf", "ttc"], "is_text": false}, "twig": {"mime_type": null, "group": null, "description": null, "extensions": ["twig"], "is_text": true}, "txt": {"mime_type": "text/plain", "group": "text", "description": "Generic text 
document", "extensions": ["txt"], "is_text": true}, "txtascii": {"mime_type": "text/plain", "group": "text", "description": "Generic text document encoded in ASCII", "extensions": ["txt"], "is_text": true}, "txtutf16": {"mime_type": "text/plain", "group": "text", "description": "Generic text document encoded in UTF-16", "extensions": ["txt"], "is_text": true}, "txtutf8": {"mime_type": "text/plain", "group": "text", "description": "Generic text document encoded in UTF-8", "extensions": ["txt"], "is_text": true}, "typescript": {"mime_type": "application/typescript", "group": "text", "description": "Typescript", "extensions": ["ts", "mts", "cts"], "is_text": true}, "udf": {"mime_type": "application/x-udf-image", "group": null, "description": "Universal Disc Format", "extensions": [], "is_text": false}, "undefined": {"mime_type": "application/undefined", "group": "undefined", "description": "Undefined", "extensions": [], "is_text": false}, "unixcompress": {"mime_type": "application/x-compress", "group": null, "description": null, "extensions": ["z"], "is_text": false}, "unknown": {"mime_type": "application/octet-stream", "group": "unknown", "description": "Unknown binary data", "extensions": [], "is_text": false}, "vba": {"mime_type": "text/vbscript", "group": "code", "description": "MS Visual Basic source (VBA)", "extensions": ["vbs", "vba", "vb"], "is_text": true}, "vbe": {"mime_type": null, "group": "code", "description": "EncryptedVBS", "extensions": ["vbe"], "is_text": false}, "vcard": {"mime_type": null, "group": null, "description": null, "extensions": ["vcard"], "is_text": false}, "vcs": {"mime_type": null, "group": null, "description": null, "extensions": [], "is_text": false}, "vcxproj": {"mime_type": null, "group": null, "description": null, "extensions": ["vcxproj"], "is_text": true}, "verilog": {"mime_type": null, "group": "code", "description": null, "extensions": ["v", "verilog", "vlg", "vh"], "is_text": true}, "vhd": {"mime_type": "application/x-vhd", 
"group": null, "description": "Virtual Hard Disk", "extensions": [], "is_text": false}, "vhdl": {"mime_type": null, "group": null, "description": "VHDL", "extensions": ["vhd"], "is_text": true}, "visio": {"mime_type": "application/vnd.ms-visio.drawing.main+xml", "group": "document", "description": "Microsoft Visio", "extensions": ["vsd", "vsdm", "vsdx", "vdw"], "is_text": false}, "vtt": {"mime_type": null, "group": null, "description": "Web Video Text Tracks", "extensions": ["vtt", "webvtt"], "is_text": true}, "vue": {"mime_type": null, "group": null, "description": null, "extensions": ["vue"], "is_text": true}, "wad": {"mime_type": "application/wad", "group": "archive", "description": "WAD", "extensions": ["wad"], "is_text": false}, "wasm": {"mime_type": "application/wasm", "group": "executable", "description": "Web Assembly", "extensions": ["wasm"], "is_text": false}, "wav": {"mime_type": "audio/x-wav", "group": "audio", "description": "Waveform Audio file (WAV)", "extensions": ["wav"], "is_text": false}, "webm": {"mime_type": "video/webm", "group": "video", "description": "WebM", "extensions": ["webm"], "is_text": false}, "webp": {"mime_type": "image/webp", "group": "image", "description": "WebP", "extensions": ["webp"], "is_text": false}, "wim": {"mime_type": "application/x-ms-wim", "group": "unknown", "description": "Windows Imaging Format", "extensions": ["wim", "swm", "esd"], "is_text": false}, "winregistry": {"mime_type": "text/x-ms-regedit", "group": "application", "description": "Windows Registry text", "extensions": ["reg"], "is_text": true}, "wma": {"mime_type": "audio/x-ms-wma", "group": "audio", "description": "Windows Media Audio", "extensions": ["wma"], "is_text": false}, "wmf": {"mime_type": "image/wmf", "group": "image", "description": "Windows metafile", "extensions": ["wmf"], "is_text": false}, "wmv": {"mime_type": "video/x-ms-wmv", "group": "video", "description": "Windows Media Video", "extensions": ["wmv"], "is_text": false}, "woff": 
{"mime_type": "font/woff", "group": "font", "description": "Web Open Font Format", "extensions": ["woff"], "is_text": false}, "woff2": {"mime_type": "font/woff2", "group": "font", "description": "Web Open Font Format v2", "extensions": ["woff2"], "is_text": false}, "xar": {"mime_type": "application/x-xar", "group": "archive", "description": "XAR archive compressed data", "extensions": ["pkg", "xar"], "is_text": false}, "xcf": {"mime_type": "image/x-xcf", "group": "image", "description": "Gimp image", "extensions": ["xcf"], "is_text": false}, "xls": {"mime_type": "application/vnd.ms-excel", "group": "document", "description": "Microsoft Excel CDF document", "extensions": ["xls"], "is_text": false}, "xlsb": {"mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "description": "Microsoft Excel 2007+ document (binary format)", "extensions": ["xlsb"], "is_text": false}, "xlsx": {"mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "description": "Microsoft Excel 2007+ document", "extensions": ["xlsx", "xlsm"], "is_text": false}, "xml": {"mime_type": "text/xml", "group": "code", "description": "XML document", "extensions": ["xml"], "is_text": true}, "xpi": {"mime_type": "application/zip", "group": "archive", "description": "Compressed installation archive (XPI)", "extensions": ["xpi"], "is_text": false}, "xsd": {"mime_type": null, "group": null, "description": null, "extensions": ["xsd"], "is_text": false}, "xz": {"mime_type": "application/x-xz", "group": "archive", "description": "XZ compressed data", "extensions": ["xz"], "is_text": false}, "yaml": {"mime_type": "application/x-yaml", "group": "code", "description": "YAML source", "extensions": ["yml", "yaml"], "is_text": true}, "yara": {"mime_type": null, "group": null, "description": null, "extensions": ["yar", "yara"], "is_text": true}, "zig": {"mime_type": "text/zig", "group": "code", "description": "Zig source", 
"extensions": ["zig"], "is_text": true}, "zip": {"mime_type": "application/zip", "group": "archive", "description": "Zip archive data", "extensions": ["zip"], "is_text": false}, "zlibstream": {"mime_type": "application/zlib", "group": "application", "description": "zlib compressed data", "extensions": [], "is_text": false}, "zst": {"mime_type": "application/zstd", "group": "archive", "description": "Zstandard", "extensions": ["zst"], "is_text": false}}
\ No newline at end of file
diff --git a/python/magika/config/magika_config.json b/python/magika/config/magika_config.json
deleted file mode 100644
index cb2035e1..00000000
--- a/python/magika/config/magika_config.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "default_model_name": "standard_v1",
-    "medium_confidence_threshold": 0.5,
-    "min_file_size_for_dl": 16,
-    "padding_token": 256,
-    "block_size": 4096
-}
\ No newline at end of file
diff --git a/python/magika/content_types.py b/python/magika/content_types.py
deleted file mode 100644
index 312fd5d0..00000000
--- a/python/magika/content_types.py
+++ /dev/null
@@ -1,441 +0,0 @@
-# Copyright 2024 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import json
-import sys
-from collections import defaultdict
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Set
-
-CONTENT_TYPES_CONFIG_PATH = (
-    Path(__file__).parent / "config" / "content_types_config.json"
-)
-
-
-"""
-This module defines ContentType, an abstraction for a content type and its
-associated metadata, and ContentTypesManager, a class that wraps a number of
-utility functions related to contentt types. Note that the ContentTypesManager
-is extensively used by the training framework, and not just by the magika python
-module.
-"""
-
-
-class ContentType:
-    # the tool returned unknown, '',  None, or similar
-    UNKNOWN = "unknown"
-    UNKNOWN_MIME_TYPE = "application/unknown"
-    UNKNOWN_CONTENT_TYPE_GROUP = "unknown"
-    UNKNOWN_MAGIC = "Unknown"
-    UNKNOWN_DESCRIPTION = "Unknown type"
-
-    # the tool returned an output that we currently do not map to our content types
-    UNSUPPORTED = "unsupported"
-
-    # the tool exited with returncode != 0
-    ERROR = "error"
-
-    # there is no result for this tool
-    MISSING = "missing"
-
-    # the file is empty (or just \x00, spaces, etc.)
-    EMPTY = "empty"
-
-    # the output of the tool is gibberish / meaningless type
-    CORRUPTED = "corrupted"
-
-    # the tool did not return in time
-    TIMEOUT = "timeout"
-
-    # the mapping functions returned a type we don't recognized, and we flag it
-    # as NOT VALID
-    NOT_VALID = "not_valid"
-
-    # Used when a file path does not exist
-    FILE_DOES_NOT_EXIST = "file_does_not_exist"
-
-    # Used when a file path exists, but there are permission issues, e.g., can't
-    # read file
-    PERMISSION_ERROR = "permission_error"
-
-    # more special labels
-    DIRECTORY = "directory"
-    SYMLINK = "symlink"
-
-    GENERIC_TEXT = "txt"
-
-    def __init__(
-        self,
-        name: str,
-        extensions: List[str],
-        mime_type: Optional[str],
-        group: Optional[str],
-        magic: Optional[str],
-        description: Optional[str],
-        vt_type: Optional[str],
-        datasets: List[str],
-        parent: Optional[str],
-        tags: List[str],
-        model_target_label: Optional[str],
-        target_label: Optional[str],
-        correct_labels: List[str],
-        in_scope_for_output_content_type: bool,
-        add_automatic_tags: bool = True,
-    ):
-        self.name = name
-        self.extensions = extensions
-        self.mime_type = mime_type
-        self.group = group
-        self.magic = magic
-        self.description = description
-        self.vt_type = vt_type
-        self.datasets = datasets
-        self.parent = parent
-        self.tags = tags
-        self.model_target_label = model_target_label
-        self.target_label = target_label
-        self.correct_labels = correct_labels
-        self.in_scope_for_output_content_type = in_scope_for_output_content_type
-
-        # add automatic tags based on dataset
-        if add_automatic_tags:
-            if self.datasets is not None:
-                for dataset in self.datasets:
-                    self.tags.append(f"dataset:{dataset}")
-            if self.model_target_label is not None:
-                self.tags.append(f"model_target_label:{self.model_target_label}")
-            if self.target_label is not None:
-                self.tags.append(f"target_label:{self.target_label}")
-            if self.correct_labels is not None:
-                for cl in self.correct_labels:
-                    self.tags.append(f"correct_label:{cl}")
-
-    @property
-    def is_text(self) -> bool:
-        return "text" in self.tags
-
-    @property
-    def in_scope_for_training(self) -> bool:
-        if len(self.datasets) == 0:
-            return False
-        if self.model_target_label is None:
-            return False
-        if self.target_label is None:
-            return False
-        if len(self.correct_labels) == 0:
-            return False
-        return True
-
-    def to_dict(self) -> Dict[str, Any]:
-        info: Dict[str, Any] = {
-            "name": self.name,
-            "extensions": self.extensions,
-            "mime_type": self.mime_type,
-            "group": self.group,
-            "magic": self.magic,
-            "description": self.description,
-            "vt_type": self.vt_type,
-            "datasets": self.datasets,
-            "parent": self.parent,
-            "tags": self.tags,
-            "model_target_label": self.model_target_label,
-            "target_label": self.target_label,
-            "correct_labels": self.correct_labels,
-            "in_scope_for_output_content_type": self.in_scope_for_output_content_type,
-            "in_scope_for_training": self.in_scope_for_training,
-        }
-        return info
-
-    @staticmethod
-    def from_dict(info_d: Dict, add_automatic_tags: bool = True) -> ContentType:
-        info_d_copy = dict(info_d)
-        info_d_copy.pop("in_scope_for_training")
-        ct = ContentType(add_automatic_tags=add_automatic_tags, **info_d_copy)
-        return ct
-
-    def __str__(self) -> str:
-        return f"<{self.name}>"
-
-    def __repr__(self) -> str:
-        return str(self)
-
-
-class ContentTypesManager:
-    SPECIAL_CONTENT_TYPES: List[str] = [
-        ContentType.UNKNOWN,
-        ContentType.UNSUPPORTED,
-        ContentType.ERROR,
-        ContentType.MISSING,
-        ContentType.EMPTY,
-        ContentType.CORRUPTED,
-        ContentType.NOT_VALID,
-        ContentType.PERMISSION_ERROR,
-        ContentType.GENERIC_TEXT,
-    ]
-
-    SUPPORTED_TARGET_LABELS_SPEC = [
-        "content-type",
-        "model-target-label",
-        "target-label",
-    ]
-
-    def __init__(
-        self,
-        content_type_config_path: Path = CONTENT_TYPES_CONFIG_PATH,
-        add_automatic_tags: bool = True,
-    ):
-        self.cts: Dict[str, ContentType] = {}
-        # tag to content type map
-        self.tag2cts: Dict[str, List[ContentType]] = defaultdict(list)
-        # map from extension to content types
-        self.ext2cts: Dict[str, List[ContentType]] = defaultdict(list)
-        self.load_content_types_info(
-            content_type_config_path=content_type_config_path,
-            add_automatic_tags=add_automatic_tags,
-        )
-
-    def load_content_types_info(
-        self, content_type_config_path: Path, add_automatic_tags: bool = True
-    ) -> None:
-        with open(content_type_config_path) as f:
-            info = json.load(f)
-        self.cts = {}
-        for k, v in info.items():
-            assert k == v["name"]
-            ct = ContentType.from_dict(v, add_automatic_tags=add_automatic_tags)
-            self.cts[k] = ct
-            for tag in ct.tags:
-                self.tag2cts[tag].append(ct)
-            for ext in ct.extensions:
-                self.ext2cts[ext].append(ct)
-
-    def get(self, content_type_name: str) -> Optional[ContentType]:
-        return self.cts.get(content_type_name)
-
-    def get_or_raise(self, content_type_name: Optional[str]) -> ContentType:
-        if content_type_name is None:
-            raise Exception("Input content_type_name is None")
-        ct = self.get(content_type_name)
-        if ct is None:
-            raise Exception(f'Could not get a ContentType for "{content_type_name}"')
-        return ct
-
-    def get_mime_type(
-        self, content_type_name: str, default: str = ContentType.UNKNOWN_MIME_TYPE
-    ) -> str:
-        ct = self.get(content_type_name)
-        if ct is None:
-            return default
-        if ct.mime_type is None:
-            return default
-        return ct.mime_type
-
-    def get_group(
-        self,
-        content_type_name: str,
-        default: str = ContentType.UNKNOWN_CONTENT_TYPE_GROUP,
-    ) -> str:
-        ct = self.get(content_type_name)
-        if ct is None:
-            return default
-        if ct.group is None:
-            return default
-        return ct.group
-
-    def get_magic(
-        self,
-        content_type_name: str,
-        default: str = ContentType.UNKNOWN_MAGIC,
-        fallback_to_label: bool = True,
-    ) -> str:
-        ct = self.get(content_type_name)
-        if ct is None or ct.magic is None:
-            if fallback_to_label:
-                return content_type_name
-            else:
-                return default
-        return ct.magic
-
-    def get_description(
-        self,
-        content_type_name: str,
-        default: str = ContentType.UNKNOWN_DESCRIPTION,
-        fallback_to_label: bool = True,
-    ) -> str:
-        ct = self.get(content_type_name)
-        if ct is None or ct.description is None:
-            if fallback_to_label:
-                return content_type_name
-            else:
-                return default
-        return ct.description
-
-    def get_is_text(
-        self,
-        content_type_name: str,
-        default: bool = False,
-    ) -> bool:
-        ct = self.get(content_type_name)
-        if ct is None:
-            return default
-        else:
-            return ct.is_text
-
-    def get_cts_by_ext(self, ext: str) -> List[ContentType]:
-        return self.ext2cts.get(ext, list())
-
-    def get_cts_by_ext_or_raise(self, ext: str) -> List[ContentType]:
-        cts = self.get_cts_by_ext(ext)
-        if len(cts) == 0:
-            raise Exception(f'Could not find ContentType for extension "{ext}"')
-        return cts
-
-    def get_valid_tags(self, only_explicit: bool = True) -> List[str]:
-        if only_explicit:
-            all_tags = sorted(
-                filter(
-                    lambda x: (
-                        not x.split(":")[0].endswith("_label")
-                        and not x.startswith("dataset")
-                    ),
-                    self.tag2cts.keys(),
-                )
-            )
-        else:
-            all_tags = sorted(self.tag2cts.keys())
-        return all_tags
-
-    def is_valid_ct_label(self, label: str) -> bool:
-        if self.get(label) is not None:
-            return True
-        if label in ContentTypesManager.SPECIAL_CONTENT_TYPES:
-            return True
-        return False
-
-    def is_valid_tag(self, tag: str) -> bool:
-        return tag in self.tag2cts.keys()
-
-    def select(
-        self, query: Optional[str] = None, must_be_in_scope_for_training: bool = True
-    ) -> List[ContentType]:
-        ct_names = self.select_names(
-            query=query, must_be_in_scope_for_training=must_be_in_scope_for_training
-        )
-        # we know these are valid content types
-        return list(map(self.get_or_raise, ct_names))
-
-    def select_names(
-        self, query: Optional[str] = None, must_be_in_scope_for_training: bool = True
-    ) -> List[str]:
-        ct_names_set: Set[str] = set()
-        if query is None:
-            # select them all, honoring must_be_in_scope_for_training
-            for ct in self.cts.values():
-                if must_be_in_scope_for_training and not ct.in_scope_for_training:
-                    continue
-                ct_names_set.add(ct.name)
-        else:
-            # consider each element of the query in sequence and add/remove
-            # content types as appropriate (also honoring
-            # must_be_in_scope_for_training)
-            entries = query.split(",")
-            for entry in entries:
-                if entry in ["*", "all"]:
-                    # we know we get list of strings because we set only_names=True
-                    ct_names_set.update(
-                        self.select_names(
-                            must_be_in_scope_for_training=must_be_in_scope_for_training
-                        )
-                    )
-                elif entry.startswith("tag:"):
-                    entry = entry[4:]
-                    if not self.is_valid_tag(entry):
-                        print(
-                            f'ERROR: "entry" is not a valid tag. Valid tags: {sorted(self.tag2cts.keys())}.'
-                        )
-                        sys.exit(1)
-                    for ct in self.tag2cts[entry]:
-                        if (
-                            must_be_in_scope_for_training
-                            and not ct.in_scope_for_training
-                        ):
-                            continue
-                        ct_names_set.add(ct.name)
-                elif entry.startswith("-tag:"):
-                    entry = entry[5:]
-                    assert self.is_valid_tag(entry)
-                    for ct in self.tag2cts[entry]:
-                        # no need to check for must_be_in_scope_for_training when removing
-                        if ct.name in ct_names_set:
-                            ct_names_set.remove(ct.name)
-                elif entry[0] == "-":
-                    entry = entry[1:]
-                    assert self.is_valid_ct_label(entry)
-                    # no need to check for must_be_in_scope_for_training when removing
-                    if entry in ct_names_set:
-                        ct_names_set.remove(entry)
-                else:
-                    assert self.is_valid_ct_label(entry)
-                    # this ct was manually specified, if it does not honor
-                    # must_be_in_scope_for_training, that's a problem.
-                    if must_be_in_scope_for_training:
-                        candidate_ct: ContentType | None = self.get(entry)
-                        assert candidate_ct is not None
-                        assert candidate_ct.in_scope_for_training
-                    ct_names_set.add(entry)
-
-        ct_names = sorted(ct_names_set)
-        return ct_names
-
-    def get_content_types_space(self) -> List[str]:
-        """Returns the full list of possible content types, including out of
-        scope and special types. Returns only the names."""
-
-        # We know that we get content type names (str), and not a list of
-        # ContentType
-        return sorted(
-            set(self.select_names(must_be_in_scope_for_training=False))
-            | set(self.SPECIAL_CONTENT_TYPES)
-        )
-
-    def get_output_content_types(self) -> List[ContentType]:
-        """Return a sorted list of ContentType objects representing valid output
-        content types."""
-        return sorted(
-            set(
-                map(
-                    lambda ct: self.get_or_raise(ct.target_label),
-                    filter(
-                        lambda ct: ct.in_scope_for_output_content_type
-                        and ct.target_label is not None,
-                        set(self.select(must_be_in_scope_for_training=False)),
-                    ),
-                )
-            ),
-            key=lambda ct: ct.name,
-        )
-
-    def get_output_content_types_names(self) -> List[str]:
-        """Return a sorted list of content type names representing valid output
-        content types."""
-        return [ct.name for ct in self.get_output_content_types()]
-
-    def get_invalid_labels(self, labels: Iterable[str]) -> List[str]:
-        not_valid_labels = set()
-        for label in set(labels):
-            if not self.is_valid_ct_label(label):
-                not_valid_labels.add(label)
-        return sorted(not_valid_labels)
diff --git a/python/magika/magika.py b/python/magika/magika.py
index f4b599d8..d519bbfe 100644
--- a/python/magika/magika.py
+++ b/python/magika/magika.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
 import json
 import logging
 import os
@@ -19,26 +20,28 @@
 import time
 from collections import defaultdict
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import numpy.typing as npt
 import onnxruntime as rt
-from tqdm.auto import tqdm
 
-from magika.content_types import ContentType, ContentTypesManager
 from magika.logger import get_logger
-from magika.prediction_mode import PredictionMode
 from magika.seekable import Buffer, File, Seekable
 from magika.types import (
-    MagikaOutputFields,
+    ContentTypeInfo,
+    ContentTypeLabel,
     MagikaResult,
+    ModelConfig,
     ModelFeatures,
-    ModelFeaturesV2,
     ModelOutput,
-    ModelOutputFields,
+    PredictionMode,
+    Status,
+    StatusOr,
 )
 
+DEFAULT_MODEL_NAME = "draft_standard_v2"
+
 
 class Magika:
     def __init__(
@@ -52,44 +55,22 @@ def __init__(
     ) -> None:
         self._log = get_logger(use_colors=use_colors)
 
-        self._disable_progress_bar = True
-
-        self._magika_config = Magika._get_magika_config()
-
-        # Default model, used in case not specified via the Magika constructor
-        self._default_model_name = self._magika_config["default_model_name"]
-        # Minimum threshold for "default" prediction mode
-        self._medium_confidence_threshold = self._magika_config[
-            "medium_confidence_threshold"
-        ]
-        # Minimum file size for using the DL model
-        self._min_file_size_for_dl = self._magika_config["min_file_size_for_dl"]
-        # Which integer we use to indicate padding
-        self._padding_token = self._magika_config["padding_token"]
-        self._block_size = self._magika_config["block_size"]
-
         if verbose:
             self._log.setLevel(logging.INFO)
-            self._disable_progress_bar = False
 
         if debug:
             self._log.setLevel(logging.DEBUG)
-            self._disable_progress_bar = False
 
         if model_dir is not None:
             self._model_dir = model_dir
         else:
             # use default model
             self._model_dir = (
-                Path(__file__).parent / "models" / self._default_model_name
+                Path(__file__).parent / "models" / self.get_default_model_name()
             )
 
         self._model_path = self._model_dir / "model.onnx"
-        self._model_config_path = self._model_dir / "model_config.json"
-        self._thresholds_path = self._model_dir / "thresholds.json"
-        self._model_output_overwrite_map_path = (
-            self._model_dir / "model_output_overwrite_map.json"
-        )
+        self._model_config_path = self._model_dir / "config.min.json"
 
         if not self._model_dir.is_dir():
             raise MagikaError(f"model dir not found at {str(self._model_dir)}")
@@ -99,48 +80,38 @@ def __init__(
             raise MagikaError(
                 f"model config not found at {str(self._model_config_path)}"
             )
-        if not self._thresholds_path.is_file():
-            raise MagikaError(f"thresholds not found at {str(self._thresholds_path)}")
-        if not self._model_output_overwrite_map_path.is_file():
-            raise MagikaError(
-                f"thresholds not found at {str(self._model_output_overwrite_map_path)}"
-            )
-
-        self._model_config = json.loads(self._model_config_path.read_text())
 
-        self._thresholds = json.loads(self._thresholds_path.read_text())["thresholds"]
-
-        self._model_output_overwrite_map: Dict[str, str] = json.loads(
-            self._model_output_overwrite_map_path.read_text()
+        self._model_config: ModelConfig = Magika._load_model_config(
+            self._model_config_path
         )
 
-        self._input_sizes: Dict[str, int] = {
-            "beg": self._model_config["cfg"]["input_sizes"]["beg"],
-            "mid": self._model_config["cfg"]["input_sizes"]["mid"],
-            "end": self._model_config["cfg"]["input_sizes"]["end"],
-        }
         self._target_labels_space_np = np.array(
-            self._model_config["train_dataset_info"]["target_labels_info"][
-                "target_labels_space"
-            ]
+            list(map(str, self._model_config.target_labels_space))
         )
 
         self._prediction_mode = prediction_mode
 
         self._no_dereference = no_dereference
 
-        self._ctm = ContentTypesManager()
+        content_types_kb_path = (
+            Path(__file__).parent / "config" / "content_types_kb.min.json"
+        )
+        self._cts_infos = Magika._load_content_types_kb(content_types_kb_path)
+
+        # self._ctm = ContentTypesManager()
         self._onnx_session = self._init_onnx_session()
 
         self._perf_stats: Dict[str, List[float]] = defaultdict(list)
 
-    def identify_path(self, path: Path) -> MagikaResult:
+    def identify_path(self, path: Path) -> StatusOr[MagikaResult]:
         return self._get_result_from_path(path)
 
-    def identify_paths(self, paths: List[Path]) -> List[MagikaResult]:
+    def identify_paths(self, paths: List[Path]) -> List[StatusOr[MagikaResult]]:
         return self._get_results_from_paths(paths)
 
-    def identify_bytes(self, content: bytes) -> MagikaResult:
+    def identify_bytes(self, content: bytes) -> StatusOr[MagikaResult]:
+        if not isinstance(content, bytes):
+            raise Exception(f"Content must have type 'bytes', not {type(content)}.")
         return self._get_result_from_bytes(content)
 
     @staticmethod
@@ -151,11 +122,53 @@ def get_default_model_name() -> str:
         print help, etc.) without the need to instantiate a Magika object.
         """
 
-        return str(Magika._get_magika_config()["default_model_name"])
+        return DEFAULT_MODEL_NAME
 
-    def get_model_name(self) -> str:
+    def get_model_dir_name(self) -> str:
         return self._model_dir.name
 
+    @staticmethod
+    def _load_content_types_kb(
+        content_types_kb_json_path: Path,
+    ) -> Dict[ContentTypeLabel, ContentTypeInfo]:
+        TXT_MIME_TYPE = "text/plain"
+        UNKNOWN_MIME_TYPE = "application/octet-stream"
+        UNKNOWN_GROUP = "unknown"
+
+        out = {}
+        for ct_name, ct_info in json.loads(
+            content_types_kb_json_path.read_text()
+        ).items():
+            is_text = ct_info["is_text"]
+            if is_text:
+                default_mime_type = TXT_MIME_TYPE
+            else:
+                default_mime_type = UNKNOWN_MIME_TYPE
+            mime_type = (
+                default_mime_type
+                if ct_info["mime_type"] is None
+                else ct_info["mime_type"]
+            )
+            group = UNKNOWN_GROUP if ct_info["group"] is None else ct_info["group"]
+            description = (
+                ct_name if ct_info["description"] is None else ct_info["description"]
+            )
+            extensions = ct_info["extensions"]
+            out[ContentTypeLabel(ct_name)] = ContentTypeInfo(
+                label=ContentTypeLabel(ct_name),
+                mime_type=mime_type,
+                group=group,
+                description=description,
+                extensions=extensions,
+                is_text=is_text,
+            )
+        return out
+
+    @staticmethod
+    def _load_model_config(model_config_path: Path) -> ModelConfig:
+        config = json.loads(model_config_path.read_text())
+        return ModelConfig(**config)
+
     def _init_onnx_session(self) -> rt.InferenceSession:
         start_time = time.time()
         rt.disable_telemetry_events()
@@ -170,12 +183,12 @@ def _init_onnx_session(self) -> rt.InferenceSession:
         )
         return onnx_session
 
-    @staticmethod
-    def _get_magika_config() -> Dict[str, Any]:
-        config_path = Path(__file__).parent / "config" / "magika_config.json"
-        return json.loads(config_path.read_text())  # type: ignore[no-any-return]
+    def _get_ct_info(self, content_type: ContentTypeLabel) -> ContentTypeInfo:
+        return self._cts_infos[content_type]
 
-    def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
+    def _get_results_from_paths(
+        self, paths: List[Path]
+    ) -> List[StatusOr[MagikaResult]]:
         """Given a list of paths, returns a list of predictions. Each prediction
         is a dict with the relevant information, such as the file path, the
         output of the DL model, the output of the tool, and the associated
@@ -188,7 +201,7 @@ def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
 
         # We use a "str" instead of Path because it makes it easier later on to
         # serialize.
-        all_outputs: Dict[str, MagikaResult] = {}  # {path: MagikaOutput, ...}
+        all_outputs: Dict[str, StatusOr[MagikaResult]] = {}  # {path: <output>, ...}
 
         # We use a list and not the dict because that's what we need later on
         # for inference.
@@ -198,7 +211,7 @@ def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
             f"Processing input files and extracting features for {len(paths)} samples"
         )
         start_time = time.time()
-        for path in tqdm(paths, disable=self._disable_progress_bar):
+        for path in paths:
             output, features = self._get_result_or_features_from_path(path)
             if output is not None:
                 all_outputs[str(path)] = output
@@ -209,8 +222,8 @@ def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
         self._log.debug(f"First pass and features extracted in {elapsed_time:.03f} ms")
 
         # Get the outputs via DL for the files that need it.
-        outputs_with_dl = self._get_results_from_features(all_features)
-        all_outputs.update(outputs_with_dl)
+        for path_str, result in self._get_results_from_features(all_features).items():
+            all_outputs[path_str] = result
 
         # Finally, we collect the predictions in a final list, sorted by the
         # initial paths list (and not by insertion order).
@@ -219,10 +232,10 @@ def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
             sorted_outputs.append(all_outputs[str(path)])
         return sorted_outputs
 
-    def _get_result_from_path(self, path: Path) -> MagikaResult:
+    def _get_result_from_path(self, path: Path) -> StatusOr[MagikaResult]:
         return self._get_results_from_paths([path])[0]
 
-    def _get_result_from_bytes(self, content: bytes) -> MagikaResult:
+    def _get_result_from_bytes(self, content: bytes) -> StatusOr[MagikaResult]:
         result, features = self._get_result_or_features_from_bytes(content)
         if result is not None:
             return result
@@ -237,11 +250,18 @@ def _extract_features_from_path(
         end_size: int,
         padding_token: int,
         block_size: int,
+        use_inputs_at_offsets: bool,
     ) -> ModelFeatures:
         # TODO: reimplement this using a context manager
         seekable = File(file_path)
         mf = Magika._extract_features_from_seekable(
-            seekable, beg_size, mid_size, end_size, padding_token, block_size
+            seekable,
+            beg_size,
+            mid_size,
+            end_size,
+            padding_token,
+            block_size,
+            use_inputs_at_offsets,
         )
         seekable.close()
         return mf
@@ -254,10 +274,17 @@ def _extract_features_from_bytes(
         end_size: int,
         padding_token: int,
         block_size: int,
+        use_inputs_at_offsets: bool,
     ) -> ModelFeatures:
         buffer = Buffer(content)
         return Magika._extract_features_from_seekable(
-            buffer, beg_size, mid_size, end_size, padding_token, block_size
+            buffer,
+            beg_size,
+            mid_size,
+            end_size,
+            padding_token,
+            block_size,
+            use_inputs_at_offsets,
         )
 
     @staticmethod
@@ -268,83 +295,8 @@ def _extract_features_from_seekable(
         end_size: int,
         padding_token: int,
         block_size: int,
+        use_inputs_at_offsets: bool,
     ) -> ModelFeatures:
-        """This implement features extraction from a seekable, which is an
-        abstraction about anything that can be "read_at" a specific offset, such
-        as a file or buffer. This is implemented so that we do not need to load
-        the entire content of the file in memory, and we do not need to scan the
-        entire buffer.
-
-        High-level overview on what we do:
-        - beg: we read the first block in memory, we lstrip() it, and we use this as
-        the basis to extract beg_size integers (we either truncate to beg_size
-        or we add padding as suffix up to beg_size).
-        - end: same as "beg", but we read the last block in memory, and the padding
-        is prefixed (and not suffixed).
-        - mid: we consider the remaining content (after stripping whitespace),
-        and we take the mid_size bytes in the middle. If needed, we add padding
-        to the left and to the right.
-        """
-
-        if seekable.size < (2 * block_size + mid_size):
-            # If the content is small, we take this shortcut to avoid
-            # checking for too many corner cases.
-            content = seekable.read_at(0, seekable.size)
-            content = content.strip()
-            beg_content = content
-            mid_content = content
-            end_content = content
-
-        else:  # seekable.size >= (2 * block_size + mid_size)
-            # If the content is big enough, the implementation becomes much
-            # simpler. In this path of the code, we know we have enough content
-            # to strip up to "block_size" bytes from both sides, and still have
-            # enough data for mid_size.
-
-            beg_content = seekable.read_at(0, block_size).lstrip()
-
-            end_content = seekable.read_at(
-                seekable.size - block_size, block_size
-            ).rstrip()
-
-            # we extract "mid" from the middle of the content that we have not
-            # trimmed
-            trimmed_beg_bytes_num = block_size - len(beg_content)
-            trimmed_end_bytes_num = block_size - len(end_content)
-            # mid_idx points to the first byte of the middle block
-            mid_idx = (
-                trimmed_beg_bytes_num
-                + (
-                    seekable.size
-                    - trimmed_beg_bytes_num
-                    - trimmed_end_bytes_num
-                    - mid_size
-                )
-                // 2
-            )
-            mid_content = seekable.read_at(mid_idx, mid_size)
-
-        beg_ints = Magika._get_beg_ints_with_padding(
-            beg_content, beg_size, padding_token
-        )
-        mid_ints = Magika._get_mid_ints_with_padding(
-            mid_content, mid_size, padding_token
-        )
-        end_ints = Magika._get_end_ints_with_padding(
-            end_content, end_size, padding_token
-        )
-
-        return ModelFeatures(beg=beg_ints, mid=mid_ints, end=end_ints)
-
-    @staticmethod
-    def _extract_features_from_seekable_v2(
-        seekable: Seekable,
-        beg_size: int,
-        mid_size: int,
-        end_size: int,
-        padding_token: int,
-        block_size: int,
-    ) -> ModelFeaturesV2:
         """This implement v2 of the features extraction v2 from a seekable,
         which is an abstraction about anything that can be "read_at" a specific
         offset, such as a file or buffer. This is implemented so that we do not
@@ -357,25 +309,18 @@ def _extract_features_from_seekable_v2(
         we have too many or too few.
 
         Blocks extraction and padding:
-        - beg: we read the first block in memory, we lstrip() it, and we use
+        - beg: we read the first block_size bytes, we lstrip() them, and we use
         this as the basis to extract beg_size integers. If we have too many
         bytes, we only consider the first beg_size ones. If we do not have
         enough, we add padding as suffix (up to beg_size integers).
         - mid: we determine "where the middle is" by using the entire content's
-        size, and we take the mid_size bytes in the middle. If we do not have
-        enough bytes, we add padding to the left and to the right. In case we
-        need to add an odd number of padding integers, we add an extra one to
-        the right.
-        - end: same as "beg", but we read the last block in memory, we rstrip()
+        size (before stripping the whitespace-like characters), and we take the
+        mid_size bytes in the middle. If we do not have enough bytes, we add
+        padding to the left and to the right. In case we need to add an odd
+        number of padding integers, we add an extra one to the right.
+        - end: same as "beg", but we read the last block_size bytes, we rstrip()
         (instead of lstrip()), and, if needed, we add padding as a prefix (and
         not as a suffix like we do with "beg").
-
-        Notes about similarities and differences with v1: the main difference is
-        that whether we strip some bytes from beg and end does not influence
-        which bytes we pick for the middle part. This makes the implementation
-        of v2 much simpler. And it makes it possible for a client to just read a
-        block at the beginning, middle, and end, and send it to our backend for
-        features extraction -- no need for additional check on the client side.
         """
 
         assert beg_size < block_size
@@ -385,41 +330,56 @@ def _extract_features_from_seekable_v2(
         # we read at most block_size bytes
         bytes_num_to_read = min(block_size, seekable.size)
 
-        beg_content = seekable.read_at(0, bytes_num_to_read).lstrip()
-        beg_ints = Magika._get_beg_ints_with_padding(
-            beg_content, beg_size, padding_token
-        )
-
-        end_content = seekable.read_at(
-            seekable.size - bytes_num_to_read, bytes_num_to_read
-        ).rstrip()
-        end_ints = Magika._get_end_ints_with_padding(
-            end_content, end_size, padding_token
-        )
+        if beg_size > 0:
+            beg_content = seekable.read_at(0, bytes_num_to_read).lstrip()
+            beg_ints = Magika._get_beg_ints_with_padding(
+                beg_content, beg_size, padding_token
+            )
+        else:
+            beg_ints = []
+
+        if mid_size > 0:
+            # mid_idx points to the left-most offset to read for the "mid" component
+            # of the features.
+            mid_bytes_num_to_read = min(seekable.size, mid_size)
+            mid_idx = (seekable.size - mid_bytes_num_to_read) // 2
+            mid_content = seekable.read_at(mid_idx, mid_bytes_num_to_read)
+            mid_ints = Magika._get_mid_ints_with_padding(
+                mid_content, mid_size, padding_token
+            )
+        else:
+            mid_ints = []
 
-        # mid_idx points to the left-most offset to read for the "mid" component
-        # of the features.
-        mid_bytes_num_to_read = min(seekable.size, mid_size)
-        mid_idx = (seekable.size - mid_bytes_num_to_read) // 2
-        mid_content = seekable.read_at(mid_idx, mid_bytes_num_to_read)
-        mid_ints = Magika._get_mid_ints_with_padding(
-            mid_content, mid_size, padding_token
-        )
+        if end_size > 0:
+            end_content = seekable.read_at(
+                seekable.size - bytes_num_to_read, bytes_num_to_read
+            ).rstrip()
+            end_ints = Magika._get_end_ints_with_padding(
+                end_content, end_size, padding_token
+            )
+        else:
+            end_ints = []
 
-        offset_0x8000_0x8007 = Magika._get_ints_at_offset_or_padding(
-            seekable, 0x8000, 8, padding_token
-        )
-        offset_0x8800_0x8807 = Magika._get_ints_at_offset_or_padding(
-            seekable, 0x8800, 8, padding_token
-        )
-        offset_0x9000_0x9007 = Magika._get_ints_at_offset_or_padding(
-            seekable, 0x9000, 8, padding_token
-        )
-        offset_0x9800_0x9807 = Magika._get_ints_at_offset_or_padding(
-            seekable, 0x9800, 8, padding_token
-        )
+        if use_inputs_at_offsets:
+            offset_0x8000_0x8007 = Magika._get_ints_at_offset_or_padding(
+                seekable, 0x8000, 8, padding_token
+            )
+            offset_0x8800_0x8807 = Magika._get_ints_at_offset_or_padding(
+                seekable, 0x8800, 8, padding_token
+            )
+            offset_0x9000_0x9007 = Magika._get_ints_at_offset_or_padding(
+                seekable, 0x9000, 8, padding_token
+            )
+            offset_0x9800_0x9807 = Magika._get_ints_at_offset_or_padding(
+                seekable, 0x9800, 8, padding_token
+            )
+        else:
+            offset_0x8000_0x8007 = []
+            offset_0x8800_0x8807 = []
+            offset_0x9000_0x9007 = []
+            offset_0x9800_0x9807 = []
 
-        return ModelFeaturesV2(
+        return ModelFeatures(
             beg=beg_ints,
             mid=mid_ints,
             end=end_ints,
@@ -433,9 +393,11 @@ def _extract_features_from_seekable_v2(
     def _get_beg_ints_with_padding(
         beg_content: bytes, beg_size: int, padding_token: int
     ) -> List[int]:
-        """Take an (already-stripped) buffer as input and extract beg ints. If
+        """Take an (already-stripped) buffer as input and extract beg ints.
+        This returns a list of integers whose length is exactly beg_size. If
         the buffer is bigger than required, take only the initial portion. If
-        the buffer is shorter, add padding at the end."""
+        the buffer is shorter, add padding at the end.
+        """
 
         if beg_size < len(beg_content):
             # we don't need so many bytes
@@ -455,8 +417,9 @@ def _get_beg_ints_with_padding(
     def _get_mid_ints_with_padding(
         mid_content: bytes, mid_size: int, padding_token: int
     ) -> List[int]:
-        """Take a buffer as input and extract mid ints. If the buffer is bigger
-        than required, take only its middle part. If the buffer is shorter, add
+        """Take a buffer as input and extract mid ints. This returns a list of
+        integers whose length is exactly mid_size. If the buffer is bigger than
+        required, take only its middle part. If the buffer is shorter, add
         padding to its left and right. If we need to add an odd number of
         padding integers, add an extra one to the right.
         """
@@ -486,8 +449,9 @@ def _get_mid_ints_with_padding(
     def _get_end_ints_with_padding(
         end_content: bytes, end_size: int, padding_token: int
     ) -> List[int]:
-        """Take an (already-stripped) buffer as input and extract end ints. If
-        the buffer is bigger than required, take only the last portion. If the
+        """Take an (already-stripped) buffer as input and extract end ints. This
+        returns a list of integers whose length is exactly end_size. If the
+        buffer is bigger than required, take only the last portion. If the
         buffer is shorter, add padding at the beginning.
         """
 
@@ -522,7 +486,7 @@ def _get_model_outputs_from_features(
         scores = np.max(raw_preds, axis=1)
 
         return [
-            (path, ModelOutput(ct_label=ct_label, score=float(score)))
+            (path, ModelOutput(ct_label=ContentTypeLabel(ct_label), score=float(score)))
             for (path, _), ct_label, score in zip(
                 all_features, preds_content_types_labels, scores
             )
@@ -530,14 +494,14 @@ def _get_model_outputs_from_features(
 
     def _get_results_from_features(
         self, all_features: List[Tuple[Path, ModelFeatures]]
-    ) -> Dict[str, MagikaResult]:
+    ) -> Dict[str, StatusOr[MagikaResult]]:
         # We now do inference for those files that need it.
 
         if len(all_features) == 0:
             # nothing to be done
             return {}
 
-        outputs: Dict[str, MagikaResult] = {}
+        results: Dict[str, StatusOr[MagikaResult]] = {}
 
         for path, model_output in self._get_model_outputs_from_features(all_features):
             # In additional to the content type label from the DL model, we
@@ -550,18 +514,17 @@ def _get_results_from_features(
                 model_output.ct_label, model_output.score
             )
 
-            outputs[str(path)] = self._get_result_from_labels_and_score(
-                path,
+            results[str(path)] = self._get_result_from_labels_and_score(
                 dl_ct_label=model_output.ct_label,
                 output_ct_label=output_ct_label,
                 score=model_output.score,
             )
 
-        return outputs
+        return results
 
     def _get_result_from_features(
         self, features: ModelFeatures, path: Optional[Path] = None
-    ) -> MagikaResult:
+    ) -> StatusOr[MagikaResult]:
         # This is useful to scan from stream of bytes
         if path is None:
             path = Path("-")
@@ -570,24 +533,27 @@ def _get_result_from_features(
         return result_with_dl
 
     def _get_output_ct_label_from_dl_result(
-        self, dl_ct_label: str, score: float
-    ) -> str:
+        self, dl_ct_label: ContentTypeLabel, score: float
+    ) -> ContentTypeLabel:
         # overwrite ct_label if specified in the config
-        dl_ct_label = self._model_output_overwrite_map.get(dl_ct_label, dl_ct_label)
+        dl_ct_label = self._model_config.overwrite_map.get(dl_ct_label, dl_ct_label)
 
         if self._prediction_mode == PredictionMode.BEST_GUESS:
             # We take the model predictions, no matter what the score is.
             output_ct_label = dl_ct_label
         elif (
             self._prediction_mode == PredictionMode.HIGH_CONFIDENCE
-            and score >= self._thresholds[dl_ct_label]
+            and score
+            >= self._model_config.thresholds.get(
+                dl_ct_label, self._model_config.medium_confidence_threshold
+            )
         ):
             # The model score is higher than the per-content-type
             # high-confidence threshold.
             output_ct_label = dl_ct_label
         elif (
             self._prediction_mode == PredictionMode.MEDIUM_CONFIDENCE
-            and score >= self._medium_confidence_threshold
+            and score >= self._model_config.medium_confidence_threshold
         ):
             # We take the model prediction only if the score is above a given
             # relatively loose threshold.
@@ -598,70 +564,30 @@ def _get_output_ct_label_from_dl_result(
             # the model has, at the very least, got the binary vs. text category
             # right. This allows us to pick between unknown and txt without the
             # need to read or scan the file bytes once again.
-            if self._ctm.get_or_raise(dl_ct_label).is_text:
-                output_ct_label = ContentType.GENERIC_TEXT
+            if self._get_ct_info(dl_ct_label).is_text:
+                output_ct_label = ContentTypeLabel.TXT
             else:
-                output_ct_label = ContentType.UNKNOWN
+                output_ct_label = ContentTypeLabel.UNKNOWN
 
         return output_ct_label
 
     def _get_result_from_labels_and_score(
-        self, path: Path, dl_ct_label: Optional[str], score: float, output_ct_label: str
-    ) -> MagikaResult:
-        dl_score = None if dl_ct_label is None else score
-        output_score = score
-
-        # add group info
-        dl_group = None if dl_ct_label is None else self._ctm.get_group(dl_ct_label)
-        output_group = self._ctm.get_group(output_ct_label)
-
-        # add mime type info
-        dl_mime_type = (
-            None if dl_ct_label is None else self._ctm.get_mime_type(dl_ct_label)
-        )
-        output_mime_type = self._ctm.get_mime_type(output_ct_label)
-
-        # add magic
-        dl_magic = None if dl_ct_label is None else self._ctm.get_magic(dl_ct_label)
-        output_magic = self._ctm.get_magic(output_ct_label)
-
-        # add description
-        dl_description = (
-            None if dl_ct_label is None else self._ctm.get_description(dl_ct_label)
-        )
-        output_description = self._ctm.get_description(output_ct_label)
-
-        # add is_text
-        dl_is_text = None if dl_ct_label is None else self._ctm.get_is_text(dl_ct_label)
-        output_is_text = self._ctm.get_is_text(output_ct_label)
-
-        magika_result = MagikaResult(
-            path=str(path),
-            dl=ModelOutputFields(
-                ct_label=dl_ct_label,
-                score=dl_score,
-                group=dl_group,
-                mime_type=dl_mime_type,
-                magic=dl_magic,
-                description=dl_description,
-                is_text=dl_is_text,
-            ),
-            output=MagikaOutputFields(
-                ct_label=output_ct_label,
-                score=output_score,
-                group=output_group,
-                mime_type=output_mime_type,
-                magic=output_magic,
-                description=output_description,
-                is_text=output_is_text,
-            ),
+        self,
+        dl_ct_label: ContentTypeLabel,
+        output_ct_label: ContentTypeLabel,
+        score: float,
+    ) -> StatusOr[MagikaResult]:
+        return StatusOr(
+            value=MagikaResult(
+                dl=self._get_ct_info(dl_ct_label),
+                output=self._get_ct_info(output_ct_label),
+                score=score,
+            )
         )
 
-        return magika_result
-
     def _get_result_or_features_from_path(
         self, path: Path
-    ) -> Tuple[Optional[MagikaResult], Optional[ModelFeatures]]:
+    ) -> Tuple[Optional[StatusOr[MagikaResult]], Optional[ModelFeatures]]:
         """
         Given a path, we return either a MagikaOutput or a MagikaFeatures.
 
@@ -677,61 +603,46 @@ def _get_result_or_features_from_path(
 
         if self._no_dereference and path.is_symlink():
             result = self._get_result_from_labels_and_score(
-                path, dl_ct_label=None, output_ct_label=ContentType.SYMLINK, score=1.0
-            )
-            # The magic and description fields for symlink contain a placeholder
-            # for <path>; let's patch the output to reflect that.
-            result.output.magic = result.output.magic.replace(
-                "<path>", str(path.resolve())
-            )
-            result.output.description = result.output.description.replace(
-                "<path>", str(path.resolve())
+                dl_ct_label=ContentTypeLabel.UNDEFINED,
+                output_ct_label=ContentTypeLabel.SYMLINK,
+                score=1.0,
             )
             return result, None
 
         if not path.exists():
-            result = self._get_result_from_labels_and_score(
-                path,
-                dl_ct_label=None,
-                output_ct_label=ContentType.FILE_DOES_NOT_EXIST,
-                score=1.0,
-            )
-            return result, None
+            return StatusOr(status=Status.FILE_NOT_FOUND_ERROR), None
 
         if path.is_file():
             if path.stat().st_size == 0:
                 result = self._get_result_from_labels_and_score(
-                    path, dl_ct_label=None, output_ct_label=ContentType.EMPTY, score=1.0
+                    dl_ct_label=ContentTypeLabel.UNDEFINED,
+                    output_ct_label=ContentTypeLabel.EMPTY,
+                    score=1.0,
                 )
                 return result, None
 
             elif not os.access(path, os.R_OK):
-                result = self._get_result_from_labels_and_score(
-                    path,
-                    dl_ct_label=None,
-                    output_ct_label=ContentType.PERMISSION_ERROR,
-                    score=1.0,
-                )
-                return result, None
+                return StatusOr(status=Status.PERMISSION_ERROR), None
 
-            elif path.stat().st_size <= self._min_file_size_for_dl:
+            elif path.stat().st_size <= self._model_config.min_file_size_for_dl:
                 result = self._get_result_from_first_block_of_file(path)
                 return result, None
 
             else:
                 file_features = Magika._extract_features_from_path(
                     path,
-                    self._input_sizes["beg"],
-                    self._input_sizes["mid"],
-                    self._input_sizes["end"],
-                    self._padding_token,
-                    self._block_size,
+                    self._model_config.beg_size,
+                    self._model_config.mid_size,
+                    self._model_config.end_size,
+                    self._model_config.padding_token,
+                    self._model_config.block_size,
+                    self._model_config.use_inputs_at_offsets,
                 )
                 # Check whether we have enough bytes for a meaningful
                 # detection, and not just padding.
                 if (
-                    file_features.beg[self._min_file_size_for_dl - 1]
-                    == self._padding_token
+                    file_features.beg[self._model_config.min_file_size_for_dl - 1]
+                    == self._model_config.padding_token
                 ):
                     # If the n-th token is padding, then it means that,
                     # post-stripping, we do not have enough meaningful
@@ -747,13 +658,17 @@ def _get_result_or_features_from_path(
 
         elif path.is_dir():
             result = self._get_result_from_labels_and_score(
-                path, dl_ct_label=None, output_ct_label=ContentType.DIRECTORY, score=1.0
+                dl_ct_label=ContentTypeLabel.UNDEFINED,
+                output_ct_label=ContentTypeLabel.DIRECTORY,
+                score=1.0,
             )
             return result, None
 
         else:
             result = self._get_result_from_labels_and_score(
-                path, dl_ct_label=None, output_ct_label=ContentType.UNKNOWN, score=1.0
+                dl_ct_label=ContentTypeLabel.UNDEFINED,
+                output_ct_label=ContentTypeLabel.UNKNOWN,
+                score=1.0,
             )
             return result, None
 
@@ -761,67 +676,73 @@ def _get_result_or_features_from_path(
 
     def _get_result_or_features_from_bytes(
         self, content: bytes
-    ) -> Tuple[Optional[MagikaResult], Optional[ModelFeatures]]:
+    ) -> Tuple[Optional[StatusOr[MagikaResult]], Optional[ModelFeatures]]:
         if len(content) == 0:
-            output = self._get_result_from_labels_and_score(
-                Path("-"),
-                dl_ct_label=None,
-                output_ct_label=ContentType.EMPTY,
+            result = self._get_result_from_labels_and_score(
+                dl_ct_label=ContentTypeLabel.UNDEFINED,
+                output_ct_label=ContentTypeLabel.EMPTY,
                 score=1.0,
             )
-            return output, None
+            return result, None
 
-        elif len(content) <= self._min_file_size_for_dl:
-            output = self._get_result_of_few_bytes(content)
-            return output, None
+        elif len(content) <= self._model_config.min_file_size_for_dl:
+            result = self._get_result_from_few_bytes(content)
+            return result, None
 
         else:
             file_features = Magika._extract_features_from_bytes(
                 content,
-                self._input_sizes["beg"],
-                self._input_sizes["mid"],
-                self._input_sizes["end"],
-                self._padding_token,
-                self._block_size,
+                self._model_config.beg_size,
+                self._model_config.mid_size,
+                self._model_config.end_size,
+                self._model_config.padding_token,
+                self._model_config.block_size,
+                self._model_config.use_inputs_at_offsets,
             )
             # Check whether we have enough bytes for a meaningful
             # detection, and not just padding.
-            if file_features.beg[self._min_file_size_for_dl - 1] == self._padding_token:
+            if (
+                file_features.beg[self._model_config.min_file_size_for_dl - 1]
+                == self._model_config.padding_token
+            ):
                 # If the n-th token is padding, then it means that,
                 # post-stripping, we do not have enough meaningful
                 # bytes.
-                output = self._get_result_of_few_bytes(content)
-                return output, None
+                result = self._get_result_from_few_bytes(content)
+                return result, None
 
             else:
                 # We have enough bytes, scheduling this file for model
                 # prediction.
-                # features.append((path, file_features))
                 return None, file_features
 
         raise Exception("unreachable")
 
-    def _get_result_from_first_block_of_file(self, path: Path) -> MagikaResult:
+    def _get_result_from_first_block_of_file(
+        self, path: Path
+    ) -> StatusOr[MagikaResult]:
         # We read at most "block_size" bytes
         with open(path, "rb") as f:
-            content = f.read(self._block_size)
-        return self._get_result_of_few_bytes(content, path)
+            content = f.read(self._model_config.block_size)
+        return self._get_result_from_few_bytes(content, path)
 
-    def _get_result_of_few_bytes(
+    def _get_result_from_few_bytes(
         self, content: bytes, path: Path = Path("-")
-    ) -> MagikaResult:
-        assert len(content) <= 4 * self._block_size
-        ct_label = self._get_ct_label_of_few_bytes(content)
+    ) -> StatusOr[MagikaResult]:
+        assert len(content) <= 4 * self._model_config.block_size
+        ct_label = self._get_ct_label_from_few_bytes(content)
         return self._get_result_from_labels_and_score(
-            path, dl_ct_label=None, output_ct_label=ct_label, score=1.0
+            dl_ct_label=ContentTypeLabel.UNDEFINED,
+            output_ct_label=ct_label,
+            score=1.0,
         )
 
-    def _get_ct_label_of_few_bytes(self, content: bytes) -> str:
+    def _get_ct_label_from_few_bytes(self, content: bytes) -> ContentTypeLabel:
         try:
-            ct_label = ContentType.GENERIC_TEXT
+            ct_label = ContentTypeLabel.TXT
             _ = content.decode("utf-8")
         except UnicodeDecodeError:
-            ct_label = ContentType.UNKNOWN
+            ct_label = ContentTypeLabel.UNKNOWN
         return ct_label
 
     def _get_raw_predictions(
@@ -832,20 +753,18 @@ def _get_raw_predictions(
         matrix encoding the predictions.
         """
 
-        dataset_format = self._model_config["train_dataset_info"]["dataset_format"]
-        assert dataset_format == "int-concat/one-hot"
         start_time = time.time()
         X_bytes = []
         for _, fs in features:
             sample_bytes = []
-            if self._input_sizes["beg"] > 0:
-                sample_bytes.extend(fs.beg[: self._input_sizes["beg"]])
-            if self._input_sizes["mid"] > 0:
-                sample_bytes.extend(fs.mid[: self._input_sizes["mid"]])
-            if self._input_sizes["end"] > 0:
-                sample_bytes.extend(fs.end[-self._input_sizes["end"] :])
+            if self._model_config.beg_size > 0:
+                sample_bytes.extend(fs.beg[: self._model_config.beg_size])
+            if self._model_config.mid_size > 0:
+                sample_bytes.extend(fs.mid[: self._model_config.mid_size])
+            if self._model_config.end_size > 0:
+                sample_bytes.extend(fs.end[-self._model_config.end_size :])
             X_bytes.append(sample_bytes)
-        X = np.array(X_bytes).astype(np.float32)
+        X = np.array(X_bytes, dtype=np.int32)
         elapsed_time = 1000 * (time.time() - start_time)
         self._log.debug(f"DL input prepared in {elapsed_time:.03f} ms")
 
diff --git a/python/magika/models/draft_begonly_v2/config.min.json b/python/magika/models/draft_begonly_v2/config.min.json
new file mode 100644
index 00000000..63dad503
--- /dev/null
+++ b/python/magika/models/draft_begonly_v2/config.min.json
@@ -0,0 +1 @@
+{"beg_size": 2048, "mid_size": 0, "end_size": 0, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", 
"xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {}, "overwrite_map": {}}
\ No newline at end of file
diff --git a/python/magika/models/draft_begonly_v2/model.onnx b/python/magika/models/draft_begonly_v2/model.onnx
new file mode 100644
index 00000000..ed0e099c
Binary files /dev/null and b/python/magika/models/draft_begonly_v2/model.onnx differ
diff --git a/python/magika/models/draft_fast_v2/config.min.json b/python/magika/models/draft_fast_v2/config.min.json
new file mode 100644
index 00000000..87042418
--- /dev/null
+++ b/python/magika/models/draft_fast_v2/config.min.json
@@ -0,0 +1 @@
+{"beg_size": 512, "mid_size": 0, "end_size": 512, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", 
"xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {}, "overwrite_map": {}}
\ No newline at end of file
diff --git a/python/magika/models/draft_fast_v2/model.onnx b/python/magika/models/draft_fast_v2/model.onnx
new file mode 100644
index 00000000..01f66430
Binary files /dev/null and b/python/magika/models/draft_fast_v2/model.onnx differ
diff --git a/python/magika/models/draft_standard_v2/config.min.json b/python/magika/models/draft_standard_v2/config.min.json
new file mode 100644
index 00000000..ff83e7bc
--- /dev/null
+++ b/python/magika/models/draft_standard_v2/config.min.json
@@ -0,0 +1 @@
+{"beg_size": 2048, "mid_size": 0, "end_size": 2048, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", 
"xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {}, "overwrite_map": {}}
\ No newline at end of file
diff --git a/python/magika/models/draft_standard_v2/model.onnx b/python/magika/models/draft_standard_v2/model.onnx
new file mode 100644
index 00000000..19e0d4bf
Binary files /dev/null and b/python/magika/models/draft_standard_v2/model.onnx differ
diff --git a/python/magika/types/__init__.py b/python/magika/types/__init__.py
new file mode 100644
index 00000000..990e2170
--- /dev/null
+++ b/python/magika/types/__init__.py
@@ -0,0 +1,38 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from magika.types.content_type_info import ContentTypeInfo  # noqa: F401
+from magika.types.content_type_label import ContentTypeLabel  # noqa: F401
+from magika.types.magika_result import MagikaResult  # noqa: F401
+from magika.types.model import (  # noqa: F401
+    ModelConfig,
+    ModelFeatures,
+    ModelOutput,
+)
+from magika.types.prediction_mode import PredictionMode  # noqa: F401
+from magika.types.status import Status  # noqa: F401
+from magika.types.statusor import StatusOr  # noqa: F401
+
+__all__ = [
+    "ContentTypeInfo",
+    "ContentTypeLabel",
+    "MagikaResult",
+    "ModelConfig",
+    "ModelFeatures",
+    "ModelOutput",
+    "PredictionMode",
+    "Status",
+    "StatusOr",
+]
diff --git a/python/magika/types/content_type_info.py b/python/magika/types/content_type_info.py
new file mode 100644
index 00000000..2c2ff0b6
--- /dev/null
+++ b/python/magika/types/content_type_info.py
@@ -0,0 +1,32 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import List
+
+from magika.types.content_type_label import ContentTypeLabel
+
+
+@dataclass(frozen=True)
+class ContentTypeInfo:
+    """Immutable record of the metadata attached to one content type label."""
+
+    label: ContentTypeLabel
+    mime_type: str
+    group: str
+    description: str
+    # NOTE: the dataclass is frozen, but this list itself stays mutable, so
+    # hashing an instance raises TypeError; do not modify the list in place.
+    extensions: List[str]
+    is_text: bool
diff --git a/python/magika/types/content_type_label.py b/python/magika/types/content_type_label.py
new file mode 100644
index 00000000..b5130a9c
--- /dev/null
+++ b/python/magika/types/content_type_label.py
@@ -0,0 +1,375 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from magika.types.strenum import StrEnum
+
+# NOTE: DO NOT EDIT --- This file is automatically generated.
+
+
+# This is the list of all possible content types we know about; however, models
+# support a smaller subset of them. See model's config for details.
+class ContentTypeLabel(StrEnum):
+    _3DS = "3ds"
+    _3DSM = "3dsm"
+    _3DSX = "3dsx"
+    _3GP = "3gp"
+    _3MF = "3mf"
+    ABNF = "abnf"
+    ACE = "ace"
+    ADA = "ada"
+    AFF = "aff"
+    AI = "ai"
+    AIDL = "aidl"
+    ALGOL68 = "algol68"
+    ANI = "ani"
+    APK = "apk"
+    APPLEBPLIST = "applebplist"
+    APPLEDOUBLE = "appledouble"
+    APPLEPLIST = "appleplist"
+    APPLESINGLE = "applesingle"
+    AR = "ar"
+    ARC = "arc"
+    ARJ = "arj"
+    ARROW = "arrow"
+    ASC = "asc"
+    ASD = "asd"
+    ASF = "asf"
+    ASM = "asm"
+    ASP = "asp"
+    AUTOHOTKEY = "autohotkey"
+    AUTOIT = "autoit"
+    AVI = "avi"
+    AVIF = "avif"
+    AVRO = "avro"
+    AWK = "awk"
+    AX = "ax"
+    BATCH = "batch"
+    BAZEL = "bazel"
+    BCAD = "bcad"
+    BIB = "bib"
+    BMP = "bmp"
+    BPG = "bpg"
+    BPL = "bpl"
+    BRAINFUCK = "brainfuck"
+    BRF = "brf"
+    BZIP = "bzip"
+    BZIP3 = "bzip3"
+    C = "c"
+    CAB = "cab"
+    CAD = "cad"
+    CAT = "cat"
+    CDF = "cdf"
+    CHM = "chm"
+    CLOJURE = "clojure"
+    CMAKE = "cmake"
+    COBOL = "cobol"
+    COFF = "coff"
+    COFFEESCRIPT = "coffeescript"
+    COM = "com"
+    CPL = "cpl"
+    CPP = "cpp"
+    CRT = "crt"
+    CRX = "crx"
+    CS = "cs"
+    CSPROJ = "csproj"
+    CSS = "css"
+    CSV = "csv"
+    CTL = "ctl"
+    DART = "dart"
+    DEB = "deb"
+    DEX = "dex"
+    DEY = "dey"
+    DICOM = "dicom"
+    DIFF = "diff"
+    DIRECTORY = "directory"
+    DJANGO = "django"
+    DLL = "dll"
+    DM = "dm"
+    DMG = "dmg"
+    DMIGD = "dmigd"
+    DMSCRIPT = "dmscript"
+    DOC = "doc"
+    DOCKERFILE = "dockerfile"
+    DOCX = "docx"
+    DOSMBR = "dosmbr"
+    DOTX = "dotx"
+    DSSTORE = "dsstore"
+    DWG = "dwg"
+    DXF = "dxf"
+    DYLIB = "dylib"
+    EBML = "ebml"
+    ELF = "elf"
+    ELIXIR = "elixir"
+    EMF = "emf"
+    EML = "eml"
+    EMPTY = "empty"
+    EPUB = "epub"
+    ERB = "erb"
+    ERLANG = "erlang"
+    ESE = "ese"
+    EXE = "exe"
+    EXP = "exp"
+    FLAC = "flac"
+    FLUTTER = "flutter"
+    FLV = "flv"
+    FORTRAN = "fortran"
+    FPX = "fpx"
+    GEMFILE = "gemfile"
+    GEMSPEC = "gemspec"
+    GIF = "gif"
+    GITATTRIBUTES = "gitattributes"
+    GITMODULES = "gitmodules"
+    GLEAM = "gleam"
+    GO = "go"
+    GPX = "gpx"
+    GRADLE = "gradle"
+    GROOVY = "groovy"
+    GZIP = "gzip"
+    H = "h"
+    H5 = "h5"
+    HANDLEBARS = "handlebars"
+    HASKELL = "haskell"
+    HCL = "hcl"
+    HEIF = "heif"
+    HFS = "hfs"
+    HLP = "hlp"
+    HPP = "hpp"
+    HTA = "hta"
+    HTACCESS = "htaccess"
+    HTML = "html"
+    HVE = "hve"
+    HWP = "hwp"
+    ICC = "icc"
+    ICNS = "icns"
+    ICO = "ico"
+    ICS = "ics"
+    IGNOREFILE = "ignorefile"
+    IMG = "img"
+    INI = "ini"
+    INTERNETSHORTCUT = "internetshortcut"
+    IOSAPP = "iosapp"
+    IPYNB = "ipynb"
+    ISO = "iso"
+    JAR = "jar"
+    JAVA = "java"
+    JAVABYTECODE = "javabytecode"
+    JAVASCRIPT = "javascript"
+    JINJA = "jinja"
+    JNG = "jng"
+    JNLP = "jnlp"
+    JP2 = "jp2"
+    JPEG = "jpeg"
+    JSON = "json"
+    JSONC = "jsonc"
+    JSONL = "jsonl"
+    JSX = "jsx"
+    JULIA = "julia"
+    JXL = "jxl"
+    KO = "ko"
+    KOTLIN = "kotlin"
+    KS = "ks"
+    LATEX = "latex"
+    LATEXAUX = "latexaux"
+    LESS = "less"
+    LHA = "lha"
+    LICENSE = "license"
+    LISP = "lisp"
+    LITCS = "litcs"
+    LNK = "lnk"
+    LOCK = "lock"
+    LRZ = "lrz"
+    LUA = "lua"
+    LZ = "lz"
+    LZ4 = "lz4"
+    LZX = "lzx"
+    M3U = "m3u"
+    M4 = "m4"
+    MACHO = "macho"
+    MAFF = "maff"
+    MAKEFILE = "makefile"
+    MARKDOWN = "markdown"
+    MATLAB = "matlab"
+    MHT = "mht"
+    MIDI = "midi"
+    MKV = "mkv"
+    MP2 = "mp2"
+    MP3 = "mp3"
+    MP4 = "mp4"
+    MPEGTS = "mpegts"
+    MSCOMPRESS = "mscompress"
+    MSI = "msi"
+    MSIX = "msix"
+    MST = "mst"
+    MUI = "mui"
+    MUM = "mum"
+    MUN = "mun"
+    NIM = "nim"
+    NPY = "npy"
+    NPZ = "npz"
+    NULL = "null"
+    NUPKG = "nupkg"
+    OBJECT = "object"
+    OBJECTIVEC = "objectivec"
+    OCAML = "ocaml"
+    OCX = "ocx"
+    ODEX = "odex"
+    ODIN = "odin"
+    ODP = "odp"
+    ODS = "ods"
+    ODT = "odt"
+    OGG = "ogg"
+    OLE = "ole"
+    ONE = "one"
+    ONNX = "onnx"
+    OOXML = "ooxml"
+    OTF = "otf"
+    OUTLOOK = "outlook"
+    PALMOS = "palmos"
+    PARQUET = "parquet"
+    PASCAL = "pascal"
+    PBM = "pbm"
+    PCAP = "pcap"
+    PDB = "pdb"
+    PDF = "pdf"
+    PEBIN = "pebin"
+    PEM = "pem"
+    PERL = "perl"
+    PGP = "pgp"
+    PHP = "php"
+    PICKLE = "pickle"
+    PNG = "png"
+    PO = "po"
+    POSTSCRIPT = "postscript"
+    POWERSHELL = "powershell"
+    PPT = "ppt"
+    PPTX = "pptx"
+    PRINTFOX = "printfox"
+    PROLOG = "prolog"
+    PROTEINDB = "proteindb"
+    PROTO = "proto"
+    PROTOBUF = "protobuf"
+    PSD = "psd"
+    PUB = "pub"
+    PYTHON = "python"
+    PYTHONBYTECODE = "pythonbytecode"
+    PYTHONPAR = "pythonpar"
+    PYTORCH = "pytorch"
+    QOI = "qoi"
+    QT = "qt"
+    R = "r"
+    RANDOMASCII = "randomascii"
+    RANDOMBYTES = "randombytes"
+    RAR = "rar"
+    RDF = "rdf"
+    RIFF = "riff"
+    RLIB = "rlib"
+    RLL = "rll"
+    RPM = "rpm"
+    RST = "rst"
+    RTF = "rtf"
+    RUBY = "ruby"
+    RUST = "rust"
+    RZIP = "rzip"
+    SCALA = "scala"
+    SCHEME = "scheme"
+    SCR = "scr"
+    SCRIPTWSF = "scriptwsf"
+    SCSS = "scss"
+    SEVENZIP = "sevenzip"
+    SGML = "sgml"
+    SH3D = "sh3d"
+    SHELL = "shell"
+    SMALI = "smali"
+    SNAP = "snap"
+    SO = "so"
+    SOLIDITY = "solidity"
+    SQL = "sql"
+    SQLITE = "sqlite"
+    SQUASHFS = "squashfs"
+    SRT = "srt"
+    STLBINARY = "stlbinary"
+    STLTEXT = "stltext"
+    SUM = "sum"
+    SVD = "svd"
+    SVG = "svg"
+    SWF = "swf"
+    SWIFT = "swift"
+    SYMLINK = "symlink"
+    SYMLINKTEXT = "symlinktext"
+    SYS = "sys"
+    TAR = "tar"
+    TCL = "tcl"
+    TEXTPROTO = "textproto"
+    TGA = "tga"
+    THUMBSDB = "thumbsdb"
+    TIFF = "tiff"
+    TMDX = "tmdx"
+    TOML = "toml"
+    TORRENT = "torrent"
+    TROFF = "troff"
+    TSV = "tsv"
+    TSX = "tsx"
+    TTF = "ttf"
+    TWIG = "twig"
+    TXT = "txt"
+    TXTASCII = "txtascii"
+    TXTUTF16 = "txtutf16"
+    TXTUTF8 = "txtutf8"
+    TYPESCRIPT = "typescript"
+    UDF = "udf"
+    UNDEFINED = "undefined"
+    UNIXCOMPRESS = "unixcompress"
+    UNKNOWN = "unknown"
+    VBA = "vba"
+    VBE = "vbe"
+    VCARD = "vcard"
+    VCS = "vcs"
+    VCXPROJ = "vcxproj"
+    VERILOG = "verilog"
+    VHD = "vhd"
+    VHDL = "vhdl"
+    VISIO = "visio"
+    VTT = "vtt"
+    VUE = "vue"
+    WAD = "wad"
+    WASM = "wasm"
+    WAV = "wav"
+    WEBM = "webm"
+    WEBP = "webp"
+    WIM = "wim"
+    WINREGISTRY = "winregistry"
+    WMA = "wma"
+    WMF = "wmf"
+    WMV = "wmv"
+    WOFF = "woff"
+    WOFF2 = "woff2"
+    XAR = "xar"
+    XCF = "xcf"
+    XLS = "xls"
+    XLSB = "xlsb"
+    XLSX = "xlsx"
+    XML = "xml"
+    XPI = "xpi"
+    XSD = "xsd"
+    XZ = "xz"
+    YAML = "yaml"
+    YARA = "yara"
+    ZIG = "zig"
+    ZIP = "zip"
+    ZLIBSTREAM = "zlibstream"
+    ZST = "zst"
+
+    def __repr__(self) -> str:
+        return str(self)
diff --git a/python/magika/types/magika_result.py b/python/magika/types/magika_result.py
new file mode 100644
index 00000000..34a3a34e
--- /dev/null
+++ b/python/magika/types/magika_result.py
@@ -0,0 +1,34 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from magika.types.content_type_info import ContentTypeInfo
+
+
+@dataclass(frozen=True)
+class MagikaResult:
+    """Immutable result record: two content type descriptions and a score."""
+
+    # NOTE(review): presumably the content type inferred by the deep-learning
+    # model, before any post-processing -- confirm against the code that
+    # builds MagikaResult.
+    dl: ContentTypeInfo
+    # NOTE(review): presumably the content type finally reported to the
+    # caller -- confirm against the code that builds MagikaResult.
+    output: ContentTypeInfo
+    # Score associated with the result (a float; range not enforced here).
+    score: float
diff --git a/python/magika/types.py b/python/magika/types/model.py
similarity index 53%
rename from python/magika/types.py
rename to python/magika/types/model.py
index 60c00b05..94fcc9c2 100644
--- a/python/magika/types.py
+++ b/python/magika/types/model.py
@@ -13,21 +13,14 @@
 # limitations under the License.
 
 
-from __future__ import annotations
-
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Dict, List
 
-
-@dataclass
-class ModelFeatures:
-    beg: List[int]
-    mid: List[int]
-    end: List[int]
+from magika.types.content_type_label import ContentTypeLabel
 
 
-@dataclass
-class ModelFeaturesV2:
+@dataclass(frozen=True)
+class ModelFeatures:
     beg: List[int]
     mid: List[int]
     end: List[int]
@@ -39,43 +32,22 @@ class ModelFeaturesV2:
     offset_0x9800_0x9807: List[int]
 
 
-@dataclass
+@dataclass(frozen=True)
 class ModelOutput:
-    ct_label: str
-    score: float
-
-
-@dataclass
-class MagikaResult:
-    path: str
-    dl: ModelOutputFields
-    output: MagikaOutputFields
-
-
-@dataclass
-class ModelOutputFields:
-    ct_label: Optional[str]
-    score: Optional[float]
-    group: Optional[str]
-    mime_type: Optional[str]
-    magic: Optional[str]
-    description: Optional[str]
-    is_text: Optional[bool]
-
-
-@dataclass
-class MagikaOutputFields:
-    ct_label: str
+    ct_label: ContentTypeLabel
     score: float
-    group: str
-    mime_type: str
-    magic: str
-    description: str
-    is_text: bool
 
 
-@dataclass
-class FeedbackReport:
-    hash: str
-    features: ModelFeatures
-    result: MagikaResult
+@dataclass(frozen=True)
+class ModelConfig:
+    beg_size: int
+    mid_size: int
+    end_size: int
+    use_inputs_at_offsets: bool
+    medium_confidence_threshold: float
+    min_file_size_for_dl: int
+    padding_token: int
+    block_size: int
+    target_labels_space: List[ContentTypeLabel]
+    thresholds: Dict[ContentTypeLabel, float]
+    overwrite_map: Dict[ContentTypeLabel, ContentTypeLabel]
diff --git a/python/magika/prediction_mode.py b/python/magika/types/prediction_mode.py
similarity index 94%
rename from python/magika/prediction_mode.py
rename to python/magika/types/prediction_mode.py
index 6bd8e53f..fb54eaa7 100644
--- a/python/magika/prediction_mode.py
+++ b/python/magika/types/prediction_mode.py
@@ -17,7 +17,7 @@
 import enum
 from typing import List
 
-from magika.strenum import LowerCaseStrEnum
+from magika.types.strenum import LowerCaseStrEnum
 
 
 class PredictionMode(LowerCaseStrEnum):
diff --git a/python/magika/types/status.py b/python/magika/types/status.py
new file mode 100644
index 00000000..ee590aa8
--- /dev/null
+++ b/python/magika/types/status.py
@@ -0,0 +1,32 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from magika.types.strenum import StrEnum
+
+
+class Status(StrEnum):
+    """Outcome codes; each member's value is its lower-case string name."""
+
+    # Success; no error occurred.
+    OK = "ok"
+
+    # Used when a file path does not exist
+    FILE_NOT_FOUND_ERROR = "file_not_found_error"
+
+    # Used when a file path exists, but there are permission issues, e.g., can't
+    # read file
+    PERMISSION_ERROR = "permission_error"
+
+    # Represents a generic error-like unknown status.
+    UNKNOWN = "unknown"
diff --git a/python/magika/types/statusor.py b/python/magika/types/statusor.py
new file mode 100644
index 00000000..7f1e1cf1
--- /dev/null
+++ b/python/magika/types/statusor.py
@@ -0,0 +1,80 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Generic, Optional, TypeVar
+
+from magika.types.status import Status
+
+T = TypeVar("T")
+
+
+class StatusOr(Generic[T]):
+    """Hold either a value of type ``T`` or a non-OK ``Status``.
+
+    Exactly one of the two is meaningful: an OK instance carries a value,
+    a non-OK instance carries only the status describing the failure.
+    """
+
+    def __init__(self, *, status: Status = Status.OK, value: Optional[T] = None):
+        """Store ``status`` and ``value`` and validate their combination.
+
+        Raises:
+            ValueError: if ``status`` is OK but ``value`` is None, or if
+                ``status`` is not OK but a ``value`` was supplied.
+        """
+        self._status = status
+        self._value = value
+        # StatusOr is a plain class, not a dataclass, so nothing invokes
+        # __post_init__ automatically; run the invariant check explicitly.
+        self.__post_init__()
+
+    def __post_init__(self) -> None:
+        """Check that a value is present if and only if the status is OK."""
+        if self._status == Status.OK:
+            if self._value is None:
+                raise ValueError("value must be set when status == OK")
+        else:
+            if self._value is not None:
+                raise ValueError("value cannot be set when status != OK")
+
+    @property
+    def ok(self) -> bool:
+        """Whether the status is ``Status.OK``."""
+        return self._status == Status.OK
+
+    @property
+    def status(self) -> Status:
+        """The status stored at construction time."""
+        return self._status
+
+    @property
+    def value(self) -> T:
+        """The wrapped value.
+
+        Raises:
+            ValueError: if the status is not OK.
+        """
+        if self.ok:
+            assert self._value is not None
+            return self._value
+        raise ValueError("value is not set when status != OK")
+
+    def __repr__(self) -> str:
+        return str(self)
+
+    def __str__(self) -> str:
+        # Read the private field rather than the `value` property: the
+        # property raises for non-OK instances, which would make error
+        # results impossible to print or log.
+        return f"StatusOr(status={self.status}, value={self._value})"
diff --git a/python/magika/strenum.py b/python/magika/types/strenum.py
similarity index 85%
rename from python/magika/strenum.py
rename to python/magika/types/strenum.py
index db219285..de4bcb1a 100644
--- a/python/magika/strenum.py
+++ b/python/magika/types/strenum.py
@@ -30,20 +30,20 @@ class Example(StrEnum):
         assert Example.MixedCase == "MixedCase"
     """
 
-    def __new__(cls, value: Union[str, StrEnum], *args, **kwargs):
+    def __new__(cls, value: Union[str, StrEnum], *args, **kwargs):  # type: ignore[no-untyped-def]
         if not isinstance(value, (str, enum.auto)):
             raise TypeError(
                 f"Values of StrEnums must be strings: {value!r} is a {type(value)}"
             )
         return super().__new__(cls, value, *args, **kwargs)
 
-    def __str__(self):
+    def __str__(self) -> str:
         return str(self.value)
 
-    def _generate_next_value_(name, *_):
+    def _generate_next_value_(name, *_):  # type: ignore[no-untyped-def]
         return name
 
 
 class LowerCaseStrEnum(StrEnum):
-    def _generate_next_value_(name, *_):
+    def _generate_next_value_(name, *_):  # type: ignore[no-untyped-def]
         return name.lower().replace("_", "-")
diff --git a/python/pyproject.toml b/python/pyproject.toml
index b74dadd5..23de4466 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,9 +1,10 @@
 [tool.poetry]
 name = "magika"
-version = "0.5.2-dev"
-description = "A tool to determine the content type of a file with deep-learning"
-authors = ["Yanick Fratantonio <yanickf@google.com>"]
+version = "0.6.0-dev"
+description = "A tool to determine the content type of a file with deep learning"
+authors = ["Magika Developers <magika-dev@google.com>"]
 readme = "README.md"
+license = "Apache-2.0"
 packages = [{include = "magika"}]
 
 [tool.poetry.dependencies]
@@ -18,9 +19,6 @@ numpy = [
 tabulate = "^0.9.0"
 python-dotenv = "^1.0.1"
 
-[tool.poetry.scripts]
-magika = "magika.cli.magika:main"
-
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.1"
 ipython = [
diff --git a/python/tests/test_features_extraction.py b/python/tests/test_features_extraction.py
index 85920477..ce80550b 100644
--- a/python/tests/test_features_extraction.py
+++ b/python/tests/test_features_extraction.py
@@ -18,14 +18,13 @@
 import math
 import random
 import string
-import tempfile
 from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import List, Tuple
 
 from magika import Magika
 from magika.seekable import Buffer
-from magika.types import ModelFeatures, ModelFeaturesV2
+from magika.types import ModelFeatures
 from tests.utils import get_tests_data_dir
 
 random.seed(42)
@@ -45,73 +44,23 @@ class TestInfo:
     __test__ = False
 
 
-def test_features_extraction(debug: bool = False) -> None:
-    """This iterates over the content in the test suite and checks whether the
-    trivial implementation matches the python module one, which is the reference
-    code."""
-
-    tests_cases = _get_tests_cases_from_reference()
-
-    for test_case in tests_cases:
-        test_info = TestInfo(**test_case["test_info"])
-        test_content = base64.b64decode(test_case["content"])
-        expected_features = ModelFeatures(**test_case["features_v1"])
-
-        beg_size = test_info.beg_size
-        mid_size = test_info.mid_size
-        end_size = test_info.end_size
-        block_size = test_info.block_size
-        padding_token = test_info.padding_token
-
-        if debug:
-            print(f"Test details: {test_info} =>")
-
-        features_from_bytes = Magika._extract_features_from_bytes(
-            test_content, beg_size, mid_size, end_size, padding_token, block_size
-        )
-        with tempfile.TemporaryDirectory() as td:
-            tf_path = Path(td) / "test.dat"
-            tf_path.write_bytes(test_content)
-            features_from_path = Magika._extract_features_from_path(
-                tf_path, beg_size, mid_size, end_size, padding_token, block_size
-            )
-
-        comparison_by_bytes = {}
-        comparison_by_bytes["beg"] = features_from_bytes.beg == expected_features.beg
-        comparison_by_bytes["mid"] = features_from_bytes.mid == expected_features.mid
-        comparison_by_bytes["end"] = features_from_bytes.end == expected_features.end
-        comparison_by_bytes["all"] = set(comparison_by_bytes.values()) == set([True])
-
-        comparison_by_path = {}
-        comparison_by_path["beg"] = features_from_path.beg == expected_features.beg
-        comparison_by_path["mid"] = features_from_path.mid == expected_features.mid
-        comparison_by_path["end"] = features_from_path.end == expected_features.end
-        comparison_by_path["all"] = set(comparison_by_path.values()) == set([True])
-
-        if debug:
-            print("comparison_by_bytes: " + json.dumps(comparison_by_bytes))
-
-        if not comparison_by_bytes["all"] or not comparison_by_path["all"]:
-            raise Exception
-
-
 def test_features_extraction_v2(debug: bool = False) -> None:
     tests_cases = _get_tests_cases_from_reference()
 
     for test_case in tests_cases:
         test_info = TestInfo(**test_case["test_info"])
         test_content = base64.b64decode(test_case["content"])
-        expected_features = ModelFeaturesV2(**test_case["features_v2"])
-
-        beg_size = test_info.beg_size
-        mid_size = test_info.mid_size
-        end_size = test_info.end_size
-        block_size = test_info.block_size
-        padding_token = test_info.padding_token
+        expected_features = ModelFeatures(**test_case["features_v2"])
 
         s = Buffer(test_content)
-        features = Magika._extract_features_from_seekable_v2(
-            s, beg_size, mid_size, end_size, padding_token, block_size
+        features = Magika._extract_features_from_seekable(
+            s,
+            beg_size=test_info.beg_size,
+            mid_size=test_info.mid_size,
+            end_size=test_info.end_size,
+            padding_token=test_info.padding_token,
+            block_size=test_info.block_size,
+            use_inputs_at_offsets=True,
         )
 
         with_error = False
@@ -197,17 +146,19 @@ def generate_features_extraction_reference():
 
     for test_info, test_content in test_suite:
         s = Buffer(test_content)
-        features_v1 = Magika._extract_features_from_seekable(
-            s, beg_size, mid_size, end_size, padding_token, block_size
-        )
-        features_v2 = Magika._extract_features_from_seekable_v2(
-            s, beg_size, mid_size, end_size, padding_token, block_size
+        features_v2 = Magika._extract_features_from_seekable(
+            s,
+            beg_size,
+            mid_size,
+            end_size,
+            padding_token,
+            block_size,
+            use_inputs_at_offsets=True,
         )
 
         test_case = {
             "test_info": asdict(test_info),
             "content": base64.b64encode(test_content).decode("ascii"),
-            "features_v1": asdict(features_v1),
             "features_v2": asdict(features_v2),
         }
         ref_features_extraction_tests.append(test_case)
@@ -316,5 +267,4 @@ def _get_features_extration_tests_path() -> Path:
 
 
 if __name__ == "__main__":
-    test_features_extraction(debug=True)
     test_features_extraction_v2(debug=True)
diff --git a/python/tests/test_magika_python_cli.py b/python/tests/test_magika_python_cli.py
deleted file mode 100644
index 42cc21e1..00000000
--- a/python/tests/test_magika_python_cli.py
+++ /dev/null
@@ -1,779 +0,0 @@
-# Copyright 2024 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import signal
-import subprocess
-import tempfile
-from pathlib import Path
-from typing import Any
-
-import pytest
-
-from magika.content_types import ContentType, ContentTypesManager
-from magika.prediction_mode import PredictionMode
-from tests import utils
-from tests.utils_magika_python_client import MagikaClientError, run_magika_python_cli
-
-
-@pytest.mark.smoketest
-def test_magika_python_cli_with_one_test_file() -> None:
-    test_file_path = utils.get_basic_test_files_paths()[0]
-
-    stdout, stderr = run_magika_python_cli([test_file_path])
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--json"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, json_output=True
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--jsonl"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, jsonl_output=True
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--output-score"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, output_score=True
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--mime-type"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, mime_output=True
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--label"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, label_output=True
-    )
-
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], extra_cli_options=["--compatibility-mode"]
-    )
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        [test_file_path], stdout, stderr, compatibility_mode=True
-    )
-
-
-def test_magika_python_cli_with_very_small_test_files() -> None:
-    """Magika does not use the DL model for very small files. This test covers
-    these scenarios.
-    """
-
-    with tempfile.TemporaryDirectory() as td:
-        text_test_path = Path(td) / "small.txt"
-        text_test_path.write_text("small test")
-        stdout, stderr = run_magika_python_cli([text_test_path], label_output=True)
-        assert (
-            utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)[0][1]
-            == ContentType.GENERIC_TEXT
-        )
-
-        binary_test_path = Path(td) / "small.dat"
-        binary_test_path.write_bytes(b"\x80\xff")
-        stdout, stderr = run_magika_python_cli([binary_test_path], label_output=True)
-        assert (
-            utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)[0][1]
-            == ContentType.UNKNOWN
-        )
-
-
-def test_magika_cli_with_small_test_files() -> None:
-    """Magika needs to pad files that are small. This test covers scenarios
-    where padding is relevant.
-    """
-
-    with tempfile.TemporaryDirectory() as td:
-        text_test_path = Path(td) / "small.txt"
-        # small, but bigger than the threshold to use the DL model
-        text_test_path.write_text("A" * 32)
-        _ = run_magika_python_cli([text_test_path], label_output=True)
-        # we do not care about the prediction
-
-
-def test_magika_cli_with_empty_file() -> None:
-    with tempfile.TemporaryDirectory() as td:
-        empty_test_path = Path(td) / "empty.dat"
-        empty_test_path.touch()
-        stdout, stderr = run_magika_python_cli([empty_test_path], label_output=True)
-        assert (
-            utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)[0][1]
-            == ContentType.EMPTY
-        )
-
-
-def test_magika_cli_with_directories() -> None:
-    with tempfile.TemporaryDirectory() as td:
-        test_files_num = 3
-        for idx in range(test_files_num):
-            p = Path(td) / f"test-{idx}.txt"
-            p.write_text("test")
-
-        # run without recursive mode
-        stdout, stderr = run_magika_python_cli([Path(td)], label_output=True)
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 1
-        assert predicted_cts[0][1] == "directory"
-
-        # run with recursive mode
-        stdout, stderr = run_magika_python_cli(
-            [Path(td)], label_output=True, extra_cli_options=["--recursive"]
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == test_files_num
-        for _, ct in predicted_cts:
-            assert ct == ContentType.GENERIC_TEXT
-
-
-def test_magika_cli_with_symlinks() -> None:
-    with tempfile.TemporaryDirectory() as td:
-        test_path = Path(td) / "test.txt"
-        test_path.write_text("test")
-
-        symlink_path = Path(td) / "symlink-test.txt"
-        symlink_path.symlink_to(test_path)
-
-        # run without --no-dereference mode; symlinks are dereferenced
-        stdout, stderr = run_magika_python_cli([symlink_path], label_output=True)
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 1
-        assert predicted_cts[0][1] == ContentType.GENERIC_TEXT
-
-        # run with --no-dereference, to avoid dereferencing symlinks
-        stdout, stderr = run_magika_python_cli(
-            [symlink_path], label_output=True, extra_cli_options=["--no-dereference"]
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 1
-        assert predicted_cts[0][1] == "symlink"
-
-        # run with --no-dereference, to avoid dereferencing symlinks
-        stdout, stderr = run_magika_python_cli(
-            [symlink_path], extra_cli_options=["--no-dereference"]
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 1
-        assert isinstance(predicted_cts[0][0], Path)
-        assert isinstance(predicted_cts[0][1], str)
-        assert predicted_cts[0][1].startswith("Symbolic link")
-        assert predicted_cts[0][1].find(str(test_path)) >= 0
-
-
-def test_magika_cli_with_files_with_permission_errors() -> None:
-    with tempfile.TemporaryDirectory() as td:
-        unreadable_test_path = Path(td) / "test1.txt"
-        unreadable_test_path.write_text("test")
-
-        # make it unreadable
-        unreadable_test_path.chmod(0o000)
-
-        stdout, stderr = run_magika_python_cli(
-            [unreadable_test_path], label_output=True
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 1
-        assert predicted_cts[0][1] == ContentType.PERMISSION_ERROR
-
-        # add another, readable file, and check that it is scanned properly
-        readable_test_path = Path(td) / "test2.txt"
-        readable_test_path.write_text("test")
-        stdout, stderr = run_magika_python_cli(
-            [unreadable_test_path, readable_test_path], label_output=True
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 2
-        assert predicted_cts[0][1] == ContentType.PERMISSION_ERROR
-        assert predicted_cts[1][1] == ContentType.GENERIC_TEXT
-
-        # try the same, but passing the directory as input
-        stdout, stderr = run_magika_python_cli(
-            [Path(td)], label_output=True, extra_cli_options=["--recursive"]
-        )
-        predicted_cts = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)
-        assert len(predicted_cts) == 2
-        assert predicted_cts[0][1] == ContentType.PERMISSION_ERROR
-        assert predicted_cts[1][1] == ContentType.GENERIC_TEXT
-
-
-def test_magika_cli_with_basic_test_files() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, 5, 10, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n])
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-
-def test_magika_cli_with_mitra_test_files() -> None:
-    test_files_paths = utils.get_mitra_test_files_paths()
-
-    stdout, stderr = run_magika_python_cli(test_files_paths)
-    utils.check_magika_cli_output_matches_expected_by_ext(
-        test_files_paths, stdout, stderr
-    )
-
-
-def test_magika_cli_with_basic_test_files_and_json_output() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], json_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, json_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--json"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, json_output=True
-        )
-
-
-def test_magika_cli_with_basic_test_files_and_jsonl_output() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], jsonl_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, jsonl_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--jsonl"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, jsonl_output=True
-        )
-
-
-def test_magika_cli_with_basic_test_files_and_probability() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], output_score=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, output_score=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["-s"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, output_score=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--output-score"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, output_score=True
-        )
-
-
-def test_magika_cli_with_basic_test_files_and_mime_output() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], mime_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, mime_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["-i"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, mime_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--mime-type"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, mime_output=True
-        )
-
-
-def test_magika_cli_with_basic_test_files_and_label_output() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], label_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, label_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["-l"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, label_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--label"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, label_output=True
-        )
-
-
-def test_magika_cli_with_basic_test_files_and_compatibility_mode() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], compatibility_mode=True
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, compatibility_mode=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["-c"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, compatibility_mode=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--compatibility-mode"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, compatibility_mode=True
-        )
-
-
-def test_magika_cli_output_with_low_confidence_prediction() -> None:
-    # This is something that looks like MarkDown, such that the model's best
-    # guess will be MarkDown, but not high enought to be trusted. Here we check
-    # that what we print is reasonable.
-    low_confidence_test_content = "# This is a very simple text"
-    # This is a short textual string, which will not even hit the DL model.
-    high_confidence_test_content = "Test"
-
-    ctm = ContentTypesManager()
-    txt_ct = ctm.get_or_raise(ContentType.GENERIC_TEXT)
-    txt_description = txt_ct.description
-    txt_group = txt_ct.group
-    md_ct = ctm.get_or_raise("markdown")
-    md_description = md_ct.description
-    md_group = md_ct.group
-
-    with tempfile.TemporaryDirectory() as td:
-        # test the low confidence prediction
-        low_confidence_tf_path = Path(td) / "low_confidence_test.txt"
-        low_confidence_tf_path.write_text(low_confidence_test_content)
-        stdout, stderr = run_magika_python_cli(
-            [low_confidence_tf_path],
-        )
-
-        low_confidence_expected_stdout_prefix = f"{str(low_confidence_tf_path)}: {txt_description} ({txt_group}) [Low-confidence model best-guess: {md_description} ({md_group}), score="
-
-        assert stdout.startswith(low_confidence_expected_stdout_prefix)
-        assert stderr == ""
-
-        # test the high confidence prediction
-        high_confidence_tf_path = Path(td) / "high_confidence_test.txt"
-        high_confidence_tf_path.write_text(high_confidence_test_content)
-        stdout, stderr = run_magika_python_cli(
-            [high_confidence_tf_path],
-        )
-
-        high_confidence_expected_stdout = (
-            f"{str(high_confidence_tf_path)}: {txt_description} ({txt_group})"
-        )
-
-        assert stdout.strip() == high_confidence_expected_stdout
-        assert stderr == ""
-
-
-def test_magika_cli_with_basic_test_files_and_different_prediction_modes() -> None:
-    # Here we test only the CLI aspect; we test the different behaviors with
-    # different prediction modes when we test the Magika module.
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n])
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n],
-            extra_cli_options=["--prediction-mode", PredictionMode.MEDIUM_CONFIDENCE],
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n],
-            extra_cli_options=["--prediction-mode", PredictionMode.BEST_GUESS],
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n],
-            extra_cli_options=["--prediction-mode", PredictionMode.HIGH_CONFIDENCE],
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-        # Test with invalid prediction mode
-        with pytest.raises(MagikaClientError):
-            _ = run_magika_python_cli(
-                test_files_paths[:n],
-                extra_cli_options=["--prediction-mode", "non-existing-mode"],
-            )
-
-
-def test_magika_cli_with_python_and_not_python_files() -> None:
-    with tempfile.TemporaryDirectory() as td:
-        # the test needs to be longer than "too small for DL model"
-        python_test_path = Path(td) / "real.py"
-        python_test_path.write_text("import flask\nimport requests")
-        not_python_test_path = Path(td) / "not-real.py"
-        not_python_test_path.write_text("xmport asd\nxmport requests")
-
-        # check that a python file is detected as such
-        stdout, stderr = run_magika_python_cli(
-            [python_test_path], extra_cli_options=["--label"]
-        )
-        predicted_ct = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)[
-            0
-        ][1]
-        assert predicted_ct == "python"
-
-        # check that a file that is very far from being a python file is
-        # detected as text
-        stdout, stderr = run_magika_python_cli(
-            [not_python_test_path], extra_cli_options=["--label"]
-        )
-        predicted_ct = utils.get_magika_cli_output_from_stdout_stderr(stdout, stderr)[
-            0
-        ][1]
-        assert predicted_ct == "txt"
-
-
-def test_magika_cli_with_basic_test_files_and_custom_batch_sizes() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for batch_size in [1, 2, 3, 16]:
-        for n in [1, 2, 5, len(test_files_paths)]:
-            stdout, stderr = run_magika_python_cli(
-                test_files_paths[:n], batch_size=batch_size
-            )
-            utils.check_magika_cli_output_matches_expected_by_ext(
-                test_files_paths[:n], stdout, stderr
-            )
-
-            stdout, stderr = run_magika_python_cli(
-                test_files_paths[:n],
-                extra_cli_options=["--batch-size", str(batch_size)],
-            )
-            utils.check_magika_cli_output_matches_expected_by_ext(
-                test_files_paths[:n], stdout, stderr
-            )
-
-
-def test_magika_cli_with_multiple_copies_of_the_same_file() -> None:
-    max_repetitions_num = 10
-    test_file_path = utils.get_one_basic_test_file_path()
-    test_files_paths = [test_file_path] * max_repetitions_num
-
-    for n in [2, max_repetitions_num]:
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n])
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr
-        )
-
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], json_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, json_output=True
-        )
-
-        stdout, stderr = run_magika_python_cli(test_files_paths[:n], jsonl_output=True)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, jsonl_output=True
-        )
-
-
-def test_magika_cli_with_many_files() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    for n in [100, 1000]:
-        test_files_paths = [test_file_path] * n
-        stdout, stderr = run_magika_python_cli(test_files_paths)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths, stdout, stderr
-        )
-
-
-@pytest.mark.slow
-def test_magika_cli_with_really_many_files() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    for n in [10000]:
-        test_files_paths = [test_file_path] * n
-        stdout, stderr = run_magika_python_cli(test_files_paths)
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths, stdout, stderr
-        )
-
-
-@pytest.mark.slow
-def test_magika_cli_with_big_file() -> None:
-    def signal_handler(signum: int, frame: Any) -> None:
-        raise Exception("Timeout")
-
-    signal.signal(signal.SIGALRM, signal_handler)
-
-    # It should take much less than this, but pytest weird scheduling sometimes
-    # creates unexpected slow downs.
-    timeout = 2
-
-    for sample_size in [1000, 10000, 1_000_000, 1_000_000_000, 10_000_000_000]:
-        with tempfile.TemporaryDirectory() as td:
-            sample_path = Path(td) / "sample.dat"
-            utils.write_random_file_with_size(sample_path, sample_size)
-            print(f"Starting running Magika with a timeout of {timeout}")
-            signal.alarm(timeout)
-            _ = run_magika_python_cli([sample_path])
-            signal.alarm(0)
-            print("Done running Magika")
-
-
-def test_magika_cli_with_bad_input() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    # Test without any argument or option
-    with pytest.raises(MagikaClientError) as e_info:
-        p = Path("/this/does/not/exist")
-        _ = run_magika_python_cli([])
-    assert e_info.value.stdout == ""
-    assert (
-        e_info.value.stderr
-        == "ERROR: You need to pass at least one path, or - to read from stdin.\n"
-    )
-
-    # Test with file that does not exist
-    with pytest.raises(MagikaClientError) as e_info:
-        p = Path("/this/does/not/exist")
-        _ = run_magika_python_cli([p], label_output=True)
-    assert e_info.value.stdout == ""
-    assert (
-        e_info.value.stderr == f'ERROR: File or directory "{str(p)}" does not exist.\n'
-    )
-
-    # Test with incompatible list of options
-    with pytest.raises(MagikaClientError) as e_info:
-        _ = run_magika_python_cli([test_file_path], json_output=True, jsonl_output=True)
-    assert e_info.value.stdout == ""
-    assert (
-        e_info.value.stderr
-        == "ERROR: You should use either --json or --jsonl, not both.\n"
-    )
-
-    # Test with an option does not exist
-    with pytest.raises(MagikaClientError) as e_info:
-        _ = run_magika_python_cli(
-            [test_file_path], extra_cli_options=["--non-existing-option"]
-        )
-    assert e_info.value.stdout == ""
-    error_lines = e_info.value.stderr.split("\n")
-    assert error_lines[0].startswith("Usage: magika [OPTIONS] [FILE]...")
-    assert error_lines[-2].startswith("Error: No such option:")
-    assert error_lines[-1] == ""
-
-
-def test_magika_cli_with_reading_from_stdin() -> None:
-    ctm = ContentTypesManager()
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    cmd = f"cat {str(test_file_path)} | magika - --jsonl"
-    p = subprocess.run(cmd, capture_output=True, text=True, check=True, shell=True)
-    stdout, stderr = p.stdout, p.stderr
-
-    entries = utils.get_magika_cli_output_from_stdout_stderr(
-        stdout, stderr, jsonl_output=True
-    )
-    sample_path, entry = entries[0]
-    assert isinstance(sample_path, Path)
-    assert isinstance(entry, dict)
-
-    file_ext = test_file_path.suffix.lstrip(".")
-    true_cts = ctm.get_cts_by_ext(file_ext)
-    true_cts_names = [ct.name for ct in true_cts]
-
-    assert str(sample_path) == "-"
-    assert str(entry["path"]) == "-"
-    assert entry["output"]["ct_label"] in true_cts_names
-
-    # test with some bad input
-    cmd = f"cat {str(test_file_path)} | magika - {str(test_file_path)}"
-    p = subprocess.run(cmd, capture_output=True, text=True, check=False, shell=True)
-    assert p.returncode == 1
-    assert p.stdout == ""
-    assert p.stderr.find('ERROR: If you pass "-", you cannot pass anything else.') >= 0
-
-    cmd = f"cat {str(test_file_path)} | magika - -r"
-    p = subprocess.run(cmd, capture_output=True, text=True, check=False, shell=True)
-    assert p.returncode == 1
-    assert p.stdout == ""
-    assert (
-        p.stderr.find('ERROR: If you pass "-", recursive scan is not meaningful.') >= 0
-    )
-
-
-def test_magika_cli_with_colors() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    # check that it does not crash when using colors and that we are actually
-    # using colors
-    stdout, stderr = run_magika_python_cli([test_file_path], with_colors=True)
-    assert stdout.find("\033") >= 0 or stderr.find("\033") >= 0
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], with_colors=True, mime_output=True
-    )
-    assert stdout.find("\033") >= 0 or stderr.find("\033") >= 0
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], with_colors=True, verbose=True, debug=True
-    )
-    assert stdout.find("\033") >= 0 or stderr.find("\033") >= 0
-
-
-def test_magika_cli_with_no_colors() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    # check that we are not using colors when --no-colors is passed
-    stdout, stderr = run_magika_python_cli([test_file_path], with_colors=False)
-    assert stdout.find("\033") == -1 and stderr.find("\033") == -1
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], with_colors=False, mime_output=True
-    )
-    assert stdout.find("\033") == -1 and stderr.find("\033") == -1
-    stdout, stderr = run_magika_python_cli(
-        [test_file_path], with_colors=False, verbose=True, debug=True
-    )
-    assert stdout.find("\033") == -1 and stderr.find("\033") == -1
-
-
-def test_magika_cli_generate_report() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    for n in [1, 2, len(test_files_paths)]:
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], generate_report=True
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, generate_report=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--generate-report"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, generate_report=True
-        )
-
-        stdout, stderr = run_magika_python_cli(
-            test_files_paths[:n], extra_cli_options=["--mime-type", "--generate-report"]
-        )
-        utils.check_magika_cli_output_matches_expected_by_ext(
-            test_files_paths[:n], stdout, stderr, mime_output=True, generate_report=True
-        )
-
-
-def test_magika_cli_output_version() -> None:
-    stdout, stderr = run_magika_python_cli([], extra_cli_options=["--version"])
-
-    lines = utils.get_lines_from_stream(stdout)
-    assert len(lines) == 2
-    assert lines[0].startswith("Magika version")
-    assert lines[1].startswith("Default model")
-
-    assert stderr == ""
-
-
-def test_magika_cli_help() -> None:
-    stdout_short, stderr_short = run_magika_python_cli([], extra_cli_options=["-h"])
-    stdout_long, stderr_long = run_magika_python_cli([], extra_cli_options=["--help"])
-
-    for stdout, stderr in zip([stdout_short, stdout_long], [stderr_short, stderr_long]):
-        assert stdout.find("Magika version") >= 0
-        assert stdout.find("Default model") >= 0
-
-        assert stderr == ""
-
-
-def test_magika_cli_list_content_types() -> None:
-    test_file_path = utils.get_one_basic_test_file_path()
-
-    stdout, stderr = run_magika_python_cli([], list_output_content_types=True)
-
-    lines = utils.get_lines_from_stream(stdout)
-    header = lines[0]
-    assert header.find("Content Type Label") >= 0
-    assert header.find("Description") >= 0
-    assert stderr == ""
-
-    with pytest.raises(MagikaClientError):
-        _ = run_magika_python_cli([test_file_path], list_output_content_types=True)
-
-
-def test_magika_cli_performance_statistics_report() -> None:
-    test_files_paths = utils.get_basic_test_files_paths()
-
-    _, stderr = run_magika_python_cli([test_files_paths[0]])
-    assert stderr == ""
-
-    _, stderr = run_magika_python_cli(test_files_paths)
-    assert stderr == ""
-
-    _, stderr = run_magika_python_cli(
-        test_files_paths[:10],
-        batch_size=10,
-        extra_cli_options=["--dump-performance-stats"],
-    )
-    stderr_lines = stderr.split("\n")
-    assert stderr_lines[0].startswith("PERFORMANCE STATISTICS REPORT")
-    assert stderr_lines[1].startswith("Not enough data")
-
-    _, stderr = run_magika_python_cli(
-        test_files_paths[:10],
-        batch_size=1,
-        extra_cli_options=["--dump-performance-stats"],
-    )
-    stderr_lines = stderr.split("\n")
-    assert stderr_lines[0].startswith("PERFORMANCE STATISTICS REPORT")
-    assert stderr_lines[1].startswith("KEY")
-    assert stderr_lines[2].startswith("mean")
diff --git a/python/tests/test_magika_python_module.py b/python/tests/test_magika_python_module.py
index 0524af06..c61f6a78 100644
--- a/python/tests/test_magika_python_module.py
+++ b/python/tests/test_magika_python_module.py
@@ -12,13 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import signal
 import tempfile
 from pathlib import Path
+from typing import Any
 
 import pytest
 
 from magika import Magika, PredictionMode
-from magika.content_types import ContentType, ContentTypesManager
+from magika.types import ContentTypeLabel, Status
 from tests import utils
 
 
@@ -48,17 +50,15 @@ def test_magika_module_with_basic_tests_by_paths() -> None:
     tests_paths = utils.get_basic_test_files_paths()
 
     m = Magika(model_dir=model_dir)
-    ctm = ContentTypesManager()
 
     results = m.identify_paths(tests_paths)
 
     for test_path, result in zip(tests_paths, results):
-        file_ext = test_path.suffix.lstrip(".")
-        true_cts = ctm.get_cts_by_ext(file_ext)
-        assert len(true_cts) > 0
-        true_cts_labels = [ct.name for ct in true_cts]
-        assert result.path == str(test_path)
-        assert result.output.ct_label in true_cts_labels
+        assert result.ok
+        expected_ct_label = get_expected_content_type_label_from_test_file_path(
+            test_path
+        )
+        assert result.value.output.label == expected_ct_label
 
 
 def test_magika_module_with_basic_tests_by_path() -> None:
@@ -66,16 +66,14 @@ def test_magika_module_with_basic_tests_by_path() -> None:
     tests_paths = utils.get_basic_test_files_paths()
 
     m = Magika(model_dir=model_dir)
-    ctm = ContentTypesManager()
 
     for test_path in tests_paths:
         result = m.identify_path(test_path)
-        file_ext = test_path.suffix.lstrip(".")
-        true_cts = ctm.get_cts_by_ext(file_ext)
-        assert len(true_cts) > 0
-        true_cts_labels = [ct.name for ct in true_cts]
-        assert result.path == str(test_path)
-        assert result.output.ct_label in true_cts_labels
+        assert result.ok
+        expected_ct_label = get_expected_content_type_label_from_test_file_path(
+            test_path
+        )
+        assert result.value.output.label == expected_ct_label
 
 
 def test_magika_module_with_basic_tests_by_bytes() -> None:
@@ -83,17 +81,32 @@ def test_magika_module_with_basic_tests_by_bytes() -> None:
     tests_paths = utils.get_basic_test_files_paths()
 
     m = Magika(model_dir=model_dir)
-    ctm = ContentTypesManager()
 
     for test_path in tests_paths:
         content = test_path.read_bytes()
         result = m.identify_bytes(content)
-        file_ext = test_path.suffix.lstrip(".")
-        true_cts = ctm.get_cts_by_ext(file_ext)
-        assert len(true_cts) > 0
-        true_cts_labels = [ct.name for ct in true_cts]
-        assert result.path == "-"
-        assert result.output.ct_label in true_cts_labels
+        assert result.ok
+        expected_ct_label = get_expected_content_type_label_from_test_file_path(
+            test_path
+        )
+        assert result.value.output.label == expected_ct_label
+
+
+def test_magika_module_with_mitra_tests_by_paths() -> None:
+    model_dir = utils.get_default_model_dir()
+    tests_paths = utils.get_mitra_test_files_paths()
+
+    m = Magika(model_dir=model_dir)
+
+    results = m.identify_paths(tests_paths)
+
+    for test_path, result in zip(tests_paths, results):
+        print(f"Test: {test_path}")
+        assert result.ok
+        expected_ct_label = get_expected_content_type_label_from_test_file_path(
+            test_path
+        )
+        assert result.value.output.label == expected_ct_label
 
 
 def test_magika_module_with_empty_content() -> None:
@@ -102,18 +115,19 @@ def test_magika_module_with_empty_content() -> None:
     empty_content = b""
 
     res = m.identify_bytes(empty_content)
-    assert res.path == "-"
-    assert res.dl.ct_label is None
-    assert res.output.ct_label == ContentType.EMPTY
-    assert res.output.score == 1.0
+    assert res.ok
+    assert res.value.dl.label == ContentTypeLabel.UNDEFINED
+    assert res.value.output.label == ContentTypeLabel.EMPTY
+    assert res.value.score == 1.0
 
     with tempfile.TemporaryDirectory() as td:
         tf_path = Path(td) / "empty.dat"
         tf_path.write_bytes(empty_content)
         res = m.identify_path(tf_path)
-        assert res.path == str(tf_path)
-        assert res.dl.ct_label is None
-        assert res.output.score == 1.0
+        assert res.ok
+        assert res.value.dl.label == ContentTypeLabel.UNDEFINED
+        assert res.value.output.label == ContentTypeLabel.EMPTY
+        assert res.value.score == 1.0
 
 
 def test_magika_module_with_short_content() -> None:
@@ -122,68 +136,268 @@ def test_magika_module_with_short_content() -> None:
     text_content = b"asd"
     binary_content = b"\x80\x80\x80"
 
-    res = m.identify_bytes(text_content)
-    assert res.path == "-"
-    assert res.dl.ct_label is None
-    assert res.output.ct_label == ContentType.GENERIC_TEXT
-    assert res.output.score == 1.0
-
-    res = m.identify_bytes(binary_content)
-    assert res.path == "-"
-    assert res.dl.ct_label is None
-    assert res.output.ct_label == ContentType.UNKNOWN
-    assert res.output.score == 1.0
-
     for content, expected_ct_label in zip(
-        [text_content, binary_content], [ContentType.GENERIC_TEXT, ContentType.UNKNOWN]
+        [text_content, binary_content],
+        [ContentTypeLabel.TXT, ContentTypeLabel.UNKNOWN],
     ):
         with tempfile.TemporaryDirectory() as td:
+            # prediction via path
             tf_path = Path(td) / "file.txt"
             tf_path.write_bytes(content)
             res = m.identify_path(tf_path)
-            assert res.path == str(tf_path)
-            assert res.dl.ct_label is None
-            assert res.output.ct_label == expected_ct_label
-            assert res.output.score == 1.0
+            assert res.ok
+            assert res.value.dl.label == ContentTypeLabel.UNDEFINED
+            assert res.value.output.label == expected_ct_label
+            assert res.value.score == 1.0
+
+            # prediction via content
+            res = m.identify_bytes(content)
+            assert res.ok
+            assert res.value.dl.label == ContentTypeLabel.UNDEFINED
+            assert res.value.output.label == expected_ct_label
+            assert res.value.score == 1.0
+
+
+def test_magika_module_with_python_and_non_python_content() -> None:
+    python_content = (
+        b"import flask\nimport requests\n\ndef foo(a):\n    print(f'Test {a}')\n"
+    )
+    non_python_content = b"xmport asd\nxmport requests"
+
+    m = Magika()
+
+    res = m.identify_bytes(python_content)
+    assert res.ok
+    assert res.value.output.label == ContentTypeLabel.PYTHON
+
+    res = m.identify_bytes(non_python_content)
+    assert res.ok
+    assert res.value.output.label == ContentTypeLabel.TXT
 
 
 def test_magika_module_with_different_prediction_modes() -> None:
     model_dir = utils.get_default_model_dir()
     m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.BEST_GUESS)
-    assert m._get_output_ct_label_from_dl_result("python", 0.01) == "python"
-    assert m._get_output_ct_label_from_dl_result("python", 0.40) == "python"
-    assert m._get_output_ct_label_from_dl_result("python", 0.60) == "python"
-    assert m._get_output_ct_label_from_dl_result("python", 0.99) == "python"
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.01)
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.40)
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.60)
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.99)
+        == ContentTypeLabel.PYTHON
+    )
 
-    # test that the default is HIGH_CONFIDENCE
-    m = Magika(model_dir=model_dir)
-    assert m._get_output_ct_label_from_dl_result("python", 0.01) == "txt"
+    m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.MEDIUM_CONFIDENCE)
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.01)
+        == ContentTypeLabel.TXT
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(
+            ContentTypeLabel.PYTHON, m._model_config.medium_confidence_threshold - 0.01
+        )
+        == ContentTypeLabel.TXT
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.60)
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.99)
+        == ContentTypeLabel.PYTHON
+    )
+
+    m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.HIGH_CONFIDENCE)
+    high_confidence_threshold = m._model_config.thresholds.get(
+        ContentTypeLabel.PYTHON, m._model_config.medium_confidence_threshold
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.01)
+        == ContentTypeLabel.TXT
+    )
     assert (
         m._get_output_ct_label_from_dl_result(
-            "python", m._medium_confidence_threshold - 0.01
+            ContentTypeLabel.PYTHON, high_confidence_threshold - 0.01
         )
-        == "txt"
+        == ContentTypeLabel.TXT
     )
     assert (
         m._get_output_ct_label_from_dl_result(
-            "python", m._medium_confidence_threshold + 0.01
+            ContentTypeLabel.PYTHON, high_confidence_threshold + 0.01
         )
-        == "txt"
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.99)
+        == ContentTypeLabel.PYTHON
     )
-    assert m._get_output_ct_label_from_dl_result("python", 0.99) == "python"
 
-    m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.MEDIUM_CONFIDENCE)
-    assert m._get_output_ct_label_from_dl_result("python", 0.01) == "txt"
+    # test that the default is HIGH_CONFIDENCE
+    m = Magika(model_dir=model_dir)
+    high_confidence_threshold = m._model_config.thresholds.get(
+        ContentTypeLabel.PYTHON, m._model_config.medium_confidence_threshold
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.01)
+        == ContentTypeLabel.TXT
+    )
     assert (
         m._get_output_ct_label_from_dl_result(
-            "python", m._medium_confidence_threshold - 0.01
+            ContentTypeLabel.PYTHON, high_confidence_threshold - 0.01
         )
-        == "txt"
+        == ContentTypeLabel.TXT
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(
+            ContentTypeLabel.PYTHON, high_confidence_threshold + 0.01
+        )
+        == ContentTypeLabel.PYTHON
+    )
+    assert (
+        m._get_output_ct_label_from_dl_result(ContentTypeLabel.PYTHON, 0.99)
+        == ContentTypeLabel.PYTHON
     )
-    assert m._get_output_ct_label_from_dl_result("python", 0.60) == "python"
-    assert m._get_output_ct_label_from_dl_result("python", 0.99) == "python"
 
-    m = Magika(model_dir=model_dir, prediction_mode=PredictionMode.HIGH_CONFIDENCE)
-    assert m._get_output_ct_label_from_dl_result("python", 0.01) == "txt"
-    assert m._get_output_ct_label_from_dl_result("python", 0.60) == "txt"
-    assert m._get_output_ct_label_from_dl_result("python", 0.99) == "python"
+
+def test_magika_module_with_directory() -> None:
+    m = Magika()
+
+    with tempfile.TemporaryDirectory() as td:
+        td_path = Path(td)
+        res = m.identify_path(td_path)
+        assert res.ok
+        assert res.value.dl.label == ContentTypeLabel.UNDEFINED
+        assert res.value.output.label == ContentTypeLabel.DIRECTORY
+        assert res.value.score == 1.0
+
+
+def test_magika_module_multiple_copies_of_the_same_file() -> None:
+    with tempfile.TemporaryDirectory() as td:
+        test_path = Path(td) / "test.txt"
+        test_path.write_text("test")
+
+        test_paths = [test_path] * 3
+
+        m = Magika()
+        results = m.identify_paths(test_paths)
+        assert len(results) == len(test_paths)
+        for result in results:
+            assert result.ok
+            assert result.value.output.label == ContentTypeLabel.TXT
+
+
+def test_magika_cli_with_many_files() -> None:
+    test_file_path = utils.get_one_basic_test_file_path()
+
+    m = Magika()
+
+    for n in [10, 100]:
+        test_files_paths = [test_file_path] * n
+        results = m.identify_paths(test_files_paths)
+        for result in results:
+            assert result.ok
+            # TODO: check that the result is actually correct
+
+
+def test_magika_module_with_symlink() -> None:
+    with tempfile.TemporaryDirectory() as td:
+        test_path = Path(td) / "test.txt"
+        test_path.write_text("test")
+
+        symlink_path = Path(td) / "symlink-test.txt"
+        symlink_path.symlink_to(test_path)
+
+        m = Magika()
+        res = m.identify_path(test_path)
+        assert res.ok
+        assert res.value.output.label == ContentTypeLabel.TXT
+        res = m.identify_path(symlink_path)
+        assert res.ok
+        assert res.value.output.label == ContentTypeLabel.TXT
+
+        m = Magika(no_dereference=True)
+        res = m.identify_path(test_path)
+        assert res.ok
+        assert res.value.output.label == ContentTypeLabel.TXT
+        res = m.identify_path(symlink_path)
+        assert res.ok
+        assert res.value.output.label == ContentTypeLabel.SYMLINK
+
+
+def test_magika_module_with_non_existing_file() -> None:
+    m = Magika()
+
+    with tempfile.TemporaryDirectory() as td:
+        non_existing_path = Path(td) / "non_existing.txt"
+
+        res = m.identify_path(non_existing_path)
+        assert not res.ok
+        assert res.status == Status.FILE_NOT_FOUND_ERROR
+
+
+def test_magika_module_with_permission_error() -> None:
+    m = Magika()
+
+    with tempfile.TemporaryDirectory() as td:
+        unreadable_test_path = Path(td) / "test.txt"
+        unreadable_test_path.write_text("text")
+
+        unreadable_test_path.chmod(0o000)
+
+        res = m.identify_path(unreadable_test_path)
+        assert not res.ok
+        assert res.status == Status.PERMISSION_ERROR
+
+
+@pytest.mark.skip
+def test_magika_module_with_really_many_files() -> None:
+    test_file_path = utils.get_one_basic_test_file_path()
+
+    m = Magika()
+
+    for n in [10000]:
+        test_files_paths = [test_file_path] * n
+
+        results = m.identify_paths(test_files_paths)
+        for result in results:
+            assert result.ok
+            # TODO: add more checks
+
+
+@pytest.mark.slow
+def test_magika_module_with_big_file() -> None:
+    def signal_handler(signum: int, frame: Any) -> None:
+        raise Exception("Timeout")
+
+    signal.signal(signal.SIGALRM, signal_handler)
+
+    # It should take much less than this, but pytest's scheduling sometimes
+    # causes unexpected slowdowns.
+    timeout = 2
+
+    m = Magika()
+
+    for sample_size in [1000, 10000, 1_000_000, 1_000_000_000, 10_000_000_000]:
+        with tempfile.TemporaryDirectory() as td:
+            sample_path = Path(td) / "sample.dat"
+            utils.write_random_file_with_size(sample_path, sample_size)
+            print(f"Starting running Magika with a timeout of {timeout}")
+            signal.alarm(timeout)
+            res = m.identify_path(sample_path)
+            assert res.ok
+            signal.alarm(0)
+            print("Done running Magika")
+
+
+def get_expected_content_type_label_from_test_file_path(
+    test_path: Path,
+) -> ContentTypeLabel:
+    return ContentTypeLabel(test_path.parent.name)
diff --git a/python/tests/utils.py b/python/tests/utils.py
index a249c58e..611474e0 100644
--- a/python/tests/utils.py
+++ b/python/tests/utils.py
@@ -12,19 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import random
 import string
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
-
-from magika.content_types import ContentTypesManager
-from magika.types import (
-    MagikaOutputFields,
-    MagikaResult,
-    ModelFeatures,
-    ModelOutputFields,
-)
+from typing import List
 
 
 def get_tests_data_dir() -> Path:
@@ -51,13 +42,13 @@ def get_mitra_tests_files_dir() -> Path:
 
 def get_basic_test_files_paths() -> List[Path]:
     tests_files_dir = get_basic_tests_files_dir()
-    test_files_paths = sorted(filter(lambda p: p.is_file(), tests_files_dir.iterdir()))
+    test_files_paths = sorted(filter(lambda p: p.is_file(), tests_files_dir.rglob("*")))
     return test_files_paths
 
 
 def get_mitra_test_files_paths() -> List[Path]:
     tests_files_dir = get_mitra_tests_files_dir()
-    test_files_paths = sorted(filter(lambda p: p.is_file(), tests_files_dir.iterdir()))
+    test_files_paths = sorted(filter(lambda p: p.is_file(), tests_files_dir.rglob("*")))
     return test_files_paths
 
 
@@ -110,155 +101,3 @@ def get_default_model_dir() -> Path:
         / Magika.get_default_model_name()
     )
     return model_dir
-
-
-def check_magika_cli_output_matches_expected_by_ext(
-    samples_paths: List[Path], stdout: str, stderr: str, **kwargs: Any
-) -> None:
-    assert len(samples_paths) > 0
-    json_output = kwargs.get("json_output", False)
-    jsonl_output = kwargs.get("jsonl_output", False)
-    mime_output = kwargs.get("mime_output", False)
-    label_output = kwargs.get("label_output", False)
-    compatibility_mode = kwargs.get("compatibility_mode", False)
-    cpp_output = kwargs.get("cpp_output", False)
-    ctm = ContentTypesManager()
-    predicted_cts = get_magika_cli_output_from_stdout_stderr(stdout, stderr, **kwargs)
-    assert len(predicted_cts) > 0
-    assert len(samples_paths) == len(predicted_cts)
-    remaining_samples_paths = samples_paths[:]
-    for file_path, output in predicted_cts:
-        remaining_samples_paths.remove(file_path)
-        file_ext = file_path.suffix.lstrip(".")
-        if file_ext != "":
-            true_cts = ctm.get_cts_by_ext(file_ext)
-        else:
-            # The test file does not have any extension. In this case, we assume
-            # this is a test file path with the <dataset>/<content type>/<hash>
-            # pattern.
-            true_ct_name = file_path.parent.name
-            true_cts = [ctm.get_or_raise(true_ct_name)]
-        assert len(true_cts) > 0, f'File extension: "{file_ext}"'
-
-        true_cts_names = [ct.name for ct in true_cts]
-
-        if json_output or jsonl_output:
-            # check that each JSON entry satisfies the requirements
-            assert isinstance(output, dict)
-            dict_output: Dict[str, Any] = output
-            assert dict_output["output"]["ct_label"] in true_cts_names
-        elif cpp_output:
-            assert isinstance(output, str)
-            assert output.lower() in true_cts_names
-        else:
-            assert isinstance(output, str)
-            expected_outputs = []
-            if mime_output:
-                expected_outputs = [ctm.get_mime_type(ct.name) for ct in true_cts]
-            elif label_output:
-                expected_outputs = true_cts_names
-            elif compatibility_mode:
-                expected_outputs = [ctm.get_magic(ct.name) for ct in true_cts]
-            else:
-                expected_outputs = [
-                    f"{ctm.get_description(ct.name)} ({ctm.get_group(ct.name)})"
-                    for ct in true_cts
-                ]
-            assert (
-                output in expected_outputs
-            ), f'Output: "{output}", expected output: "{expected_outputs}"'
-
-    # Check that all input samples have been scanned
-    assert len(remaining_samples_paths) == 0
-
-
-def get_magika_cli_output_from_stdout_stderr(
-    stdout: str, stderr: str, **kwargs: Any
-) -> List[Tuple[Path, Union[Dict[str, Any], str]]]:
-    json_output = kwargs.get("json_output", False)
-    jsonl_output = kwargs.get("jsonl_output", False)
-    output_score = kwargs.get("output_score", False)
-    generate_report = kwargs.get("generate_report", False)
-    cpp_output = kwargs.get("cpp_output", False)
-    """
-    This function returns the output of magika for each input file. In case of
-    JSON or JSONL, it returns the full information dictionary for
-    each of them, not just the output content type label.
-    """
-
-    predicted_cts = []
-    if json_output:
-        # expect json
-        entries = json.loads(stdout)
-        for entry in entries:
-            predicted_cts.append((Path(entry["path"]), entry))
-    elif jsonl_output:
-        # expect jsonl
-        lines = get_lines_from_stream(stdout)
-        for line in lines:
-            entry = json.loads(line)
-            predicted_cts.append((Path(entry["path"]), entry))
-    elif cpp_output:
-        # output from magika-cpp client
-        lines = get_lines_from_stream(stdout)
-        for line in lines:
-            file_path_str, output = line.split(": ", 1)
-            ct_output, score_str = output.split(" ")
-            score_num = float(score_str)
-            assert 0 <= score_num <= 1
-            predicted_cts.append((Path(file_path_str), ct_output))
-    else:
-        # plain output
-        lines = get_lines_from_stream(stdout)
-        for line in lines:
-            if output_score:
-                file_path_str, output = line.split(": ", 1)
-                ct_output, score_str = output.rsplit(" ", 1)
-                assert score_str.endswith("%")
-                score_num_str = score_str[:-1]
-                assert 0 <= int(score_num_str) <= 100
-            else:
-                file_path_str, ct_output = line.split(": ", 1)
-            predicted_cts.append((Path(file_path_str), ct_output))
-
-        # check that we output the expected warnings
-        if generate_report:
-            stderr_lines = get_lines_from_stream(stderr)
-            assert len(stderr_lines) >= 1
-            if generate_report:
-                assert stderr_lines[0].startswith("#" * 10)
-                assert stderr_lines[1].find("REPORT") >= 0
-                assert stderr_lines[2].startswith("#" * 10)
-                assert stderr_lines[-4].startswith("#" * 10)
-                assert stderr_lines[-3].startswith("Please")
-                assert stderr_lines[-2].startswith("Please")
-                assert (
-                    stderr_lines[-1].startswith("IMPORTANT")
-                    and stderr_lines[-1].find("NOT") >= 0
-                    and stderr_lines[-1].find("PII") >= 0
-                )
-                report_info = json.loads(stderr_lines[3])
-                assert set(report_info.keys()) == {
-                    "version",
-                    "model_dir_name",
-                    "python_version",
-                    "reports",
-                }
-                for report in report_info["reports"]:
-                    assert set(report.keys()) == {"hash", "features", "result"}
-                    assert isinstance(report["hash"], str)
-                    # try to parse "features" as ModelFeatures
-                    _ = ModelFeatures(**json.loads(report["features"]))
-                    # try to parse "result" as MagikaResult
-                    result_dict = report["result"]
-                    mr = MagikaResult(
-                        path=result_dict["path"],
-                        dl=ModelOutputFields(**result_dict["dl"]),
-                        output=MagikaOutputFields(
-                            **result_dict["output"],
-                        ),
-                    )
-                    assert mr.path == "<REMOVED>"
-                    assert isinstance(mr.output.ct_label, str)
-
-    return predicted_cts
diff --git a/tests_data/basic/code.asm b/tests_data/basic/asm/code.asm
similarity index 100%
rename from tests_data/basic/code.asm
rename to tests_data/basic/asm/code.asm
diff --git a/tests_data/basic/code.c b/tests_data/basic/c/code.c
similarity index 100%
rename from tests_data/basic/code.c
rename to tests_data/basic/c/code.c
diff --git a/tests_data/basic/code.css b/tests_data/basic/css/code.css
similarity index 100%
rename from tests_data/basic/code.css
rename to tests_data/basic/css/code.css
diff --git a/tests_data/basic/dockerfile/Dockerfile b/tests_data/basic/dockerfile/Dockerfile
new file mode 100644
index 00000000..0052488e
--- /dev/null
+++ b/tests_data/basic/dockerfile/Dockerfile
@@ -0,0 +1,14 @@
+# syntax=docker/dockerfile:1
+
+ARG PYTHON_VERSION=3.11
+FROM python:${PYTHON_VERSION}-slim as base
+
+WORKDIR /magika
+
+# This requires buildx
+# RUN --mount=type=cache,target=/root/.cache/pip \
+#     pip install magika
+
+RUN pip install magika
+
+ENTRYPOINT ["magika"]
diff --git a/tests_data/basic/doc.docx b/tests_data/basic/docx/doc.docx
similarity index 100%
rename from tests_data/basic/doc.docx
rename to tests_data/basic/docx/doc.docx
diff --git a/tests_data/basic/doc.epub b/tests_data/basic/epub/doc.epub
similarity index 100%
rename from tests_data/basic/doc.epub
rename to tests_data/basic/epub/doc.epub
diff --git a/tests_data/basic/doc.html b/tests_data/basic/html/doc.html
similarity index 100%
rename from tests_data/basic/doc.html
rename to tests_data/basic/html/doc.html
diff --git a/tests_data/basic/doc.ini b/tests_data/basic/ini/doc.ini
similarity index 100%
rename from tests_data/basic/doc.ini
rename to tests_data/basic/ini/doc.ini
diff --git a/tests_data/basic/code.js b/tests_data/basic/javascript/code.js
similarity index 100%
rename from tests_data/basic/code.js
rename to tests_data/basic/javascript/code.js
diff --git a/tests_data/basic/doc.json b/tests_data/basic/json/doc.json
similarity index 100%
rename from tests_data/basic/doc.json
rename to tests_data/basic/json/doc.json
diff --git a/tests_data/basic/doc.odt b/tests_data/basic/odt/doc.odt
similarity index 100%
rename from tests_data/basic/doc.odt
rename to tests_data/basic/odt/doc.odt
diff --git a/tests_data/basic/doc.pem b/tests_data/basic/pem/doc.pem
similarity index 100%
rename from tests_data/basic/doc.pem
rename to tests_data/basic/pem/doc.pem
diff --git a/tests_data/basic/doc.pub b/tests_data/basic/pem/doc.pub
similarity index 100%
rename from tests_data/basic/doc.pub
rename to tests_data/basic/pem/doc.pub
diff --git a/tests_data/basic/code.py b/tests_data/basic/python/code.py
similarity index 100%
rename from tests_data/basic/code.py
rename to tests_data/basic/python/code.py
diff --git a/tests_data/basic/doc.rtf b/tests_data/basic/rtf/doc.rtf
similarity index 100%
rename from tests_data/basic/doc.rtf
rename to tests_data/basic/rtf/doc.rtf
diff --git a/tests_data/basic/code.rs b/tests_data/basic/rust/code.rs
similarity index 100%
rename from tests_data/basic/code.rs
rename to tests_data/basic/rust/code.rs
diff --git a/tests_data/basic/code.smali b/tests_data/basic/smali/code.smali
similarity index 100%
rename from tests_data/basic/code.smali
rename to tests_data/basic/smali/code.smali
diff --git a/tests_data/basic/toml/doc.toml b/tests_data/basic/toml/doc.toml
new file mode 100644
index 00000000..228b2da6
--- /dev/null
+++ b/tests_data/basic/toml/doc.toml
@@ -0,0 +1,40 @@
+[tool.poetry]
+name = "magika"
+version = "0.6.0-dev"
+description = "A tool to determine the content type of a file with deep-learning"
+authors = ["Yanick Fratantonio <yanickf@google.com>"]
+readme = "README.md"
+packages = [{include = "magika"}]
+
+[tool.poetry.dependencies]
+python = "^3.8,<3.13"
+click = "^8.1.3"
+tqdm = "^4.66.2"
+onnxruntime = "^1.17.0"
+numpy = [
+    {version = "^1.24", python = ">=3.8,<3.9"},
+    {version = "^1.26", python = ">=3.9,<3.13"}
+]
+tabulate = "^0.9.0"
+python-dotenv = "^1.0.1"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.0.1"
+ipython = [
+    {version = "^8.12.3", python = ">=3.8,<3.9"},
+    {version = "^8.18.1", python = ">=3.9,<3.10"},
+    {version = "^8.21.0", python = ">=3.10,<3.13"}
+]
+ruff = ">=0.2.2,<0.4.0"
+mypy = "^1.8.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.ruff.lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`)  codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F", "I001"]
+ignore = []
diff --git a/tests_data/basic/text.txt b/tests_data/basic/txt/text.txt
similarity index 100%
rename from tests_data/basic/text.txt
rename to tests_data/basic/txt/text.txt
diff --git a/tests_data/basic/typescript/code.ts b/tests_data/basic/typescript/code.ts
new file mode 100644
index 00000000..8f2076b5
--- /dev/null
+++ b/tests_data/basic/typescript/code.ts
@@ -0,0 +1,52 @@
+#! /usr/bin/env node
+// Command line tool to test MagikaJs. Please use the official command line
+// tool (`pip install magika`) for normal use.
+
+// To run this, you need to install the optional dependencies too.
+import {program} from 'commander';
+import {readFile} from 'fs/promises';
+import chalk from 'chalk';
+import {MagikaNode as Magika} from './magika_node.js';
+
+program
+    .description('Magika JS - file type detection with ML. https://google.github.io/magika')
+    .option('--json-output', 'Format output in JSON')
+    .option('--model-url <model-url>', 'Model URL', Magika.MODEL_URL)
+    .option( '--model-path <model-path>', 'Modle file path')
+    .option( '--config-url <config-url>', 'Config URL', Magika.CONFIG_URL)
+    .option( '--config-path <config-path>', 'Config file path')
+    .argument('<paths...>', 'Paths of the files to detect');
+
+program.parse();
+
+const flags = program.opts();
+const magika = new Magika();
+
+(async () => {
+    await magika.load({
+        modelURL: flags.modelUrl,
+        modelPath: flags.modelPath,
+        configURL: flags.configUrl,
+        configPath: flags.configPath
+    });
+    await Promise.all(program.args.map(async (path) => {
+        let data = null;
+        try {
+            data = await readFile(path);
+        } catch (error) {
+            console.error('Skipping file', path, error);
+        }
+
+        if (data != null) {
+            const prediction = await magika.identifyBytes(data);
+            if (flags.jsonOutput) {
+                console.log({path, ...prediction});
+            } else {
+                console.log(
+                    chalk.blue(path),
+                    chalk.green(prediction?.label, chalk.white(prediction?.score)),
+                );
+            }
+        }
+    }));
+})();
\ No newline at end of file
diff --git a/tests_data/basic/yara/rule.yar b/tests_data/basic/yara/rule.yar
new file mode 100644
index 00000000..8a6397e8
--- /dev/null
+++ b/tests_data/basic/yara/rule.yar
@@ -0,0 +1,12 @@
+rule Rule_485729_77379 {
+  strings:
+    $s1 = "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run"
+    $s2 = "Win32_Process"
+    $s3 = "Create" wide
+  condition:
+    $s1 and ($s2 and $s3)
+  meta:
+    author = "CyberThreatResearch"
+    date = "2019-09-23"
+    tags = "malware, persistence, registry"
+}
diff --git a/tests_data/mitra/bmp.bmp b/tests_data/mitra/bmp/bmp.bmp
similarity index 100%
rename from tests_data/mitra/bmp.bmp
rename to tests_data/mitra/bmp/bmp.bmp
diff --git a/tests_data/mitra/bzip2.bz2 b/tests_data/mitra/bzip/bzip2.bz2
similarity index 100%
rename from tests_data/mitra/bzip2.bz2
rename to tests_data/mitra/bzip/bzip2.bz2
diff --git a/tests_data/mitra/cab.cab b/tests_data/mitra/cab/cab.cab
similarity index 100%
rename from tests_data/mitra/cab.cab
rename to tests_data/mitra/cab/cab.cab
diff --git a/tests_data/mitra/elf.elf b/tests_data/mitra/elf/elf.elf
similarity index 100%
rename from tests_data/mitra/elf.elf
rename to tests_data/mitra/elf/elf.elf
diff --git a/tests_data/mitra/elf64.elf b/tests_data/mitra/elf/elf64.elf
similarity index 100%
rename from tests_data/mitra/elf64.elf
rename to tests_data/mitra/elf/elf64.elf
diff --git a/tests_data/mitra/flac.flac b/tests_data/mitra/flac/flac.flac
similarity index 100%
rename from tests_data/mitra/flac.flac
rename to tests_data/mitra/flac/flac.flac
diff --git a/tests_data/mitra/tiny.flac b/tests_data/mitra/flac/tiny.flac
similarity index 100%
rename from tests_data/mitra/tiny.flac
rename to tests_data/mitra/flac/tiny.flac
diff --git a/tests_data/mitra/gif87.gif b/tests_data/mitra/gif/gif87.gif
similarity index 100%
rename from tests_data/mitra/gif87.gif
rename to tests_data/mitra/gif/gif87.gif
diff --git a/tests_data/mitra/gif89.gif b/tests_data/mitra/gif/gif89.gif
similarity index 100%
rename from tests_data/mitra/gif89.gif
rename to tests_data/mitra/gif/gif89.gif
diff --git a/tests_data/mitra/gzip.gz b/tests_data/mitra/gzip/gzip.gz
similarity index 100%
rename from tests_data/mitra/gzip.gz
rename to tests_data/mitra/gzip/gzip.gz
diff --git a/tests_data/mitra/iso.iso b/tests_data/mitra/iso/iso.iso
similarity index 100%
rename from tests_data/mitra/iso.iso
rename to tests_data/mitra/iso/iso.iso
diff --git a/tests_data/mitra/java.class b/tests_data/mitra/javabytecode/java.class
similarity index 100%
rename from tests_data/mitra/java.class
rename to tests_data/mitra/javabytecode/java.class
diff --git a/tests_data/mitra/jpg.jpg b/tests_data/mitra/jpeg/jpg.jpg
similarity index 100%
rename from tests_data/mitra/jpg.jpg
rename to tests_data/mitra/jpeg/jpg.jpg
diff --git a/tests_data/mitra/id3v1.mp3 b/tests_data/mitra/mp3/id3v1.mp3
similarity index 100%
rename from tests_data/mitra/id3v1.mp3
rename to tests_data/mitra/mp3/id3v1.mp3
diff --git a/tests_data/mitra/id3v2.mp3 b/tests_data/mitra/mp3/id3v2.mp3
similarity index 100%
rename from tests_data/mitra/id3v2.mp3
rename to tests_data/mitra/mp3/id3v2.mp3
diff --git a/tests_data/mitra/mp4.mp4 b/tests_data/mitra/mp4/mp4.mp4
similarity index 100%
rename from tests_data/mitra/mp4.mp4
rename to tests_data/mitra/mp4/mp4.mp4
diff --git a/tests_data/mitra/vorbis.ogg b/tests_data/mitra/ogg/vorbis.ogg
similarity index 100%
rename from tests_data/mitra/vorbis.ogg
rename to tests_data/mitra/ogg/vorbis.ogg
diff --git a/tests_data/mitra/pcap.pcap b/tests_data/mitra/pcap/pcap.pcap
similarity index 100%
rename from tests_data/mitra/pcap.pcap
rename to tests_data/mitra/pcap/pcap.pcap
diff --git a/tests_data/mitra/pdf.pdf b/tests_data/mitra/pdf/pdf.pdf
similarity index 100%
rename from tests_data/mitra/pdf.pdf
rename to tests_data/mitra/pdf/pdf.pdf
diff --git a/tests_data/mitra/pe32.exe b/tests_data/mitra/pebin/pe32.exe
similarity index 100%
rename from tests_data/mitra/pe32.exe
rename to tests_data/mitra/pebin/pe32.exe
diff --git a/tests_data/mitra/pe64.exe b/tests_data/mitra/pebin/pe64.exe
similarity index 100%
rename from tests_data/mitra/pe64.exe
rename to tests_data/mitra/pebin/pe64.exe
diff --git a/tests_data/mitra/php.php b/tests_data/mitra/php/php.php
similarity index 100%
rename from tests_data/mitra/php.php
rename to tests_data/mitra/php/php.php
diff --git a/tests_data/mitra/cgbi.png b/tests_data/mitra/png/cgbi.png
similarity index 100%
rename from tests_data/mitra/cgbi.png
rename to tests_data/mitra/png/cgbi.png
diff --git a/tests_data/mitra/png.png b/tests_data/mitra/png/png.png
similarity index 100%
rename from tests_data/mitra/png.png
rename to tests_data/mitra/png/png.png
diff --git a/tests_data/mitra/rar4.rar b/tests_data/mitra/rar/rar4.rar
similarity index 100%
rename from tests_data/mitra/rar4.rar
rename to tests_data/mitra/rar/rar4.rar
diff --git a/tests_data/mitra/rar5.rar b/tests_data/mitra/rar/rar5.rar
similarity index 100%
rename from tests_data/mitra/rar5.rar
rename to tests_data/mitra/rar/rar5.rar
diff --git a/tests_data/mitra/rich.rtf b/tests_data/mitra/rtf/rich.rtf
similarity index 100%
rename from tests_data/mitra/rich.rtf
rename to tests_data/mitra/rtf/rich.rtf
diff --git a/tests_data/mitra/7-zip.7z b/tests_data/mitra/sevenzip/7-zip.7z
similarity index 100%
rename from tests_data/mitra/7-zip.7z
rename to tests_data/mitra/sevenzip/7-zip.7z
diff --git a/tests_data/mitra/svg.svg b/tests_data/mitra/svg/svg.svg
similarity index 100%
rename from tests_data/mitra/svg.svg
rename to tests_data/mitra/svg/svg.svg
diff --git a/tests_data/mitra/hello-gnu.tar b/tests_data/mitra/tar/hello-gnu.tar
similarity index 100%
rename from tests_data/mitra/hello-gnu.tar
rename to tests_data/mitra/tar/hello-gnu.tar
diff --git a/tests_data/mitra/hello-pax.tar b/tests_data/mitra/tar/hello-pax.tar
similarity index 100%
rename from tests_data/mitra/hello-pax.tar
rename to tests_data/mitra/tar/hello-pax.tar
diff --git a/tests_data/mitra/hello-ustar.tar b/tests_data/mitra/tar/hello-ustar.tar
similarity index 100%
rename from tests_data/mitra/hello-ustar.tar
rename to tests_data/mitra/tar/hello-ustar.tar
diff --git a/tests_data/mitra/tar.tar b/tests_data/mitra/tar/tar.tar
similarity index 100%
rename from tests_data/mitra/tar.tar
rename to tests_data/mitra/tar/tar.tar
diff --git a/tests_data/mitra/footer.tga b/tests_data/mitra/tga/footer.tga
similarity index 100%
rename from tests_data/mitra/footer.tga
rename to tests_data/mitra/tga/footer.tga
diff --git a/tests_data/mitra/tiff-be.tif b/tests_data/mitra/tiff/tiff-be.tif
similarity index 100%
rename from tests_data/mitra/tiff-be.tif
rename to tests_data/mitra/tiff/tiff-be.tif
diff --git a/tests_data/mitra/tiff-le.tif b/tests_data/mitra/tiff/tiff-le.tif
similarity index 100%
rename from tests_data/mitra/tiff-le.tif
rename to tests_data/mitra/tiff/tiff-le.tif
diff --git a/tests_data/mitra/riff.wav b/tests_data/mitra/wav/riff.wav
similarity index 100%
rename from tests_data/mitra/riff.wav
rename to tests_data/mitra/wav/riff.wav
diff --git a/tests_data/mitra/rifx.wav b/tests_data/mitra/wav/rifx.wav
similarity index 100%
rename from tests_data/mitra/rifx.wav
rename to tests_data/mitra/wav/rifx.wav
diff --git a/tests_data/mitra/webm.webm b/tests_data/mitra/webm/webm.webm
similarity index 100%
rename from tests_data/mitra/webm.webm
rename to tests_data/mitra/webm/webm.webm
diff --git a/tests_data/mitra/webp.webp b/tests_data/mitra/webp/webp.webp
similarity index 100%
rename from tests_data/mitra/webp.webp
rename to tests_data/mitra/webp/webp.webp
diff --git a/tests_data/mitra/webpl.webp b/tests_data/mitra/webp/webpl.webp
similarity index 100%
rename from tests_data/mitra/webpl.webp
rename to tests_data/mitra/webp/webpl.webp
diff --git a/tests_data/mitra/hello-world.xar b/tests_data/mitra/xar/hello-world.xar
similarity index 100%
rename from tests_data/mitra/hello-world.xar
rename to tests_data/mitra/xar/hello-world.xar
diff --git a/tests_data/mitra/mini.xar b/tests_data/mitra/xar/mini.xar
similarity index 100%
rename from tests_data/mitra/mini.xar
rename to tests_data/mitra/xar/mini.xar
diff --git a/tests_data/mitra/xz.xz b/tests_data/mitra/xz/xz.xz
similarity index 100%
rename from tests_data/mitra/xz.xz
rename to tests_data/mitra/xz/xz.xz
diff --git a/tests_data/mitra/NT.zip b/tests_data/mitra/zip/NT.zip
similarity index 100%
rename from tests_data/mitra/NT.zip
rename to tests_data/mitra/zip/NT.zip
diff --git a/tests_data/mitra/NTFS.zip b/tests_data/mitra/zip/NTFS.zip
similarity index 100%
rename from tests_data/mitra/NTFS.zip
rename to tests_data/mitra/zip/NTFS.zip
diff --git a/tests_data/mitra/PPMd.zip b/tests_data/mitra/zip/PPMd.zip
similarity index 100%
rename from tests_data/mitra/PPMd.zip
rename to tests_data/mitra/zip/PPMd.zip
diff --git a/tests_data/mitra/aes.zip b/tests_data/mitra/zip/aes.zip
similarity index 100%
rename from tests_data/mitra/aes.zip
rename to tests_data/mitra/zip/aes.zip
diff --git a/tests_data/mitra/bz2.zip b/tests_data/mitra/zip/bz2.zip
similarity index 100%
rename from tests_data/mitra/bz2.zip
rename to tests_data/mitra/zip/bz2.zip
diff --git a/tests_data/mitra/deflate64.zip b/tests_data/mitra/zip/deflate64.zip
similarity index 100%
rename from tests_data/mitra/deflate64.zip
rename to tests_data/mitra/zip/deflate64.zip
diff --git a/tests_data/mitra/directory.zip b/tests_data/mitra/zip/directory.zip
similarity index 100%
rename from tests_data/mitra/directory.zip
rename to tests_data/mitra/zip/directory.zip
diff --git a/tests_data/mitra/drive.zip b/tests_data/mitra/zip/drive.zip
similarity index 100%
rename from tests_data/mitra/drive.zip
rename to tests_data/mitra/zip/drive.zip
diff --git a/tests_data/mitra/dual.zip b/tests_data/mitra/zip/dual.zip
similarity index 100%
rename from tests_data/mitra/dual.zip
rename to tests_data/mitra/zip/dual.zip
diff --git a/tests_data/mitra/filecomment.zip b/tests_data/mitra/zip/filecomment.zip
similarity index 100%
rename from tests_data/mitra/filecomment.zip
rename to tests_data/mitra/zip/filecomment.zip
diff --git a/tests_data/mitra/implode.zip b/tests_data/mitra/zip/implode.zip
similarity index 100%
rename from tests_data/mitra/implode.zip
rename to tests_data/mitra/zip/implode.zip
diff --git a/tests_data/mitra/implodeV3.zip b/tests_data/mitra/zip/implodeV3.zip
similarity index 100%
rename from tests_data/mitra/implodeV3.zip
rename to tests_data/mitra/zip/implodeV3.zip
diff --git a/tests_data/mitra/jpeg.zip b/tests_data/mitra/zip/jpeg.zip
similarity index 100%
rename from tests_data/mitra/jpeg.zip
rename to tests_data/mitra/zip/jpeg.zip
diff --git a/tests_data/mitra/lzma.zip b/tests_data/mitra/zip/lzma.zip
similarity index 100%
rename from tests_data/mitra/lzma.zip
rename to tests_data/mitra/zip/lzma.zip
diff --git a/tests_data/mitra/mini.zip b/tests_data/mitra/zip/mini.zip
similarity index 100%
rename from tests_data/mitra/mini.zip
rename to tests_data/mitra/zip/mini.zip
diff --git a/tests_data/mitra/reduced1.zip b/tests_data/mitra/zip/reduced1.zip
similarity index 100%
rename from tests_data/mitra/reduced1.zip
rename to tests_data/mitra/zip/reduced1.zip
diff --git a/tests_data/mitra/reduced2.zip b/tests_data/mitra/zip/reduced2.zip
similarity index 100%
rename from tests_data/mitra/reduced2.zip
rename to tests_data/mitra/zip/reduced2.zip
diff --git a/tests_data/mitra/reduced3.zip b/tests_data/mitra/zip/reduced3.zip
similarity index 100%
rename from tests_data/mitra/reduced3.zip
rename to tests_data/mitra/zip/reduced3.zip
diff --git a/tests_data/mitra/reduced4.zip b/tests_data/mitra/zip/reduced4.zip
similarity index 100%
rename from tests_data/mitra/reduced4.zip
rename to tests_data/mitra/zip/reduced4.zip
diff --git a/tests_data/mitra/shrunk.zip b/tests_data/mitra/zip/shrunk.zip
similarity index 100%
rename from tests_data/mitra/shrunk.zip
rename to tests_data/mitra/zip/shrunk.zip
diff --git a/tests_data/mitra/simple.zip b/tests_data/mitra/zip/simple.zip
similarity index 100%
rename from tests_data/mitra/simple.zip
rename to tests_data/mitra/zip/simple.zip
diff --git a/tests_data/mitra/store.zip b/tests_data/mitra/zip/store.zip
similarity index 100%
rename from tests_data/mitra/store.zip
rename to tests_data/mitra/zip/store.zip
diff --git a/tests_data/mitra/unicode.zip b/tests_data/mitra/zip/unicode.zip
similarity index 100%
rename from tests_data/mitra/unicode.zip
rename to tests_data/mitra/zip/unicode.zip
diff --git a/tests_data/mitra/unicode2.zip b/tests_data/mitra/zip/unicode2.zip
similarity index 100%
rename from tests_data/mitra/unicode2.zip
rename to tests_data/mitra/zip/unicode2.zip
diff --git a/tests_data/mitra/unix.zip b/tests_data/mitra/zip/unix.zip
similarity index 100%
rename from tests_data/mitra/unix.zip
rename to tests_data/mitra/zip/unix.zip
diff --git a/tests_data/mitra/unixdesc.zip b/tests_data/mitra/zip/unixdesc.zip
similarity index 100%
rename from tests_data/mitra/unixdesc.zip
rename to tests_data/mitra/zip/unixdesc.zip
diff --git a/tests_data/mitra/volumecomment.zip b/tests_data/mitra/zip/volumecomment.zip
similarity index 100%
rename from tests_data/mitra/volumecomment.zip
rename to tests_data/mitra/zip/volumecomment.zip
diff --git a/tests_data/mitra/wavpack.zip b/tests_data/mitra/zip/wavpack.zip
similarity index 100%
rename from tests_data/mitra/wavpack.zip
rename to tests_data/mitra/zip/wavpack.zip
diff --git a/tests_data/mitra/zip.zip b/tests_data/mitra/zip/zip.zip
similarity index 100%
rename from tests_data/mitra/zip.zip
rename to tests_data/mitra/zip/zip.zip
diff --git a/tests_data/mitra/zip64.zip b/tests_data/mitra/zip/zip64.zip
similarity index 100%
rename from tests_data/mitra/zip64.zip
rename to tests_data/mitra/zip/zip64.zip
diff --git a/tests_data/mitra/zipcrypto.zip b/tests_data/mitra/zip/zipcrypto.zip
similarity index 100%
rename from tests_data/mitra/zipcrypto.zip
rename to tests_data/mitra/zip/zipcrypto.zip
diff --git a/tests_data/mitra/zopfli.zip b/tests_data/mitra/zip/zopfli.zip
similarity index 100%
rename from tests_data/mitra/zopfli.zip
rename to tests_data/mitra/zip/zopfli.zip
diff --git a/tests_data/mitra/html.htm b/tests_data/mitra_candidates/html.htm
similarity index 100%
rename from tests_data/mitra/html.htm
rename to tests_data/mitra_candidates/html.htm
diff --git a/tests_data/mitra/pcapng.pcapng b/tests_data/mitra_candidates/pcapng.pcapng
similarity index 100%
rename from tests_data/mitra/pcapng.pcapng
rename to tests_data/mitra_candidates/pcapng.pcapng