From e9782f9948aa1fc11426585bd204d3a439f3e21c Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:18:43 +0000 Subject: [PATCH 01/26] add LFS tracking rules for artifacts folders --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..d014853727 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +modules/ml_linear_model/artifacts/* filter=lfs diff=lfs merge=lfs -text +modules/ml_online_model/artifacts/* filter=lfs diff=lfs merge=lfs -text From 580bbe4a7d90ff5ff2eee07d5d88f7133d39a14b Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:23:00 +0000 Subject: [PATCH 02/26] adding ML modules, the base class and models they use (selected one by hand) --- modules/ml_linear_model/README.md | 144 +++++ modules/ml_linear_model/__init__.py | 1 + modules/ml_linear_model/artifacts/model.bin | 3 + modules/ml_linear_model/artifacts/pca.bin | 3 + modules/ml_linear_model/artifacts/scaler.bin | 3 + modules/ml_linear_model/ml_linear_model.py | 517 ++++++++++++++++++ modules/ml_online_model/README.md | 121 +++++ modules/ml_online_model/__init__.py | 1 + modules/ml_online_model/artifacts/model.bin | 3 + modules/ml_online_model/artifacts/pca.bin | 3 + modules/ml_online_model/artifacts/scaler.bin | 3 + modules/ml_online_model/ml_online_model.py | 524 +++++++++++++++++++ 12 files changed, 1326 insertions(+) create mode 100644 modules/ml_linear_model/README.md create mode 100644 modules/ml_linear_model/__init__.py create mode 100644 modules/ml_linear_model/artifacts/model.bin create mode 100644 modules/ml_linear_model/artifacts/pca.bin create mode 100644 modules/ml_linear_model/artifacts/scaler.bin create mode 100644 modules/ml_linear_model/ml_linear_model.py create mode 100644 modules/ml_online_model/README.md create mode 100644 modules/ml_online_model/__init__.py create mode 100644 modules/ml_online_model/artifacts/model.bin create mode 100644 
modules/ml_online_model/artifacts/pca.bin create mode 100644 modules/ml_online_model/artifacts/scaler.bin create mode 100644 modules/ml_online_model/ml_online_model.py diff --git a/modules/ml_linear_model/README.md b/modules/ml_linear_model/README.md new file mode 100644 index 0000000000..5c34f77ae5 --- /dev/null +++ b/modules/ml_linear_model/README.md @@ -0,0 +1,144 @@ +# `ml_linear_model` (user guide) + +This module provides a ready-to-use sklearn flow model for SLIPS. + +## What users need + +The runtime files are: + +- `modules/ml_linear_model/artifacts/model.bin` +- `modules/ml_linear_model/artifacts/scaler.bin` +- `modules/ml_linear_model/artifacts/pca.bin` + +Inference/training pipeline in the module: + +1. scale features with `scaler.bin` +2. apply `IncrementalPCA` from scikit-learn (`pca.bin`) +3. classify with `model.bin` + +PCA is mandatory for this model family (not optional): runtime always uses scaler -> PCA -> model in this order. + + +## How the shipped model was trained + +The shipped model was trained using the [SLIPS ML Training Pipeline](https://github.com/stratosphereips/Slips-ML-Training-Pipeline) and selected for best performance on real-world and unseen data. The details of the pipeline are abstracted for simplicity—users do not need to run or understand the pipeline to use this module. 
+ +- **Classifier:** scikit-learn linear model (see pipeline repo for details) +- **Preprocessing:** `StandardScaler` and `IncrementalPCA` (from scikit-learn) +- **Training datasets:** + - Train: `001, 008, 009, 010, 012, 014, 015, 016, 017, 020, 025, 026, 031, 035, 037` (from [security-datasets-for-testing](https://github.com/stratosphereips/security-datasets-for-testing)) + - Test (`test_all`): all datasets above plus `011, 012, 013, 014, 015, 016, 017, 018, 020, 021, 025, 026, 030, 031, 035, 036, 037` + - Test (`test_unseen`): only datasets not used in training: `018, 020, 021, 025, 026, 030, 031, 035, 036, 037` +- **Performance:** + - `test_all`: `F1 = 0.9362`, `FPR = 0.3545` + - `test_unseen`: `F1 = 0.9308`, `FPR = 0.1063` + - `test_all` = broad evaluation on all test datasets; `test_unseen` = evaluation on datasets not used in training. +- **Retraining:** In SLIPS, retraining is online/incremental using labeled flows and `training_batch_size`. + +For more details on the pipeline or datasets, see the [training pipeline repo](https://github.com/stratosphereips/Slips-ML-Training-Pipeline) and [dataset repo](https://github.com/stratosphereips/security-datasets-for-testing). + +## Using your own model + +You can train your own model externally (using the pipeline or your own code) and use it in this module: + +1. Place your model, scaler, and PCA artifacts in the `modules/ml_linear_model/artifacts/` directory (or another path). +2. In `config/slips.yaml`, set: + - `model_load_path` to your model file + - `preprocess_load_path` to your scaler file + - `pca_load_path` to your PCA file +3. Set `mode: test` to use your custom model for inference. + +To train a new model within SLIPS, set `mode: train` and adjust `train_from_scratch` and artifact store paths as described above. 
+ +## Visualizing training and testing results + +You can visualize model performance using the provided scripts: + +- `slips_files/common/ml_modules_utils/plot_train_performance.py` (for training logs) +- `slips_files/common/ml_modules_utils/plot_testing_performance.py` (for testing logs) + +Example usage: + +```bash +python3 slips_files/common/ml_modules_utils/plot_train_performance.py -f path/to/training.log +python3 slips_files/common/ml_modules_utils/plot_testing_performance.py -f path/to/testing.log +``` + +## Creating your own ML module + +To create a new ML module, see: +- [slips_files/common/abstracts/README.md](../../slips_files/common/abstracts/README.md) +- [docs/create_new_module.md](../../docs/create_new_module.md) + +These documents explain the base class, required methods, and configuration for new modules. + +## How to use in SLIPS + +`config/slips.yaml` is already wired for this module via the `ml_linear_model` section: + +- `model_load_path` +- `preprocess_load_path` +- `pca_load_path` + +PCA is implemented directly in the backend code path for `ml_linear_model`. + +For reproducibility, keep `seed` fixed in `config/slips.yaml`. + +## Train/test (module-specific) + +Canonical workflow is in `slips_files/common/abstracts/README.md`. 
+ +`ml_linear_model`-specific paths: + +- original test load: + - `model_load_path: modules/ml_linear_model/artifacts/model.bin` + - `preprocess_load_path: modules/ml_linear_model/artifacts/scaler.bin` + - `pca_load_path: modules/ml_linear_model/artifacts/pca.bin` +- custom training store: + - `model_store_path: modules/ml_linear_model/artifacts/model_custom.bin` + - `preprocess_store_path: modules/ml_linear_model/artifacts/scaler_custom.bin` + - `pca_store_path: modules/ml_linear_model/artifacts/pca_custom.bin` + +## If you change the base class + +When updating `MLBaseDetection`, verify these `ml_linear_model` responsibilities still match: + +- feature preparation in `process_features` +- preprocessor lifecycle (`update_preprocessor`, `transform_features`) +- model lifecycle (`fit_incremental_model`, `predict_batch`) +- PCA load/store fields (`pca_load_path`, `pca_store_path`) in `init/read_model/store_model` + +## Original model vs custom training details + +Default behavior keeps provided artifacts intact. + +### 1) Test using original provided model (default) + +In `ml_linear_model` section, keep: + +- `mode: test` +- `model_load_path: modules/ml_linear_model/artifacts/model.bin` +- `preprocess_load_path: modules/ml_linear_model/artifacts/scaler.bin` +- `pca_load_path: modules/ml_linear_model/artifacts/pca.bin` + +### 2) Train a custom model without overwriting original artifacts + +In `ml_linear_model` section, set: + +- `mode: train` +- `train_from_scratch: false` (warm-start from provided model) or `true` (full scratch) +- keep store paths as custom files: + - `model_store_path: modules/ml_linear_model/artifacts/model_custom.bin` + - `preprocess_store_path: modules/ml_linear_model/artifacts/scaler_custom.bin` + - `pca_store_path: modules/ml_linear_model/artifacts/pca_custom.bin` + +Models are persisted at time-window close (or graceful shutdown), not every batch. 
+ +### 3) Test using your custom trained model + +Switch load paths to your custom files: + +- `mode: test` +- `model_load_path: modules/ml_linear_model/artifacts/model_custom.bin` +- `preprocess_load_path: modules/ml_linear_model/artifacts/scaler_custom.bin` +- `pca_load_path: modules/ml_linear_model/artifacts/pca_custom.bin` diff --git a/modules/ml_linear_model/__init__.py b/modules/ml_linear_model/__init__.py new file mode 100644 index 0000000000..70fabd9693 --- /dev/null +++ b/modules/ml_linear_model/__init__.py @@ -0,0 +1 @@ +# linear-model standalone Slips ML module. diff --git a/modules/ml_linear_model/artifacts/model.bin b/modules/ml_linear_model/artifacts/model.bin new file mode 100644 index 0000000000..e2218e026d --- /dev/null +++ b/modules/ml_linear_model/artifacts/model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b92aff54e59f543c1d1beb30749a0dd36575f78df3be8690bb84503b8e12a7 +size 1107 diff --git a/modules/ml_linear_model/artifacts/pca.bin b/modules/ml_linear_model/artifacts/pca.bin new file mode 100644 index 0000000000..5f4ff7a1ee --- /dev/null +++ b/modules/ml_linear_model/artifacts/pca.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c190a2759a8bb3c0c50e5f8d9bb5d0c41fef82a608f30b5df8de3f49877aa2 +size 2114 diff --git a/modules/ml_linear_model/artifacts/scaler.bin b/modules/ml_linear_model/artifacts/scaler.bin new file mode 100644 index 0000000000..9e0a6e5220 --- /dev/null +++ b/modules/ml_linear_model/artifacts/scaler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98da107964f118425fcdaf4486399f66a47b3a8a635537ac1ae03ed9be08bbe0 +size 1097 diff --git a/modules/ml_linear_model/ml_linear_model.py b/modules/ml_linear_model/ml_linear_model.py new file mode 100644 index 0000000000..417a8cd000 --- /dev/null +++ b/modules/ml_linear_model/ml_linear_model.py @@ -0,0 +1,517 @@ +import traceback +import warnings +from typing import Optional, Tuple +import os +import 
pickle + +import numpy +import pandas as pd +from sklearn.decomposition import IncrementalPCA +from sklearn.linear_model import SGDClassifier +from sklearn.preprocessing import StandardScaler + +import slips_files.common.abstracts.ml_module_base as ml_base +from slips_files.common.parsers.config_parser import ConfigParser + +BENIGN = ml_base.BENIGN +MALICIOUS = ml_base.MALICIOUS + + +def warn(*args, **kwargs): + pass + + +warnings.warn = warn + +# --------------------------------------------------------------------------- +# Default artifact paths for linear sklearn model + preprocessor. +# Override at runtime with environment variables: +# - SLIPS_ML_LINEAR_MODEL_MODEL_LOAD_PATH +# - SLIPS_ML_LINEAR_MODEL_PREPROCESS_LOAD_PATH +# - SLIPS_ML_LINEAR_MODEL_MODEL_STORE_PATH +# - SLIPS_ML_LINEAR_MODEL_PREPROCESS_STORE_PATH +# --------------------------------------------------------------------------- +DEFAULT_MODEL_LOAD_PATH = "./modules/ml_linear_model/artifacts/model.bin" +DEFAULT_PREPROCESS_LOAD_PATH = "./modules/ml_linear_model/artifacts/scaler.bin" +DEFAULT_MODEL_STORE_PATH = ( + "./modules/ml_linear_model/artifacts/model_custom.bin" +) +DEFAULT_PREPROCESS_STORE_PATH = ( + "./modules/ml_linear_model/artifacts/scaler_custom.bin" +) +DEFAULT_PCA_LOAD_PATH = "./modules/ml_linear_model/artifacts/pca.bin" +DEFAULT_PCA_STORE_PATH = "./modules/ml_linear_model/artifacts/pca_custom.bin" + + +class MLLinearModel(ml_base.MLBaseDetection): + name = "ml_linear_model" + description = "Standalone linear sklearn-based ML flow detector" + authors = ["Jan Svoboda"] + module_key = "ml_linear_model" + module_config_section = "ml_linear_model" + + def init(self): + super().init() + self._add_dummy_flows() + self._fit_pca_next_transform = False + + conf = ConfigParser() + section = self.module_config_section + key_upper = self.module_key.upper() + + configured_pca_load = conf.ml_module_pca_load_path( + section, + DEFAULT_PCA_LOAD_PATH, + ) + configured_pca_store = 
conf.ml_module_pca_store_path( + section, + DEFAULT_PCA_STORE_PATH, + ) + + self.pca_load_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PCA_LOAD_PATH", + explicit_path=configured_pca_load, + fallback_env_var="SLIPS_FLOW_ML_PCA_LOAD_PATH", + ) + self.pca_store_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PCA_STORE_PATH", + explicit_path=configured_pca_store, + fallback_env_var="SLIPS_FLOW_ML_PCA_STORE_PATH", + ) + + self.pca_n_components = conf.ml_module_pca_n_components( + section, + default=None, + ) + self.pca_batch_size = conf.ml_module_pca_batch_size( + section, + default=self.batch_size, + ) + self.pca = None + + self.benign_target_value = conf.ml_module_benign_target_value( + section, + default=0.0, + ) + self.malicious_target_value = conf.ml_module_malicious_target_value( + section, + default=1.0, + ) + self._label_to_target = { + BENIGN: self.benign_target_value, + MALICIOUS: self.malicious_target_value, + } + + def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: + return ( + DEFAULT_MODEL_LOAD_PATH, + DEFAULT_PREPROCESS_LOAD_PATH, + DEFAULT_MODEL_STORE_PATH, + DEFAULT_PREPROCESS_STORE_PATH, + ) + + def _add_dummy_flows(self): + self.dummy_malicious_flow = numpy.array( + [ + 1.9424750804901123, + 0.0, + 49733.0, + 443.0, + 17.0, + 27.0, + 25517.0, + 17247.0, + 1.0, + 42764.0, + 44.0, + ] + ).reshape(1, -1) + + self.dummy_benign_flow = numpy.array( + [ + 10.896695, + 0.0, + 47956.0, + 80.0, + 1.0, + 0.0, + 100.0, + 67596.0, + 1.0, + 67696.0, + 1.0, + ] + ).reshape(1, -1) + + def get_dummy_flows(self) -> dict: + return { + MALICIOUS: self.dummy_malicious_flow, + BENIGN: self.dummy_benign_flow, + } + + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + try: + cols = [ + "proto", + "dport", + "sport", + "dur", + "pkts", + "spkts", + "bytes", + "sbytes", + "state", + ] + for col in cols: + if col in dataset.columns: + try: + dataset[col] = dataset[col].astype("float64") + except (ValueError, 
AttributeError): + pass + + to_discard = ["arp", "ARP", "icmp", "igmp", "ipv6-icmp", ""] + for proto in to_discard: + dataset = dataset[dataset.proto != proto] + + if dataset.empty: + return dataset + + to_drop = [ + "appproto", + "daddr", + "saddr", + "starttime", + "type_", + "smac", + "dmac", + "history", + "uid", + "dir_", + "endtime", + "flow_source", + "interface", + ] + for field in to_drop: + try: + dataset = dataset.drop(field, axis=1) + except (ValueError, KeyError): + pass + + dataset["state"] = dataset.apply( + lambda row: self.db.get_final_state_from_flags( + row["state"], (row["spkts"] + row["dpkts"]) + ), + axis=1, + ) + + dataset.state = dataset.state.str.replace( + r"(^.*Not Established.*$)", "0", regex=True + ) + dataset.state = dataset.state.str.replace( + r"(^.*Established.*$)", "1", regex=True + ) + dataset.state = dataset.state.astype("float64") + + dataset.proto = dataset.proto.str.lower() + dataset.proto = dataset.proto.str.replace( + r"(^.*tcp.*$)", "0", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*udp.*$)", "1", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*icmp.*$)", "2", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*icmp-ipv6.*$)", "3", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*arp.*$)", "4", regex=True + ) + + dataset["bytes"] = dataset["sbytes"] + dataset["dbytes"] + dataset["pkts"] = dataset["spkts"] + dataset["dpkts"] + + fields_to_convert_to_float = [ + dataset.proto, + dataset.dport, + dataset.sport, + dataset.dur, + dataset.pkts, + dataset.spkts, + dataset.bytes, + dataset.sbytes, + dataset.state, + ] + for field in fields_to_convert_to_float: + try: + field = field.astype("float64") + dataset[field.name] = field + except (ValueError, AttributeError): + pass + + feature_order = [ + "dur", + "proto", + "sport", + "dport", + "spkts", + "dpkts", + "sbytes", + "dbytes", + "state", + "bytes", + "pkts", + ] + label_cols = [ + 
"ground_truth_label", + "detailed_ground_truth_label", + "label", + "module_labels", + "detailed_label", + ] + + for col in feature_order: + if col not in dataset.columns: + dataset[col] = 0.0 + + for col in feature_order: + dataset[col] = pd.to_numeric( + dataset[col], errors="coerce" + ).fillna(0.0) + + existing_label_cols = [ + col for col in label_cols if col in dataset.columns + ] + dataset = dataset[feature_order + existing_label_cols] + + return dataset + except Exception: + self.print("Error in process_features()") + self.print(traceback.format_exc(), 0, 1) + return dataset.iloc[0:0] + + def create_empty_model(self): + return SGDClassifier( + warm_start=False, + loss="hinge", + penalty="l2", + random_state=self.seed, + ) + + def create_empty_preprocessor(self): + return StandardScaler() + + def _is_scaler_initialized(self) -> bool: + return ( + hasattr(self.preprocessor, "mean_") + and self.preprocessor.mean_ is not None + ) + + def is_preprocessor_initialized(self) -> bool: + return self._is_scaler_initialized() and self._is_pca_initialized() + + def update_preprocessor(self, x_train: pd.DataFrame): + try: + if not self.is_preprocessor_initialized(): + self.print( + "First fitting the scaler to the training data.", 0, 2 + ) + self.preprocessor.fit(x_train) + else: + self.print("Updating the scaler with the training data.", 0, 2) + self.preprocessor.partial_fit(x_train) + except Exception as exc: + self.print( + f"[debug][update_preprocessor] failed with {type(exc).__name__}: {exc}", + 0, + 1, + ) + incoming = list(x_train.columns) + non_numeric_cols = [ + col + for col in incoming + if not pd.api.types.is_numeric_dtype(x_train[col]) + ] + self.print( + f"[debug][update_preprocessor] incoming_columns={incoming}", + 0, + 1, + ) + if non_numeric_cols: + sample_values = { + col: x_train[col].astype(str).dropna().head(3).tolist() + for col in non_numeric_cols + } + self.print( + f"[debug][update_preprocessor] non_numeric_columns={non_numeric_cols}", + 0, + 1, + 
) + self.print( + f"[debug][update_preprocessor] non_numeric_samples={sample_values}", + 0, + 1, + ) + raise + self._fit_pca_next_transform = True + + def _create_incremental_pca(self) -> IncrementalPCA: + kwargs = {"batch_size": self.pca_batch_size} + if self.pca_n_components is not None: + kwargs["n_components"] = self.pca_n_components + return IncrementalPCA(**kwargs) + + def _is_pca_initialized(self) -> bool: + return self.pca is not None and hasattr(self.pca, "components_") + + def _fit_or_update_pca(self, x_scaled: numpy.ndarray): + if self.pca is None: + self.pca = self._create_incremental_pca() + + n_samples, n_features = x_scaled.shape + if n_samples < 2: + raise ValueError("PCA requires at least 2 samples to fit.") + + if self.pca_n_components is not None and self.pca_n_components > min( + n_samples, n_features + ): + raise ValueError( + f"Configured pca_n_components={self.pca_n_components} exceeds " + f"allowed maximum {min(n_samples, n_features)} for current batch." + ) + + if not self._is_pca_initialized(): + self.pca.fit(x_scaled) + else: + if hasattr(self.pca, "partial_fit"): + self.pca.partial_fit(x_scaled) + else: + self.print( + "Loaded PCA has no partial_fit(); keeping it fixed during training.", + 0, + 1, + ) + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + x_scaled = self.preprocessor.transform(x_data) + + if self._fit_pca_next_transform: + self._fit_or_update_pca(x_scaled) + self._fit_pca_next_transform = False + + if self._is_pca_initialized(): + return self.pca.transform(x_scaled) + + raise ValueError( + "PCA is required but not initialized. " + "Ensure pca_load_path points to a fitted PCA in test mode " + "or train with enough samples to fit PCA." 
+ ) + + def fit_incremental_model( + self, + x_train: numpy.ndarray, + y_train: numpy.ndarray, + classes: Optional[list] = None, + ): + numeric_targets = self._guess_numeric_targets() + encoded_targets = self._encode_targets(y_train, numeric_targets) + if classes is None: + self.clf.partial_fit(x_train, encoded_targets) + else: + encoded_classes = self._encode_targets( + numpy.asarray(classes), numeric_targets + ) + self.clf.partial_fit( + x_train, encoded_targets, classes=encoded_classes + ) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + preds = self.clf.predict(x_data) + return numpy.asarray([self._decode_target(pred) for pred in preds]) + + @staticmethod + def _normalize_label(label): + if isinstance(label, str): + normalized = label.strip().lower() + if normalized in {"benign", "normal"}: + return BENIGN + if normalized in {"malicious", "malware"}: + return MALICIOUS + return label + + def _guess_numeric_targets(self) -> bool: + module_name = getattr(self.clf.__class__, "__module__", "") + if module_name.startswith("sklearn."): + return False + target_transform = getattr(self.clf, "_target_transform", None) + if callable(target_transform): + try: + target_transform(MALICIOUS) + return False + except Exception: + return True + return False + + def _encode_targets( + self, targets: numpy.ndarray, numeric_targets: bool + ) -> numpy.ndarray: + normalized_targets = [ + self._normalize_label(target) for target in targets + ] + if not numeric_targets: + return numpy.asarray(normalized_targets) + encoded = [ + self._label_to_target.get(target, target) + for target in normalized_targets + ] + return numpy.asarray(encoded) + + def _decode_target(self, value): + if isinstance(value, (float, int, numpy.floating, numpy.integer)): + value = float(value) + if numpy.isclose(value, self.malicious_target_value): + return MALICIOUS + if numpy.isclose(value, self.benign_target_value): + return BENIGN + return self._normalize_label(value) + + def 
store_model(self): + super().store_model() + if self.pca is None: + return + + pca_dir = os.path.dirname(self.pca_store_path) + if pca_dir: + os.makedirs(pca_dir, exist_ok=True) + + with open(self.pca_store_path, "wb") as pca_file: + pca_file.write(pickle.dumps(self.pca)) + + def read_model(self): + super().read_model() + self.pca = None + + loaded_pca = self._read_pickle_or_none(self.pca_load_path) + if loaded_pca is not None: + self.pca = loaded_pca + return + + if self.mode == "test": + self.print( + "No PCA found in test mode. PCA is mandatory for ml_linear_model.", + 0, + 1, + ) + return + + self.pca = self._create_incremental_pca() + + def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + self._train_default( + sum_labeled_flows, last_number_of_flows_when_trained + ) + + def run_test_on_flow(self, flow: dict): + self._test_default(flow) diff --git a/modules/ml_online_model/README.md b/modules/ml_online_model/README.md new file mode 100644 index 0000000000..7af90cf9ce --- /dev/null +++ b/modules/ml_online_model/README.md @@ -0,0 +1,121 @@ +# `ml_online_model` (user guide) + +This module provides a River-based flow model for SLIPS. + +## Runtime artifacts + +- `modules/ml_online_model/artifacts/model.bin` +- `modules/ml_online_model/artifacts/scaler.bin` + + +## Train/test (module-specific) + +Canonical workflow is in `slips_files/common/abstracts/README.md`. 
+ +`ml_online_model`-specific paths: + +- original test load: + - `model_load_path: modules/ml_online_model/artifacts/model.bin` + - `preprocess_load_path: modules/ml_online_model/artifacts/scaler.bin` +- custom training store: + - `model_store_path: modules/ml_online_model/artifacts/model_custom.bin` + - `preprocess_store_path: modules/ml_online_model/artifacts/scaler_custom.bin` + +## If you change the base class + +When updating `MLBaseDetection`, verify these `ml_online_model` responsibilities still match: + +- feature preparation in `process_features` +- preprocessor lifecycle (`update_preprocessor`, `transform_features`) +- river learner adaptation (`fit_incremental_model`, `predict_batch`) + +## Original model vs custom training details + +Default behavior keeps provided artifacts intact. + +### 1) Test using original provided model (default) + +In `config/slips.yaml`, `ml_online_model` section: + +- `mode: test` +- `model_load_path: modules/ml_online_model/artifacts/model.bin` +- `preprocess_load_path: modules/ml_online_model/artifacts/scaler.bin` + +### 2) Train a custom model without overwriting original artifacts + +In `ml_online_model` section, set: + +- `mode: train` +- `train_from_scratch: false` (warm-start from provided model) or `true` (full scratch) +- keep store paths as custom files: + - `model_store_path: modules/ml_online_model/artifacts/model_custom.bin` + - `preprocess_store_path: modules/ml_online_model/artifacts/scaler_custom.bin` + +Models are persisted at time-window close (or graceful shutdown), not every batch. + +### 3) Test using your custom trained model + +Switch load paths to your custom files: + +- `mode: test` +- `model_load_path: modules/ml_online_model/artifacts/model_custom.bin` +- `preprocess_load_path: modules/ml_online_model/artifacts/scaler_custom.bin` + +## Training/testing notes + +- `training_batch_size` controls retraining cadence. +- `validate_on_train` controls train/validation metric split during train mode. 
+- `seed` controls deterministic behavior where applicable. +- `create_performance_metrics_log_files` enables train/test metrics logs. + +## How the shipped model was trained + +The shipped model was trained using the [SLIPS ML Training Pipeline](https://github.com/stratosphereips/Slips-ML-Training-Pipeline) and selected for best performance on real-world and unseen data. The details of the pipeline are abstracted for simplicity—users do not need to run or understand the pipeline to use this module. + +- **Classifier:** `river.tree.SGTClassifier` +- **Preprocessing:** `StandardScaler` and `IncrementalPCA` (from scikit-learn) +- **Training datasets:** + - Train: `001, 008, 009, 010, 012, 014, 015, 016, 017, 020, 025, 026, 031, 035, 037` (from [security-datasets-for-testing](https://github.com/stratosphereips/security-datasets-for-testing)) + - Test (`test_all`): all datasets above plus `011, 013, 018, 021, 030, 036` + - Test (`test_unseen`): only datasets not used in training: `018, 020, 021, 025, 026, 030, 031, 035, 036, 037` +- **Performance:** + - `test_f1: 0.9120`, `test_fpr: 0.0405` + - `test_unseen_f1: 0.8193`, `test_unseen_fpr: 0.0328` + - `test_all` = broad evaluation on all test datasets; `test_unseen` = evaluation on datasets not used in training. +- **Retraining:** In SLIPS, retraining is online/incremental using labeled flows and `training_batch_size`. + +For more details on the pipeline or datasets, see the [training pipeline repo](https://github.com/stratosphereips/Slips-ML-Training-Pipeline) and [dataset repo](https://github.com/stratosphereips/security-datasets-for-testing). + +## Using your own model + +You can train your own model externally (using the pipeline or your own code) and use it in this module: + +1. Place your model and scaler artifacts in the `modules/ml_online_model/artifacts/` directory (or another path). +2. In `config/slips.yaml`, set: + - `model_load_path` to your model file + - `preprocess_load_path` to your scaler file +3. 
Set `mode: test` to use your custom model for inference. + +To train a new model within SLIPS, set `mode: train` and adjust `train_from_scratch` and artifact store paths as described above. + +## Visualizing training and testing results + +You can visualize model performance using the provided scripts: + +- `slips_files/common/ml_modules_utils/plot_train_performance.py` (for training logs) +- `slips_files/common/ml_modules_utils/plot_testing_performance.py` (for testing logs) + +Example usage: + +```bash +python3 slips_files/common/ml_modules_utils/plot_train_performance.py -f path/to/training.log +python3 slips_files/common/ml_modules_utils/plot_testing_performance.py -f path/to/testing.log +``` + +## Creating your own ML module + +To create a new ML module, see: +- [slips_files/common/abstracts/README.md](../../slips_files/common/abstracts/README.md) +- [docs/create_new_module.md](../../docs/create_new_module.md) + +These documents explain the base class, required methods, and configuration for new modules. diff --git a/modules/ml_online_model/__init__.py b/modules/ml_online_model/__init__.py new file mode 100644 index 0000000000..0b3e62e228 --- /dev/null +++ b/modules/ml_online_model/__init__.py @@ -0,0 +1 @@ +# online-model standalone Slips ML module. 
diff --git a/modules/ml_online_model/artifacts/model.bin b/modules/ml_online_model/artifacts/model.bin new file mode 100644 index 0000000000..00c4b7c289 --- /dev/null +++ b/modules/ml_online_model/artifacts/model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765897e2613eddf8e9d5208a4f68cfad2e89abae9562b224b2f6ba28c07df666 +size 21516865 diff --git a/modules/ml_online_model/artifacts/pca.bin b/modules/ml_online_model/artifacts/pca.bin new file mode 100644 index 0000000000..c183ae9f74 --- /dev/null +++ b/modules/ml_online_model/artifacts/pca.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b992e7c34b0a25759e995849bad948378728341deabd5413e56b75142c72da42 +size 2114 diff --git a/modules/ml_online_model/artifacts/scaler.bin b/modules/ml_online_model/artifacts/scaler.bin new file mode 100644 index 0000000000..37726a9685 --- /dev/null +++ b/modules/ml_online_model/artifacts/scaler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440b8454c3c95e7ff1a31148a6af53b4d9ed61ffb807233abca484b4d6c00399 +size 887 diff --git a/modules/ml_online_model/ml_online_model.py b/modules/ml_online_model/ml_online_model.py new file mode 100644 index 0000000000..25260e0fdd --- /dev/null +++ b/modules/ml_online_model/ml_online_model.py @@ -0,0 +1,524 @@ +import traceback +import warnings +from typing import Optional, Tuple +import os +import pickle + +import numpy +import pandas as pd +from sklearn.decomposition import IncrementalPCA +from sklearn.preprocessing import StandardScaler + +import slips_files.common.abstracts.ml_module_base as ml_base +from slips_files.common.parsers.config_parser import ConfigParser + +BENIGN = ml_base.BENIGN +MALICIOUS = ml_base.MALICIOUS + + +def warn(*args, **kwargs): + pass + + +warnings.warn = warn + +DEFAULT_MODEL_LOAD_PATH = "./modules/ml_online_model/artifacts/model.bin" +DEFAULT_PREPROCESS_LOAD_PATH = "./modules/ml_online_model/artifacts/scaler.bin" +DEFAULT_MODEL_STORE_PATH 
= ( + "./modules/ml_online_model/artifacts/model_custom.bin" +) +DEFAULT_PREPROCESS_STORE_PATH = ( + "./modules/ml_online_model/artifacts/scaler_custom.bin" +) +DEFAULT_PCA_LOAD_PATH = "./modules/ml_online_model/artifacts/pca.bin" +DEFAULT_PCA_STORE_PATH = "./modules/ml_online_model/artifacts/pca_custom.bin" + + +class _FallbackRiverModel: + def __init__(self): + self.counts = {} + + def learn_one(self, x, y): + self.counts[y] = self.counts.get(y, 0) + 1 + + def predict_one(self, x): + if not self.counts: + return BENIGN + return max(self.counts, key=self.counts.get) + + +class MLOnlineModel(ml_base.MLBaseDetection): + name = "ml_online_model" + description = "Standalone online ML flow detector" + authors = ["Jan Svoboda"] + module_key = "ml_online_model" + module_config_section = "ml_online_model" + + def init(self): + super().init() + self._add_dummy_flows() + self._fit_pca_next_transform = False + + conf = ConfigParser() + section = self.module_config_section + key_upper = self.module_key.upper() + + configured_pca_load = conf.ml_module_pca_load_path( + section, + DEFAULT_PCA_LOAD_PATH, + ) + configured_pca_store = conf.ml_module_pca_store_path( + section, + DEFAULT_PCA_STORE_PATH, + ) + + self.pca_load_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PCA_LOAD_PATH", + explicit_path=configured_pca_load, + fallback_env_var="SLIPS_FLOW_ML_PCA_LOAD_PATH", + ) + self.pca_store_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PCA_STORE_PATH", + explicit_path=configured_pca_store, + fallback_env_var="SLIPS_FLOW_ML_PCA_STORE_PATH", + ) + + self.pca_n_components = conf.ml_module_pca_n_components( + section, + default=11, + ) + self.pca_batch_size = conf.ml_module_pca_batch_size( + section, + default=self.batch_size, + ) + self.pca = None + + self.benign_target_value = conf.ml_module_benign_target_value( + section, + default=0.0, + ) + self.malicious_target_value = conf.ml_module_malicious_target_value( + section, + default=1.0, + ) + 
self._label_to_target = { + BENIGN: self.benign_target_value, + MALICIOUS: self.malicious_target_value, + } + + def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: + return ( + DEFAULT_MODEL_LOAD_PATH, + DEFAULT_PREPROCESS_LOAD_PATH, + DEFAULT_MODEL_STORE_PATH, + DEFAULT_PREPROCESS_STORE_PATH, + ) + + def _add_dummy_flows(self): + self.dummy_malicious_flow = numpy.array( + [ + 1.9424750804901123, + 0.0, + 49733.0, + 443.0, + 17.0, + 27.0, + 25517.0, + 17247.0, + 1.0, + 42764.0, + 44.0, + ] + ).reshape(1, -1) + + self.dummy_benign_flow = numpy.array( + [ + 10.896695, + 0.0, + 47956.0, + 80.0, + 1.0, + 0.0, + 100.0, + 67596.0, + 1.0, + 67696.0, + 1.0, + ] + ).reshape(1, -1) + + def get_dummy_flows(self) -> dict: + return { + MALICIOUS: self.dummy_malicious_flow, + BENIGN: self.dummy_benign_flow, + } + + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + try: + cols = [ + "proto", + "dport", + "sport", + "dur", + "pkts", + "spkts", + "bytes", + "sbytes", + "state", + ] + for col in cols: + if col in dataset.columns: + try: + dataset[col] = dataset[col].astype("float64") + except (ValueError, AttributeError): + pass + + to_discard = ["arp", "ARP", "icmp", "igmp", "ipv6-icmp", ""] + for proto in to_discard: + dataset = dataset[dataset.proto != proto] + + if dataset.empty: + return dataset + + to_drop = [ + "appproto", + "daddr", + "saddr", + "starttime", + "type_", + "smac", + "dmac", + "history", + "uid", + "dir_", + "endtime", + "flow_source", + "interface", + ] + for field in to_drop: + try: + dataset = dataset.drop(field, axis=1) + except (ValueError, KeyError): + pass + + dataset["state"] = dataset.apply( + lambda row: self.db.get_final_state_from_flags( + row["state"], (row["spkts"] + row["dpkts"]) + ), + axis=1, + ) + + dataset.state = dataset.state.str.replace( + r"(^.*Not Established.*$)", "0", regex=True + ) + dataset.state = dataset.state.str.replace( + r"(^.*Established.*$)", "1", regex=True + ) + dataset.state = 
dataset.state.astype("float64") + + dataset.proto = dataset.proto.str.lower() + dataset.proto = dataset.proto.str.replace( + r"(^.*tcp.*$)", "0", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*udp.*$)", "1", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*icmp.*$)", "2", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*icmp-ipv6.*$)", "3", regex=True + ) + dataset.proto = dataset.proto.str.replace( + r"(^.*arp.*$)", "4", regex=True + ) + + dataset["bytes"] = dataset["sbytes"] + dataset["dbytes"] + dataset["pkts"] = dataset["spkts"] + dataset["dpkts"] + + fields_to_convert_to_float = [ + dataset.proto, + dataset.dport, + dataset.sport, + dataset.dur, + dataset.pkts, + dataset.spkts, + dataset.bytes, + dataset.sbytes, + dataset.state, + ] + for field in fields_to_convert_to_float: + try: + field = field.astype("float64") + dataset[field.name] = field + except (ValueError, AttributeError): + pass + + feature_order = [ + "dur", + "proto", + "sport", + "dport", + "spkts", + "dpkts", + "sbytes", + "dbytes", + "state", + "bytes", + "pkts", + ] + label_cols = [ + "ground_truth_label", + "detailed_ground_truth_label", + "label", + "module_labels", + "detailed_label", + ] + + for col in feature_order: + if col not in dataset.columns: + dataset[col] = 0.0 + + for col in feature_order: + dataset[col] = pd.to_numeric( + dataset[col], errors="coerce" + ).fillna(0.0) + + existing_label_cols = [ + col for col in label_cols if col in dataset.columns + ] + dataset = dataset[feature_order + existing_label_cols] + + return dataset + except Exception: + self.print("Error in process_features()") + self.print(traceback.format_exc(), 0, 1) + return dataset.iloc[0:0] + + def create_empty_model(self): + try: + from river import linear_model + + return linear_model.LogisticRegression() + except Exception as exc: + self.print( + f"River is unavailable ({exc}). 
Falling back to baseline model.", + 0, + 1, + ) + return _FallbackRiverModel() + + def create_empty_preprocessor(self): + return StandardScaler() + + def _is_scaler_initialized(self) -> bool: + return ( + hasattr(self.preprocessor, "mean_") + and self.preprocessor.mean_ is not None + ) + + def _is_pca_initialized(self) -> bool: + return self.pca is not None and hasattr(self.pca, "components_") + + def is_preprocessor_initialized(self) -> bool: + return self._is_scaler_initialized() and self._is_pca_initialized() + + def update_preprocessor(self, x_train: pd.DataFrame): + try: + if not self._is_scaler_initialized(): + self.preprocessor.fit(x_train) + else: + self.preprocessor.partial_fit(x_train) + except Exception as exc: + incoming = list(x_train.columns) + self.print( + f"[debug][update_preprocessor] failed with {type(exc).__name__}: {exc}", + 0, + 1, + ) + self.print( + f"[debug][update_preprocessor] incoming_columns={incoming}", + 0, + 1, + ) + if hasattr(self.preprocessor, "feature_names_in_"): + expected = list( + getattr(self.preprocessor, "feature_names_in_", []) + ) + unseen = sorted(set(incoming) - set(expected)) + missing = sorted(set(expected) - set(incoming)) + self.print( + f"[debug][update_preprocessor] expected_columns={expected}", + 0, + 1, + ) + self.print( + f"[debug][update_preprocessor] unseen_columns={unseen}", + 0, + 1, + ) + self.print( + f"[debug][update_preprocessor] missing_columns={missing}", + 0, + 1, + ) + raise + self._fit_pca_next_transform = True + + def _create_incremental_pca(self) -> IncrementalPCA: + kwargs = {"batch_size": self.pca_batch_size} + if self.pca_n_components is not None: + kwargs["n_components"] = self.pca_n_components + return IncrementalPCA(**kwargs) + + def _fit_or_update_pca(self, x_scaled: numpy.ndarray): + if self.pca is None: + self.pca = self._create_incremental_pca() + + n_samples, n_features = x_scaled.shape + if n_samples < 2: + raise ValueError("PCA requires at least 2 samples to fit.") + + if 
self.pca_n_components is not None and self.pca_n_components > min( + n_samples, n_features + ): + raise ValueError( + f"Configured pca_n_components={self.pca_n_components} exceeds " + f"allowed maximum {min(n_samples, n_features)} for current batch." + ) + + if not self._is_pca_initialized(): + self.pca.fit(x_scaled) + else: + if hasattr(self.pca, "partial_fit"): + self.pca.partial_fit(x_scaled) + else: + self.print( + "Loaded PCA has no partial_fit(); keeping it fixed during training.", + 0, + 1, + ) + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + x_scaled = self.preprocessor.transform(x_data) + + if self._fit_pca_next_transform: + self._fit_or_update_pca(x_scaled) + self._fit_pca_next_transform = False + + if self._is_pca_initialized(): + return self.pca.transform(x_scaled) + + raise ValueError( + "PCA is required but not initialized. " + "Ensure pca_load_path points to a fitted PCA in test mode " + "or train with enough samples to fit PCA." + ) + + @staticmethod + def _row_to_dict(row: numpy.ndarray) -> dict: + return {f"f{i}": float(value) for i, value in enumerate(row)} + + @staticmethod + def _normalize_label(label): + if isinstance(label, str): + normalized = label.strip().lower() + if normalized in {"benign", "normal"}: + return BENIGN + if normalized in {"malicious", "malware"}: + return MALICIOUS + return label + + def _guess_numeric_targets(self) -> bool: + target_transform = getattr(self.clf, "_target_transform", None) + if callable(target_transform): + try: + target_transform(MALICIOUS) + return False + except Exception: + return True + module_name = getattr(self.clf.__class__, "__module__", "") + return module_name.startswith("river.") + + def _encode_targets( + self, targets: numpy.ndarray, numeric_targets: bool + ) -> numpy.ndarray: + normalized_targets = [ + self._normalize_label(target) for target in targets + ] + if not numeric_targets: + return numpy.asarray(normalized_targets) + encoded = [ + 
self._label_to_target.get(target, target) + for target in normalized_targets + ] + return numpy.asarray(encoded) + + def _decode_target(self, value): + if isinstance(value, (float, int, numpy.floating, numpy.integer)): + value = float(value) + if numpy.isclose(value, self.malicious_target_value): + return MALICIOUS + if numpy.isclose(value, self.benign_target_value): + return BENIGN + return self._normalize_label(value) + + def fit_incremental_model( + self, + x_train: numpy.ndarray, + y_train: numpy.ndarray, + classes: Optional[list] = None, + ): + numeric_targets = self._guess_numeric_targets() + encoded_targets = self._encode_targets(y_train, numeric_targets) + for row, label in zip(x_train, encoded_targets): + self.clf.learn_one(self._row_to_dict(row), label) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + preds = [] + for row in x_data: + pred = self.clf.predict_one(self._row_to_dict(row)) + if pred is None: + preds.append(BENIGN) + continue + preds.append(self._decode_target(pred)) + return numpy.asarray(preds) + + def store_model(self): + super().store_model() + if self.pca is None: + return + + pca_dir = os.path.dirname(self.pca_store_path) + if pca_dir: + os.makedirs(pca_dir, exist_ok=True) + + with open(self.pca_store_path, "wb") as pca_file: + pca_file.write(pickle.dumps(self.pca)) + + def read_model(self): + super().read_model() + self.pca = None + + loaded_pca = self._read_pickle_or_none(self.pca_load_path) + if loaded_pca is not None: + self.pca = loaded_pca + return + + if self.mode == "test": + self.print( + "No PCA found in test mode. 
PCA is mandatory for ml_online_model.", + 0, + 1, + ) + return + + self.pca = self._create_incremental_pca() + + def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + self._train_default( + sum_labeled_flows, last_number_of_flows_when_trained + ) + + def run_test_on_flow(self, flow: dict): + self._test_default(flow) From 5b4ec44eb4af327401ab62b5a28d11ba173be66e Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:26:55 +0000 Subject: [PATCH 03/26] ML module base class, template for new modules in common. config parser for new config options --- slips_files/common/abstracts/README.md | 50 + .../common/abstracts/ml_backend_template.py | 76 + .../common/abstracts/ml_module_base.py | 1003 +++++++++++++ .../common/ml_modules_utils/__init__.py | 1 + .../common/ml_modules_utils/base_utils.py | 466 ++++++ .../plot_testing_performance.py | 503 +++++++ .../plot_train_performance.py | 1336 +++++++++++++++++ slips_files/common/parsers/config_parser.py | 221 +++ 8 files changed, 3656 insertions(+) create mode 100644 slips_files/common/abstracts/README.md create mode 100644 slips_files/common/abstracts/ml_backend_template.py create mode 100644 slips_files/common/abstracts/ml_module_base.py create mode 100644 slips_files/common/ml_modules_utils/__init__.py create mode 100644 slips_files/common/ml_modules_utils/base_utils.py create mode 100644 slips_files/common/ml_modules_utils/plot_testing_performance.py create mode 100644 slips_files/common/ml_modules_utils/plot_train_performance.py diff --git a/slips_files/common/abstracts/README.md b/slips_files/common/abstracts/README.md new file mode 100644 index 0000000000..5ee21db9d5 --- /dev/null +++ b/slips_files/common/abstracts/README.md @@ -0,0 +1,50 @@ +# ML module base workflow + +Shared infrastructure for standalone ML modules (for example `ml_linear_model`, `ml_online_model`) lives in `slips_files/common`. 
+ +## Folder purpose + +- `ml_module_base.py`: common runtime loop, buffering, config wiring, model I/O, evidence emission. +- `ml_backend_template.py`: copy/adapt this skeleton when creating a new backend. +- `../ml_modules_utils/base_utils.py`: metrics parsing/computation for logs/plots. +- `../ml_modules_utils/plot_train_performance.py`, `../ml_modules_utils/plot_testing_performance.py`: log-based visualization helpers. + +## How to add a new model backend + +1. Create a new module folder under `modules/` with matching file name (required by Slips discovery), e.g. `modules/ml_xxx/ml_xxx.py`. +2. Quick start: copy `slips_files/common/abstracts/ml_backend_template.py` into your module and adapt. +3. Implement a class inheriting `MLBaseDetection`. +4. Set class metadata: `name`, `description`, `authors`, `module_key`, `module_config_section`. +5. Implement required abstract methods/signatures. + +## Required method signatures + +- `get_default_artifact_paths(self) -> Tuple[str, str, str, str]` +- `process_features(self, dataset: pd.DataFrame) -> pd.DataFrame` +- `create_empty_model(self) -> Any` +- `create_empty_preprocessor(self) -> Any` +- `update_preprocessor(self, x_train: pd.DataFrame)` +- `transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray` +- `fit_incremental_model(self, x_train: numpy.ndarray, y_train: numpy.ndarray, classes: Optional[list] = None)` +- `predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray` +- `is_preprocessor_initialized(self) -> bool` +- `train(self, sum_labeled_flows, last_number_of_flows_when_trained)` +- `run_test_on_flow(self, flow: dict)` + +## Config contract + +Add a section in `config/slips.yaml` matching `module_config_section` with: + +- `mode`, `training_batch_size`, `seed` +- `create_performance_metrics_log_files`, `log_suffix`, `test_log_batch_size` +- `model_load_path`, `model_store_path`, `preprocess_load_path`, `preprocess_store_path` + +Optional backend-specific keys (for example PCA) should be read in 
the child class. + +## Train/test workflow + +Each ML module has its own independent `mode` (`train` or `test`) and artifact paths in `config/slips.yaml`. + +- Test provided models: set that module section to `mode: test`. +- Train custom models without overwriting defaults: set `mode: train`, keep `*_store_path` on custom files. +- Test custom models: switch `*_load_path` to custom artifact files and set `mode: test`. diff --git a/slips_files/common/abstracts/ml_backend_template.py b/slips_files/common/abstracts/ml_backend_template.py new file mode 100644 index 0000000000..3af27963b6 --- /dev/null +++ b/slips_files/common/abstracts/ml_backend_template.py @@ -0,0 +1,76 @@ +from typing import Any, Optional, Tuple + +import numpy +import pandas as pd + +from slips_files.common.abstracts.ml_module_base import MLBaseDetection + + +# New backend checklist: +# - Copy this file to modules//.py +# - Rename class, module_key, and module_config_section +# - Set artifact default paths for your backend +# - Implement all NotImplementedError methods + + +class MLBackendTemplate(MLBaseDetection): + name = "ML backend template" + description = "Skeleton backend for a standalone ML flow detector" + authors = ["Your Name"] + module_key = "ml_template" + module_config_section = "ml_template" + + def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: + return ( + "./modules/ml_template/artifacts/model.bin", + "./modules/ml_template/artifacts/preprocess.bin", + "./modules/ml_template/artifacts/model.bin", + "./modules/ml_template/artifacts/preprocess.bin", + ) + + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + return dataset + + def create_empty_model(self) -> Any: + raise NotImplementedError( + "Return an untrained backend model instance." 
+ ) + + def create_empty_preprocessor(self) -> Any: + raise NotImplementedError("Return an untrained preprocessor or None.") + + def update_preprocessor(self, x_train: pd.DataFrame): + raise NotImplementedError( + "Incrementally fit/update preprocessing on x_train." + ) + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + raise NotImplementedError( + "Convert features to model-ready numpy array." + ) + + def fit_incremental_model( + self, + x_train: numpy.ndarray, + y_train: numpy.ndarray, + classes: Optional[list] = None, + ): + raise NotImplementedError( + "Incrementally train model on current batch." + ) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + raise NotImplementedError("Return batch predictions for x_data.") + + def is_preprocessor_initialized(self) -> bool: + raise NotImplementedError( + "Return True when preprocessor can transform data." + ) + + def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + return self._train_default( + sum_labeled_flows, last_number_of_flows_when_trained + ) + + def run_test_on_flow(self, flow: dict): + return self._test_default(flow) diff --git a/slips_files/common/abstracts/ml_module_base.py b/slips_files/common/abstracts/ml_module_base.py new file mode 100644 index 0000000000..23f5b53d47 --- /dev/null +++ b/slips_files/common/abstracts/ml_module_base.py @@ -0,0 +1,1003 @@ +import json +import os +import pickle +import random +import traceback +from abc import ABC, abstractmethod +from typing import Any, Optional, Tuple + +import numpy +import pandas as pd + +from slips_files.common.abstracts.imodule import IModule +from slips_files.common.parsers.config_parser import ConfigParser +from slips_files.common.slips_utils import utils +from slips_files.core.structures.evidence import ( + Attacker, + Direction, + Evidence, + EvidenceType, + IoCType, + Method, + ProfileID, + ThreatLevel, + TimeWindow, + Victim, +) + +BACKGROUND = "background" +BENIGN = "Benign" 
+MALICIOUS = "Malicious" + + +class MLBaseDetection(IModule, ABC): + """ + Generic base class for standalone ML detection modules. + + Subclasses implement only model specific pieces: + - feature processing + - model/preprocessor creation + - incremental fit and inference + """ + + name = "ml_module" + description = ( + "Train or test a Machine Learning model to detect malicious flows" + ) + authors = ["Jan Svoboda"] + module_key = "ml_module" + module_config_section = "ml_module" + + def subscribe_to_channels(self): + self.c1 = self.db.subscribe("new_flow") + self.channels = {"new_flow": self.c1} + if self.mode == "train": + self.c2 = self.db.subscribe("tw_closed") + self.channels["tw_closed"] = self.c2 + + def init(self): + """Initialize channels, config, reproducibility, artifact paths, and logging.""" + self.fieldseparator = self.db.get_field_separator() + + self.read_configuration() + + self.last_number_of_flows_when_trained = 0 + self.classifier_initialized = False + self.all_classes = [MALICIOUS, BENIGN] + + self.labeled_counter = 0 + self.training_flows = [] + self.testing_flows_since_last_log = 0 + self.last_closed_twid = None + + conf = ConfigParser() + ( + default_model_load, + default_preprocess_load, + default_model_store, + default_preprocess_store, + ) = self.get_default_artifact_paths() + + section = self.module_config_section + configured_model_load = conf.ml_module_model_load_path( + section, + default_model_load, + ) + configured_preprocess_load = conf.ml_module_preprocess_load_path( + section, + default_preprocess_load, + ) + configured_model_store = conf.ml_module_model_store_path( + section, + default_model_store, + ) + configured_preprocess_store = conf.ml_module_preprocess_store_path( + section, + default_preprocess_store, + ) + + key_upper = self.module_key.upper() + + configured_seed = conf.ml_module_seed(section, default=self.seed) + self.seed = int( + os.getenv( + f"SLIPS_{key_upper}_SEED", + os.getenv("SLIPS_FLOW_ML_SEED", 
str(configured_seed)), + ) + ) + random.seed(self.seed) + numpy.random.seed(self.seed) + self.rng = numpy.random.default_rng(self.seed) + + self.model_load_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_MODEL_LOAD_PATH", + explicit_path=configured_model_load, + fallback_env_var="SLIPS_FLOW_ML_MODEL_LOAD_PATH", + ) + self.preprocess_load_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PREPROCESS_LOAD_PATH", + explicit_path=configured_preprocess_load, + fallback_env_var="SLIPS_FLOW_ML_PREPROCESS_LOAD_PATH", + ) + self.model_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_MODEL_STORE_PATH", + explicit_path=configured_model_store, + fallback_env_var="SLIPS_FLOW_ML_MODEL_STORE_PATH", + ) + self.preprocess_path = self.resolve_artifact_path( + env_var=f"SLIPS_{key_upper}_PREPROCESS_STORE_PATH", + explicit_path=configured_preprocess_store, + fallback_env_var="SLIPS_FLOW_ML_PREPROCESS_STORE_PATH", + ) + + configured_test_log_batch_size = conf.ml_module_test_log_batch_size( + section, + default=self.batch_size, + ) + self.testing_log_batch_size = max( + 1, + int( + os.getenv( + f"SLIPS_{key_upper}_TEST_LOG_BATCH_SIZE", + os.getenv( + "SLIPS_FLOW_ML_TEST_LOG_BATCH_SIZE", + str(configured_test_log_batch_size), + ), + ) + ), + ) + + configured_log_suffix = conf.ml_module_log_suffix( + section, + default=self.module_key, + ) + self.log_suffix = os.getenv( + f"SLIPS_{key_upper}_LOG_SUFFIX", + os.getenv("SLIPS_FLOW_ML_LOG_SUFFIX", configured_log_suffix), + ) + + # Backward compatibility for existing sklearn-specific references. 
+ self.scaler_load_path = self.preprocess_load_path + self.scaler_path = self.preprocess_path + + self.init_log_file() + + def resolve_artifact_path( + self, + env_var: str, + explicit_path: str, + fallback_env_var: Optional[str] = None, + ) -> str: + """Resolve artifact path from env/config and normalize relative paths.""" + path = os.getenv(env_var) + if path is None and fallback_env_var: + path = os.getenv(fallback_env_var) + if path is None: + path = explicit_path + if os.path.isabs(path): + return path + return os.path.join(".", path.lstrip("./")) + + @staticmethod + def _to_bool(value, default: bool) -> bool: + """Convert common string/number representations into bool with fallback.""" + if isinstance(value, bool): + return value + if value is None: + return default + if isinstance(value, (int, float)): + return bool(value) + text = str(value).strip().lower() + if text in {"1", "true", "yes", "y", "on"}: + return True + if text in {"0", "false", "no", "n", "off"}: + return False + return default + + def init_log_file(self): + """Open train/test performance log file for the active module mode.""" + if not self.enable_logs: + self.log_file = None + return + + suffix = self.log_suffix.strip() + if suffix: + training_filename = f"training_{suffix}.log" + testing_filename = f"testing_{suffix}.log" + else: + training_filename = "training.log" + testing_filename = "testing.log" + + if self.mode == "train": + log_path = os.path.join(self.output_dir, training_filename) + else: + log_path = os.path.join(self.output_dir, testing_filename) + + os.makedirs(self.output_dir, exist_ok=True) + self.log_file = open(log_path, "w") + + self.print( + f"{self.name} module initialized in {self.mode} mode. " + f"Seed: {self.seed}. 
" + f"Minimum labels to start training: {self.minimum_labels_to_start_train}, " + f"minimum labels to retrain: {self.minimum_labels_to_retrain}, " + f"minimum labels to finalize training: {self.minimum_labels_to_finalize_train}.", + 1, + 1, + ) + + def read_configuration(self): + """Load module-scoped ML settings from config parser into runtime fields.""" + conf = ConfigParser() + section = self.module_config_section + + self.mode = conf.ml_module_mode(section, default=conf.get_ml_mode()) + self.ground_truth_config_label = conf.label() + self.enable_logs = conf.ml_module_enable_logs( + section, + default=conf.create_performance_metrics_log_files(), + ) + self.batch_size = conf.ml_module_training_batch_size( + section, + default=conf.flow_ml_detection_training_batch_size(), + ) + self.minimum_labels_to_start_train = self.batch_size + self.minimum_labels_to_retrain = self.batch_size + self.minimum_labels_to_finalize_train = int(self.batch_size / 4) + self.validate_on_train = conf.ml_module_validate_on_train( + section, + default=conf.validate_on_train(), + ) + self.percentage_validation = conf.ml_module_validation_percentage( + section, + default=0.1, + ) + self.seed = conf.ml_module_seed(section, default=1111) + self.train_from_scratch = conf.ml_module_train_from_scratch( + section, + default=False, + ) + + def write_to_log(self, message: str): + """Append one log line when metrics logging is enabled.""" + if not self.enable_logs or self.log_file is None: + return + try: + self.log_file.write(message + "\n") + except Exception as exc: + self.print(f"Error writing to log: {exc}", 0, 1) + + @abstractmethod + def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: + """ + Return backend default artifact paths. + + Returns: + model_load_path, preprocess_load_path, model_store_path, preprocess_store_path. 
+ """ + + @abstractmethod + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + """Convert raw flow dataframe to backend-ready numeric feature dataframe.""" + pass + + @abstractmethod + def create_empty_model(self) -> Any: + """Create a new untrained backend model instance.""" + pass + + @abstractmethod + def create_empty_preprocessor(self) -> Any: + """Create a new untrained preprocessing object.""" + pass + + @abstractmethod + def update_preprocessor(self, x_train: pd.DataFrame): + """Incrementally fit/update preprocessing state from training features.""" + pass + + @abstractmethod + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + """Transform processed dataframe into model input matrix.""" + pass + + @abstractmethod + def fit_incremental_model( + self, + x_train: numpy.ndarray, + y_train: numpy.ndarray, + classes: Optional[list] = None, + ): + """Incrementally train/update the model for one batch.""" + pass + + @abstractmethod + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + """Return predictions for a transformed batch.""" + pass + + @abstractmethod + def is_preprocessor_initialized(self) -> bool: + """Report whether preprocessing has enough state for inference.""" + pass + + @abstractmethod + def train( + self, + sum_labeled_flows, + last_number_of_flows_when_trained, + ): + """Backend train entrypoint; typically delegates to `_train_default`.""" + + @abstractmethod + def run_test_on_flow(self, flow: dict): + """Backend test entrypoint; typically delegates to `_test_default`.""" + + def get_dummy_flows(self) -> dict: + """Provide per-label fallback samples for first partial fit if needed.""" + return {} + + def store_training_results( + self, + y_pred_train, + y_gt_train, + y_pred_val, + y_gt_val, + sum_labeled_flows, + ): + """Compute train/validation metrics, persist model, and write one log snapshot.""" + relevant_labels = [MALICIOUS, BENIGN] + + def compute_metrics(y_true, y_pred): + metrics = { + 
"TP": numpy.sum((y_pred == MALICIOUS) & (y_true == MALICIOUS)), + "FP": numpy.sum((y_pred == MALICIOUS) & (y_true == BENIGN)), + "FN": numpy.sum((y_pred == BENIGN) & (y_true == MALICIOUS)), + "TN": numpy.sum((y_pred == BENIGN) & (y_true == BENIGN)), + } + seen_labels = { + label: numpy.sum(y_true == label) for label in relevant_labels + } + predicted_labels = { + label: numpy.sum(y_pred == label) for label in relevant_labels + } + return metrics, seen_labels, predicted_labels + + def filter_labels(y_true, y_pred): + mask = numpy.isin(y_true, relevant_labels) + return y_true[mask], y_pred[mask] + + if ( + y_pred_val is not None + and y_gt_val is not None + and y_pred_train is not None + and y_gt_train is not None + and not numpy.array_equal(y_gt_train, y_gt_val) + ): + y_gt_val_filt, y_pred_val_filt = filter_labels( + y_gt_val, y_pred_val + ) + y_gt_train_filt, y_pred_train_filt = filter_labels( + y_gt_train, y_pred_train + ) + + metrics_val, seen_labels_val, predicted_labels_val = ( + compute_metrics(y_gt_val_filt, y_pred_val_filt) + ) + metrics_train, seen_labels_train, predicted_labels_train = ( + compute_metrics(y_gt_train_filt, y_pred_train_filt) + ) + + self.write_to_log( + f"Total labels: {sum_labeled_flows}, " + f"Validation size: {len(y_pred_val_filt)}, " + f"Validation seen labels: {seen_labels_val}, " + f"Validation predicted labels: {predicted_labels_val}, " + f"Validation metrics: {metrics_val}, " + f"Training size: {len(y_gt_train_filt)}, " + f"Training seen labels: {seen_labels_train}, " + f"Training predicted labels: {predicted_labels_train}, " + f"Training metrics: {metrics_train}" + ) + else: + y_gt_val_filt, y_pred_val_filt = filter_labels( + y_gt_val, y_pred_val + ) + metrics, seen_labels, predicted_labels = compute_metrics( + y_gt_val_filt, y_pred_val_filt + ) + + self.write_to_log( + f"Total labels: {sum_labeled_flows}, " + f"Training size: {len(y_pred_val_filt)}, " + f"Training seen labels: {seen_labels}, " + f"Training predicted labels: 
{predicted_labels}, " + f"Training metrics: {metrics}" + ) + + def store_testing_results(self, original_label, predicted_label): + """Accumulate online test metrics and flush snapshots in configured batches.""" + if original_label == BACKGROUND: + return + + if not hasattr(self, "malware_metrics"): + self.malware_metrics = {"TP": 0, "FP": 0, "TN": 0, "FN": 0} + if not hasattr(self, "seen_labels"): + self.seen_labels = {MALICIOUS: 0, BENIGN: 0} + if not hasattr(self, "predicted_labels"): + self.predicted_labels = {MALICIOUS: 0, BENIGN: 0} + + if original_label in self.seen_labels: + self.seen_labels[original_label] += 1 + else: + self.seen_labels[original_label] = 1 + + if predicted_label in self.predicted_labels: + self.predicted_labels[predicted_label] += 1 + else: + self.predicted_labels[predicted_label] = 1 + + if original_label == MALICIOUS and predicted_label == MALICIOUS: + self.malware_metrics["TP"] += 1 + elif original_label == BENIGN and predicted_label == MALICIOUS: + self.malware_metrics["FP"] += 1 + elif original_label == MALICIOUS and predicted_label == BENIGN: + self.malware_metrics["FN"] += 1 + elif original_label == BENIGN and predicted_label == BENIGN: + self.malware_metrics["TN"] += 1 + + self.testing_flows_since_last_log += 1 + if self.testing_flows_since_last_log < self.testing_log_batch_size: + return + + self._write_testing_snapshot(self.testing_flows_since_last_log) + self.testing_flows_since_last_log = 0 + + def _write_testing_snapshot(self, batch_flows: int): + """Write one aggregated testing metrics snapshot to the log.""" + if batch_flows <= 0: + return + + total_flows = sum(self.seen_labels.values()) + log_str = ( + f"Batch flows: {batch_flows}; " + f"Total flows: {total_flows}; " + f"Seen labels: {self.seen_labels}; " + f"Predicted labels: {self.predicted_labels}; " + f"Malware metrics (TP/FP/TN/FN): {self.malware_metrics}; " + ) + self.write_to_log(log_str) + + def flush_testing_results(self): + """Force-write pending test metrics when 
shutting down or window closes.""" + if self.testing_flows_since_last_log > 0: + self._write_testing_snapshot(self.testing_flows_since_last_log) + self.testing_flows_since_last_log = 0 + + def drop_labels(self, df: pd.DataFrame) -> pd.DataFrame: + """Remove label-related columns before model preprocessing/inference.""" + return df.drop( + [ + "ground_truth_label", + "detailed_ground_truth_label", + "label", + "module_labels", + ], + axis=1, + errors="ignore", + ) + + def _debug_training_dataframe( + self, x_data: Optional[pd.DataFrame], stage: str + ): + """Print compact debug info for training dataframe shape/schema issues.""" + if x_data is None: + self.print(f"[debug][{stage}] x_data is None", 0, 1) + return + + self.print( + f"[debug][{stage}] shape={x_data.shape}, columns={list(x_data.columns)}", + 0, + 1, + ) + + non_numeric_cols = [ + col + for col in x_data.columns + if not pd.api.types.is_numeric_dtype(x_data[col]) + ] + if non_numeric_cols: + dtype_map = { + col: str(x_data[col].dtype) for col in non_numeric_cols + } + sample_values = { + col: x_data[col].astype(str).dropna().head(3).tolist() + for col in non_numeric_cols + } + self.print( + f"[debug][{stage}] non_numeric_cols={non_numeric_cols}", + 0, + 1, + ) + self.print( + f"[debug][{stage}] non_numeric_dtypes={dtype_map}", 0, 1 + ) + self.print( + f"[debug][{stage}] non_numeric_samples={sample_values}", + 0, + 1, + ) + + if hasattr(self.preprocessor, "feature_names_in_"): + expected = list( + getattr(self.preprocessor, "feature_names_in_", []) + ) + incoming = list(x_data.columns) + unseen = sorted(set(incoming) - set(expected)) + missing = sorted(set(expected) - set(incoming)) + self.print( + f"[debug][{stage}] expected_feature_count={len(expected)}, incoming_feature_count={len(incoming)}", + 0, + 1, + ) + if unseen: + self.print( + f"[debug][{stage}] unseen_features={unseen}", + 0, + 1, + ) + if missing: + self.print( + f"[debug][{stage}] missing_features={missing}", + 0, + 1, + ) + + def 
_train_default( + self, sum_labeled_flows, last_number_of_flows_when_trained + ): + """Shared incremental training flow used by backend `train` hooks.""" + if self.flows is None or self.flows.empty: + self.print("No flows to train on. Skipping training.", 0, 1) + return + + x_train = None + try: + if hasattr(self.flows, "ground_truth_label"): + gt = self.flows.ground_truth_label + if hasattr(gt, "iloc"): + try: + y_gt_train = numpy.asarray( + self.flows["ground_truth_label"] + ) + except Exception: + y_gt_train = numpy.full( + self.flows.shape[0], gt.iloc[0] + ) + else: + y_gt_train = numpy.full(self.flows.shape[0], gt) + else: + y_gt_train = numpy.full( + self.flows.shape[0], self.ground_truth_config_label + ) + + x_train = self.drop_labels(self.flows.copy()) + x_val = x_train + y_gt_val = y_gt_train + + if self.validate_on_train and x_train.shape[0] > 1: + val_size = int(self.percentage_validation * x_train.shape[0]) + val_size = max(1, val_size) + val_size = min(val_size, x_train.shape[0] - 1) + + validation_indices = self.rng.choice( + x_train.shape[0], + size=val_size, + replace=False, + ) + train_indices = numpy.array( + list( + set(range(x_train.shape[0])) - set(validation_indices) + ) + ) + + x_val = x_train.iloc[validation_indices] + y_gt_val = y_gt_train[validation_indices] + x_train = x_train.iloc[train_indices] + y_gt_train = y_gt_train[train_indices] + + self._debug_training_dataframe( + x_train, "before_update_preprocessor" + ) + self.update_preprocessor(x_train) + x_train_arr = self.transform_features(x_train) + + unique_labels = numpy.unique(y_gt_train) + if not self.classifier_initialized: + missing_labels = [ + label + for label in [MALICIOUS, BENIGN] + if label not in unique_labels + ] + if missing_labels: + dummies = self.get_dummy_flows() + for label in missing_labels: + if label in dummies: + x_train_arr = numpy.vstack( + [x_train_arr, dummies[label]] + ) + y_gt_train = numpy.append(y_gt_train, [label]) + + self.fit_incremental_model( + 
x_train=x_train_arr, + y_train=y_gt_train, + classes=[MALICIOUS, BENIGN], + ) + self.classifier_initialized = True + else: + self.fit_incremental_model( + x_train=x_train_arr, + y_train=y_gt_train, + classes=None, + ) + + y_pred_train = self.predict_batch(x_train_arr) + + if self.validate_on_train: + if x_val.shape[0] == 0: + self.print( + "Validation set is empty after split. Skipping validation.", + 0, + 1, + ) + y_pred_val = numpy.array([]) + else: + x_val_arr = self.transform_features(x_val) + y_pred_val = self.predict_batch(x_val_arr) + else: + y_pred_val = y_pred_train + + self.store_training_results( + y_pred_train=y_pred_train, + y_gt_train=y_gt_train, + y_pred_val=y_pred_val, + y_gt_val=y_gt_val, + sum_labeled_flows=sum_labeled_flows, + ) + + except Exception as exc: + self.print(f"Error in train(): {type(exc).__name__}: {exc}", 0, 1) + self._debug_training_dataframe(x_train, "train_exception") + self.print(traceback.format_exc(), 0, 1) + self.write_to_log("Error occurred during training.") + + self.last_number_of_flows_when_trained = self.labeled_counter + self.labeled_counter = 0 + self.training_flows = [] + + def _test_default(self, flow: dict): + """Shared per-flow inference flow used by backend `run_test_on_flow` hooks.""" + processed_flow = self.process_flow(flow) + if processed_flow is None or processed_flow.empty: + return + + try: + original_label = processed_flow["ground_truth_label"].iloc[0] + except KeyError: + original_label = self.ground_truth_config_label + + processed_flow = self.drop_labels(processed_flow) + pred = self.detect(processed_flow) + if pred is None or getattr(pred, "size", 0) == 0: + return + + if pred[0] == MALICIOUS: + self.set_evidence_malicious_flow(flow, self.twid) + self.print( + f"Prediction {pred[0]} for label {original_label}" + f' flow {flow["saddr"]}:' + f'{flow["sport"]} -> ' + f'{flow["daddr"]}:' + f'{flow["dport"]}/' + f'{flow["proto"]}', + 0, + 2, + ) + + self.store_testing_results( + original_label, + pred[0], + 
) + + def process_training_flows(self, last_number_of_flows_when_trained): + """Build and preprocess one training batch from buffered labeled flows.""" + try: + if last_number_of_flows_when_trained is None: + last_number_of_flows_when_trained = 0 + else: + last_number_of_flows_when_trained = int( + last_number_of_flows_when_trained + ) + + new_flows = self.training_flows + if len(new_flows) > self.batch_size: + self.print( + f"Expected {self.batch_size} new flows, but got {len(new_flows)}. " + "Skipping training.", + 0, + 1, + ) + return None + + df_flows = pd.DataFrame(new_flows) + self.print( + f"Processing {len(df_flows)} new flows for training.", 1, 1 + ) + df_flows = self.process_features(df_flows) + self.print( + f"Processed {len(df_flows)} new flows for training.", 1, 1 + ) + self.flows = df_flows + except Exception: + self.print("Error in process_flows()") + self.print(traceback.format_exc(), 0, 1) + + def process_flow(self, flow_to_process: dict): + """Convert one raw flow dict into processed single-row dataframe.""" + try: + raw_flow = pd.DataFrame(flow_to_process, index=[0]) + dflow = self.process_features(raw_flow) + if dflow.empty: + return None + return dflow + except Exception: + self.print("Error in process_flow()") + self.print(traceback.format_exc(), 0, 1) + return None + + def detect(self, x_flow) -> Optional[numpy.ndarray]: + """Run preprocess + model prediction on already selected feature columns.""" + if ( + not self.classifier_initialized + or not self.is_preprocessor_initialized() + ): + self.print( + "Classifier/preprocessor is not initialized. 
Please train the model before detecting.", + 0, + 1, + ) + return None + + try: + x_flow_arr = self.transform_features(x_flow) + pred = self.predict_batch(x_flow_arr) + return pred + except Exception as exc: + self.print( + f"Error in detect() while preprocessing or predicting the flow: {exc}", + 0, + 1, + ) + self.print(traceback.format_exc(), 0, 1) + return None + + def store_model(self): + """Persist current model and preprocessor artifacts to disk paths.""" + self.print("Storing the trained model and preprocessor on disk.", 0, 2) + + model_dir = os.path.dirname(self.model_path) + preprocess_dir = os.path.dirname(self.preprocess_path) + if model_dir: + os.makedirs(model_dir, exist_ok=True) + if preprocess_dir: + os.makedirs(preprocess_dir, exist_ok=True) + + with open(self.model_path, "wb") as model_file: + model_file.write(pickle.dumps(self.clf)) + with open(self.preprocess_path, "wb") as preprocess_file: + preprocess_file.write(pickle.dumps(self.preprocessor)) + + def _read_pickle_or_none(self, path: str) -> Optional[Any]: + """Load a pickle artifact or return None when missing/empty.""" + try: + with open(path, "rb") as file_handler: + return pickle.load(file_handler) + except (FileNotFoundError, EOFError): + return None + + def read_model(self): + """Load model/preprocessor artifacts or initialize empty backend objects.""" + self.print("Reading trained artifacts from disk.", 0, 2) + + if self.mode == "train" and self.train_from_scratch: + self.print( + "train_from_scratch=true in train mode: creating empty model and preprocessor.", + 0, + 2, + ) + self.clf = self.create_empty_model() + self.preprocessor = self.create_empty_preprocessor() + self.classifier_initialized = False + self.scaler = self.preprocessor + return + + loaded_model = self._read_pickle_or_none(self.model_load_path) + if loaded_model is None: + self.print("No model found, creating a new empty model.", 0, 2) + self.clf = self.create_empty_model() + self.classifier_initialized = False + else: 
+ self.clf = loaded_model + self.classifier_initialized = True + + loaded_preprocessor = self._read_pickle_or_none( + self.preprocess_load_path + ) + if loaded_preprocessor is None: + self.print("No preprocessor found, creating a new one.", 0, 2) + self.preprocessor = self.create_empty_preprocessor() + else: + self.preprocessor = loaded_preprocessor + + # Backward compatibility for existing sklearn-specific references. + self.scaler = self.preprocessor + + def set_evidence_malicious_flow(self, flow: dict, twid: str): + """Emit Slips evidence object when a flow is predicted as malicious.""" + confidence = 0.1 + description = ( + f"Flow with malicious characteristics by ML. Src IP" + f" {flow['saddr']}:{flow['sport']} to " + f"{flow['daddr']}:{flow['dport']}" + ) + twid_number = int(twid.replace("timewindow", "")) + evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_FLOW, + attacker=Attacker( + direction=Direction.SRC, + ioc_type=IoCType.IP, + value=flow["saddr"], + ), + victim=Victim( + direction=Direction.DST, + ioc_type=IoCType.IP, + value=flow["daddr"], + ), + threat_level=ThreatLevel.LOW, + confidence=confidence, + description=description, + profile=ProfileID(ip=flow["saddr"]), + timewindow=TimeWindow(twid_number), + uid=[flow["uid"]], + timestamp=flow["starttime"], + method=Method.AI, + src_port=flow["sport"], + dst_port=flow["dport"], + ) + + self.db.set_evidence(evidence) + + def shutdown_gracefully(self): + """Flush pending training/testing state and logs during module shutdown.""" + if self.mode == "train": + self.last_training_in_window() + self.store_model() + elif self.mode == "test": + self.flush_testing_results() + + if self.log_file is not None: + self.log_file.flush() + + def last_training_in_window(self): + """Optionally train on residual labeled flows before window/module ends.""" + if not self.classifier_initialized: + self.print( + "Classifier is not initialized. 
No training will be done.", + 0, + 1, + ) + return + + flows_left = self.labeled_counter + self.print(f"Flows left to train on: {flows_left}", 0, 1) + + if flows_left >= self.minimum_labels_to_finalize_train: + self.print( + f"Training on the last {flows_left} flows in the window", 0, 1 + ) + self.process_training_flows(self.last_number_of_flows_when_trained) + self.print( + f"Size of the last training batch: {len(self.flows)}", 0, 1 + ) + self.train( + self.labeled_counter, + self.last_number_of_flows_when_trained, + ) + else: + self.print( + f"Not enough flows to finalize training. " + f"Need at least {self.minimum_labels_to_finalize_train}, but got {flows_left}.", + 0, + 1, + ) + self.labeled_counter = 0 + self.training_flows = [] + + def pre_main(self): + """Drop privileges and load model artifacts before the main loop starts.""" + utils.drop_root_privs_permanently() + self.read_model() + print("\n") + + @staticmethod + def _extract_twid_from_tw_closed(msg: dict) -> Optional[str]: + """Extract timewindow id from a tw_closed message payload.""" + payload = msg.get("data") if isinstance(msg, dict) else None + if payload is None: + return None + payload = str(payload) + if "_" in payload: + return payload.split("_")[-1] + return payload + + def handle_tw_closed(self, msg: dict): + """Finalize residual train batch and persist artifacts once per closed TW.""" + if self.mode != "train": + return + + twid = self._extract_twid_from_tw_closed(msg) + if twid and twid == self.last_closed_twid: + return + if twid: + self.last_closed_twid = twid + + self.last_training_in_window() + self.store_model() + + def main(self): + """Consume incoming flows, route to train/test path, and maintain buffers.""" + if msg := self.get_msg("new_flow"): + msg = json.loads(msg["data"]) + self.twid = msg["twid"] + self.profileid = msg["profileid"] + self.flow = msg["flow"] + + self.flow.update( + { + "state": msg["interpreted_state"], + "label": msg["label"], + "module_labels": 
msg["module_labels"], + } + ) + + if (not self.flow.get("ground_truth_label")) or ( + self.flow.get("ground_truth_label") == "" + ): + self.flow["ground_truth_label"] = ( + self.ground_truth_config_label + ) + + if self.flow["ground_truth_label"] in [BACKGROUND]: + return + + if self.mode == "train": + if self.flow["ground_truth_label"] in [MALICIOUS, BENIGN]: + self.labeled_counter += 1 + self.training_flows += [self.flow] + + if self.labeled_counter < self.minimum_labels_to_retrain: + return + + self.process_training_flows( + self.last_number_of_flows_when_trained + ) + self.train( + self.labeled_counter, + self.last_number_of_flows_when_trained, + ) + + elif self.mode == "test": + self.run_test_on_flow(self.flow) + + if "tw_closed" in self.channels and (msg := self.get_msg("tw_closed")): + self.handle_tw_closed(msg) diff --git a/slips_files/common/ml_modules_utils/__init__.py b/slips_files/common/ml_modules_utils/__init__.py new file mode 100644 index 0000000000..25f48d4499 --- /dev/null +++ b/slips_files/common/ml_modules_utils/__init__.py @@ -0,0 +1 @@ +# Shared utility scripts/helpers for ML modules. 
diff --git a/slips_files/common/ml_modules_utils/base_utils.py b/slips_files/common/ml_modules_utils/base_utils.py new file mode 100644 index 0000000000..76c3e50dfc --- /dev/null +++ b/slips_files/common/ml_modules_utils/base_utils.py @@ -0,0 +1,466 @@ +# base_utils.py +import os +import ast +import re +import traceback +from typing import Dict, List, Optional + +import numpy as np +import matplotlib.pyplot as plt + +# ============================================================================ +# METRIC DISPLAY CONFIGURATIONS +# Single source of truth for all plotting - change once, applies everywhere +# ============================================================================ + +# Metrics to show in malware-focused plots (with FPR, FNR, F1, error rate) +MALWARE_PLOT_METRICS = { + "Malware FPR": "malware_fpr", + "Malware FNR": "malware_fnr", + "Malware F1": "malware_f1", + "Accuracy": "accuracy", # This IS benign-malicious accuracy + "Total Error Rate": "error_rate", +} + +# Metrics for accuracy-only plots +ACCURACY_PLOT_METRICS = { + "Accuracy": "accuracy", +} + +# Metrics for train/val comparison plots +COMPARISON_PLOT_METRICS = [ + ("accuracy", "Accuracy", "train_val_accuracy.png"), + ("malware_f1", "Malware F1", "train_val_malware_f1.png"), + ("MCC", "MCC", "train_val_mcc.png"), +] + +# Metrics for FN/FP rate comparison plots +FN_RATE_METRIC = ("malware_fnr", "FN Rate") +FP_RATE_METRIC = ("malware_fp_over_predicted", "FP Rate") + + +# ============================================================================ +# METRIC EXTRACTION FUNCTIONS +# ============================================================================ + + +def extract_metrics_for_plot( + metrics_dict: Dict[str, float], display_mapping: Dict[str, str] +) -> Dict[str, float]: + """ + Generic extractor: maps display names to metric keys. 
+ + Args: + metrics_dict: Dict with computed metrics (e.g., from accumulate_metrics) + display_mapping: Dict mapping display_name -> metric_key + + Returns: + Dict with display names as keys + """ + return { + display_name: metrics_dict.get(metric_key, 0.0) + for display_name, metric_key in display_mapping.items() + } + + +def extract_comparison_for_plot( + val_metric: float, + train_metric: float, + val_label: str = "Validation", + train_label: str = "Training", +) -> Dict[str, float]: + """ + Build comparison dict for train vs val plots. + """ + return {val_label: val_metric, train_label: train_metric} + + +def ensure_dir(path: str) -> str: + """ + Ensure directory exists, return the normalized path. + """ + p = os.path.abspath(path) + os.makedirs(p, exist_ok=True) + return p + + +def _safe_literal_eval(s: str): + try: + return ast.literal_eval(s) + except Exception: + # fallback: try replacing single quotes with double quotes for malformed JSON-like strings + try: + return ast.literal_eval(s.replace("'", '"')) + except Exception: + raise + + +def parse_training_log_line(line: str) -> Optional[Dict]: + """ + Parse one line of the 'new' training log format you provided. 
+ + Expected example format (single line): + Total labels: 500, Validation size: 49, Validation seen labels: {'Malicious': 36, 'Benign': 13}, + Validation predicted labels: {'Malicious': 38, 'Benign': 11}, Validation metrics: {'TP': 36, 'FP': 2, 'FN': 0, 'TN': 11}, + Training size: 450, Training seen labels: {...}, Training predicted labels: {...}, Training metrics: {...} + + Returns a dict with keys: + - 'total_labels' (float) if present + - 'testing_size' (int) if present + - 'training_size' (int) if present + - 'seen' (dict) : validation seen labels (if present) + - 'predicted' (dict) : validation predicted labels (if present) + - 'per_class' (dict) : per-class counts for validation in canonical form: + {'Malicious': {'TP':..., 'FP':..., 'TN':..., 'FN':...}, 'Benign': {...}} + - 'training_seen', 'training_predicted', 'training_per_class' similarly for training section if present. + + Returns None if parsing fails. + """ + out = {} + try: + s = line.strip() + + # total labels (float or int) + m_total = re.search( + r"Total labels\s*:\s*([0-9]+(?:\.[0-9]+)?)", s, re.IGNORECASE + ) + if m_total: + val = m_total.group(1) + out["total_labels"] = float(val) if "." 
in val else int(val) + + # Testing/Validation size (two variants: 'Validation size' or 'Testing size') + m_test_size = re.search( + r"(?:Validation|Testing) size\s*:\s*(\d+)", s, re.IGNORECASE + ) + if m_test_size: + out["testing_size"] = int(m_test_size.group(1)) + + # Training size (optional) + m_train_size = re.search( + r"Training size\s*:\s*(\d+)", s, re.IGNORECASE + ) + if m_train_size: + out["training_size"] = int(m_train_size.group(1)) + + # Validation Seen labels / Predicted labels + m_seen = re.search( + r"(?:Validation|Testing) seen labels\s*:\s*(\{.*?\})", s + ) + if m_seen: + out["seen"] = _safe_literal_eval(m_seen.group(1)) + + m_pred = re.search( + r"(?:Validation|Testing) predicted labels\s*:\s*(\{.*?\})", s + ) + if m_pred: + out["predicted"] = _safe_literal_eval(m_pred.group(1)) + + # Validation metrics: dictionary with TP/FP/TN/FN + m_metrics = re.search( + r"(?:Validation|Testing) metrics\s*:\s*(\{.*?\})", s + ) + if m_metrics: + metrics = _safe_literal_eval(m_metrics.group(1)) + tp = int(metrics.get("TP", 0)) + fp = int(metrics.get("FP", 0)) + fn = int(metrics.get("FN", 0)) + tn = int(metrics.get("TN", 0)) + # canonical per_class with Malicious entry (and inverted Benign) + per_class = { + "Malicious": {"TP": tp, "FP": fp, "TN": tn, "FN": fn}, + "Benign": {"TP": tn, "FP": fn, "TN": tp, "FN": fp}, + } + out["per_class"] = per_class + + # Training part (if present). 
Use "Training seen labels", "Training predicted labels", "Training metrics" + m_seen_tr = re.search(r"Training seen labels\s*:\s*(\{.*?\})", s) + if m_seen_tr: + out["training_seen"] = _safe_literal_eval(m_seen_tr.group(1)) + + m_pred_tr = re.search(r"Training predicted labels\s*:\s*(\{.*?\})", s) + if m_pred_tr: + out["training_predicted"] = _safe_literal_eval(m_pred_tr.group(1)) + + m_metrics_tr = re.search(r"Training metrics\s*:\s*(\{.*?\})", s) + if m_metrics_tr: + metrics = _safe_literal_eval(m_metrics_tr.group(1)) + tp = int(metrics.get("TP", 0)) + fp = int(metrics.get("FP", 0)) + fn = int(metrics.get("FN", 0)) + tn = int(metrics.get("TN", 0)) + training_per_class = { + "Malicious": {"TP": tp, "FP": fp, "TN": tn, "FN": fn}, + "Benign": {"TP": tn, "FP": fn, "TN": tp, "FN": fp}, + } + out["training_per_class"] = training_per_class + + # If per_class is still missing but we have seen/predicted entries with class names, + # create zero-count placeholders (can't infer TP/FP/TN/FN without explicit metrics). + if "per_class" not in out and "seen" in out and "predicted" in out: + seen_keys = set(out["seen"].keys()) + if seen_keys: + pc = {} + for k in seen_keys: + pc[k] = {"TP": 0, "FP": 0, "TN": 0, "FN": 0} + out["per_class"] = pc + + return out + except Exception as e: + print("[WARN] parse_training_log_line failed:", e) + traceback.print_exc() + return None + + +def parse_testing_log_line(line: str) -> Optional[Dict]: + """ + Parse one line of the testing log (single format). 
+ + Expected example: + Total flows: 54; Seen labels: {'Malicious': 42, 'Benign': 12}; Predicted labels: {'Malicious': 42, 'Benign': 12}; Malware metrics (TP/FP/TN/FN): {'TP': 42, 'FP': 0, 'TN': 12, 'FN': 0}; + + Returns dict with: + - batch_flows (int, optional) + - total_flows (int) + - seen (dict) + - predicted (dict) + - per_class: canonical per-class counts dict (Malicious/Benign) + - binary_summary: raw TP/FP/TN/FN for Malicious class + """ + out = {} + try: + s = line.strip() + m_batch = re.search(r"Batch flows\s*:\s*(\d+)", s, re.IGNORECASE) + if m_batch: + out["batch_flows"] = int(m_batch.group(1)) + + m_total = re.search(r"Total flows\s*:\s*(\d+)", s, re.IGNORECASE) + if m_total: + out["total_flows"] = int(m_total.group(1)) + + m_seen = re.search(r"Seen labels\s*:\s*(\{.*?\})", s) + if m_seen: + out["seen"] = _safe_literal_eval(m_seen.group(1)) + + m_pred = re.search(r"Predicted labels\s*:\s*(\{.*?\})", s) + if m_pred: + out["predicted"] = _safe_literal_eval(m_pred.group(1)) + + # Malware metrics dict + m_metrics = re.search( + r"Malware metrics(?:\s*\(.*?\))?\s*[:=]\s*(\{.*?\})", + s, + re.IGNORECASE, + ) + if m_metrics: + bm = _safe_literal_eval(m_metrics.group(1)) + tp = int(bm.get("TP", 0)) + fp = int(bm.get("FP", 0)) + tn = int(bm.get("TN", 0)) + fn = int(bm.get("FN", 0)) + out["per_class"] = { + "Malicious": {"TP": tp, "FP": fp, "TN": tn, "FN": fn}, + "Benign": {"TP": tn, "FP": fn, "TN": tp, "FN": fp}, + } + out["binary_summary"] = {"TP": tp, "FP": fp, "TN": tn, "FN": fn} + return out + except Exception as e: + print("[WARN] parse_testing_log_line failed:", e) + traceback.print_exc() + return None + + +# ------------------------ +# Metric computations +# ------------------------ +def compute_binary_metrics(counts: Dict[str, int]) -> Dict[str, float]: + """ + Given a dict with integer counts: {'TP':..., 'FP':..., 'TN':..., 'FN':...} + return a dict with: + accuracy, precision, recall, f1 + """ + tp = int(counts.get("TP", 0)) + fp = 
int(counts.get("FP", 0))
+    tn = int(counts.get("TN", 0))
+    fn = int(counts.get("FN", 0))
+
+    total = tp + tn + fp + fn
+    accuracy = (tp + tn) / total if total > 0 else 0.0
+
+    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+    f1 = (
+        (2 * precision * recall / (precision + recall))
+        if (precision + recall) > 0
+        else 0.0
+    )
+
+    # MCC: numerator/denominator form; a zero denominator (any empty
+    # confusion-matrix margin) degrades to 0.0 instead of dividing by zero.
+    numerator = (tp * tn) - (fp * fn)
+    denominator = ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5
+    mcc = numerator / denominator if denominator > 0 else 0.0
+
+    return {
+        "accuracy": accuracy,
+        "precision": precision,
+        "recall": recall,
+        "f1": f1,
+        "mcc": mcc,
+        "error_rate": (fp + fn) / total if total > 0 else 0.0,
+        "FPR": fp / (fp + tn) if (fp + tn) > 0 else 0.0,
+        "FNR": fn / (fn + tp) if (fn + tp) > 0 else 0.0,
+    }
+
+
+def compute_multi_metrics(
+    per_class: Dict[str, Dict[str, int]],
+) -> Dict[str, float]:
+    """
+    Aggregate one-vs-rest per-class confusion counts into overall metrics.
+
+    Given a per_class dict:
+        {class_name: {'TP':..., 'FP':..., 'TN':..., 'FN':...}, ...}
+    returns:
+        {
+            "accuracy",
+            "macro_precision", "macro_recall", "macro_f1",
+            "micro_precision", "micro_recall", "micro_f1"
+        }
+    Note: MCC is NOT computed or returned here; for the binary case use
+    compute_binary_metrics()["mcc"] instead.
+    """
+    # Accumulate micro (pooled) counts and per-class (macro) scores in one pass.
+    TP_total = 0
+    FP_total = 0
+    TN_total = 0
+    FN_total = 0
+    precisions = []
+    recalls = []
+    f1s = []
+
+    for cls, c in per_class.items():
+        tp = int(c.get("TP", 0))
+        fp = int(c.get("FP", 0))
+        tn = int(c.get("TN", 0))
+        fn = int(c.get("FN", 0))
+        TP_total += tp
+        FP_total += fp
+        TN_total += tn
+        FN_total += fn
+
+        # Per-class precision/recall/F1; zero-division degrades to 0.0.
+        p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+        r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+        f1 = (2 * p * r / (p + r)) if (p + r) > 0 else 0.0
+        precisions.append(p)
+        recalls.append(r)
+        f1s.append(f1)
+
+    # NOTE(review): total includes every class's TN, so with one-vs-rest
+    # counts each sample is tallied once per class — confirm this matches
+    # the intended accuracy definition for >2 classes.
+    total = TP_total + FP_total + TN_total + FN_total
+    accuracy = (TP_total + TN_total) / total if total > 0 else 0.0
+
+    micro_precision = (
+        TP_total / (TP_total + FP_total) if (TP_total + FP_total) > 0 else 0.0
+    )
+    micro_recall = (
+        TP_total / (TP_total + 
FN_total) > 0 else 0.0 + ) + micro_f1 = ( + (2 * micro_precision * micro_recall / (micro_precision + micro_recall)) + if (micro_precision + micro_recall) > 0 + else 0.0 + ) + + macro_precision = float(np.mean(precisions)) if precisions else 0.0 + macro_recall = float(np.mean(recalls)) if recalls else 0.0 + macro_f1 = float(np.mean(f1s)) if f1s else 0.0 + + return { + "accuracy": accuracy, + "macro_precision": macro_precision, + "macro_recall": macro_recall, + "macro_f1": macro_f1, + "micro_precision": micro_precision, + "micro_recall": micro_recall, + "micro_f1": micro_f1, + } + + +# ------------------------ +# Plotting helpers +# ------------------------ +def plot_major_metrics_together( + series: List[Dict[str, float]], + outpath: str, + title: str = "Metrics over tests", + xvals: Optional[List] = None, + xlabel: str = "Index", +): + if series is None or len(series) == 0: + print(f"[INFO] plot_major_metrics_together: no data for {outpath}") + return + + outdir = os.path.dirname(os.path.abspath(outpath)) + if outdir: + os.makedirs(outdir, exist_ok=True) + + metric_names = [] + first_keys = list(series[0].keys()) + for k in first_keys: + if k not in metric_names: + metric_names.append(k) + for entry in series[1:]: + for k in entry.keys(): + if k not in metric_names: + metric_names.append(k) + + metric_values = {m: [] for m in metric_names} + for entry in series: + for m in metric_names: + metric_values[m].append(entry.get(m, 0.0)) + + n = len(next(iter(metric_values.values()))) + if xvals is None: + x_axis = list(range(1, n + 1)) + else: + try: + if len(xvals) == n: + x_axis = xvals + else: + x_axis = list(range(1, n + 1)) + except Exception: + x_axis = list(range(1, n + 1)) + + plt.figure(figsize=(8, 4.5)) + for m in metric_names: + vals = metric_values[m] + plt.plot(x_axis, vals, label=m, linewidth=1.5, marker=None) + + plt.xlabel(xlabel) + plt.ylabel("Value") + plt.title(title) + plt.legend(loc="best", fontsize=8) + + all_vals = [v for vals in 
metric_values.values() for v in vals] + finite_vals = [float(x) for x in all_vals if np.isfinite(x)] + + if finite_vals: + min_val = min(finite_vals) + max_val = max(finite_vals) + value_range = max_val - min_val + + # Check if values look like probabilities/rates (0-1 range) + if 0 <= min_val and max_val <= 1: + # If the range is very small (< 0.05), we have high accuracy scenario + if value_range < 0.05: + # Show it's a zoomed view by using a tighter range + # but DON'T make it look like the full scale + margin = max(0.002, value_range * 0.2) + lower = max(0, min_val - margin) + upper = min(1, max_val + margin) + plt.ylim(lower, upper) + else: + # Normal range - show full 0 to 1 + plt.ylim(0, 1.05) + else: + # Not probability metrics - use natural range + margin = 0.05 * value_range if value_range > 0 else 0.05 + plt.ylim(min_val - margin, max_val + margin) + + plt.grid(axis="y", linestyle=":", linewidth=0.5) + plt.tight_layout() + plt.savefig(outpath) + plt.close() diff --git a/slips_files/common/ml_modules_utils/plot_testing_performance.py b/slips_files/common/ml_modules_utils/plot_testing_performance.py new file mode 100644 index 0000000000..7b06edac22 --- /dev/null +++ b/slips_files/common/ml_modules_utils/plot_testing_performance.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +# plot_test_performance.py (drop-in replacement) +import argparse +import os +import traceback + +import matplotlib.pyplot as plt +import numpy as np + +from slips_files.common.ml_modules_utils.base_utils import ( + compute_binary_metrics, + compute_multi_metrics, + ensure_dir, + parse_testing_log_line, + plot_major_metrics_together, +) + + +def resolve_testing_log_path(path_arg: str) -> str: + if os.path.isfile(path_arg): + return path_arg + + if not os.path.isdir(path_arg): + raise FileNotFoundError(f"Log file not found: {path_arg}") + + candidates = ["testing.log"] + for name in candidates: + candidate = os.path.join(path_arg, name) + if os.path.isfile(candidate): + print(f"[INFO] -f 
is a directory, using: {candidate}") + return candidate + + prefix_candidates = sorted( + [ + filename + for filename in os.listdir(path_arg) + if filename.startswith("testing_") and filename.endswith(".log") + ] + ) + if prefix_candidates: + candidate = os.path.join(path_arg, prefix_candidates[-1]) + print(f"[INFO] -f is a directory, using latest: {candidate}") + return candidate + + raise FileNotFoundError( + f"No testing log file found in directory: {path_arg}" + ) + + +def read_all_tests(logfile): + entries = [] + print(f"[INFO] Reading testing logfile: {logfile}") + with open(logfile, "r") as f: + for i, line in enumerate(f): + line = line.strip() + if not line: + continue + try: + data = parse_testing_log_line(line) + if data is None: + print( + f"[WARN] Skipping unparsable testing line {i}: {line[:200]}" + ) + continue + # strip background if exists + if "per_class" in data: + data["per_class"] = { + k: v + for k, v in data["per_class"].items() + if k.lower() not in ("background", "bg") + } + entries.append(data) + except Exception: + print( + f"[WARN] Skipping line due to parsing error: {line[:200]}" + ) + traceback.print_exc() + continue + # print(f"[INFO] Parsed {len(entries)} testing snapshots") + return entries + + +def accumulate_test_metrics_cumulative_snapshots(entries): + if not entries: + return [], [], [], [], [] + + class_names = list(entries[0].get("per_class", {}).keys()) + if not class_names: + class_names = ["Malicious", "Benign"] + + cumul_per_class_series = [] + cumul_multi_series = [] + cumul_binary_series = [] + cumul_class_counts_series = [] + cumulative_total_flows = [] + + for data in entries: + pcm = data.get("per_class", {}) + + per_class_metrics_now = {} + for cls in class_names: + counts = { + k: int(pcm.get(cls, {}).get(k, 0)) + for k in ("TP", "FP", "TN", "FN") + } + bin_metrics = compute_binary_metrics(counts) + bin_metrics.update(counts) + per_class_metrics_now[cls] = bin_metrics + 
cumul_per_class_series.append(per_class_metrics_now) + + snapshot_counts = { + cls: { + k: int(pcm.get(cls, {}).get(k, 0)) + for k in ("TP", "FP", "TN", "FN") + } + for cls in class_names + } + multi_now = compute_multi_metrics(snapshot_counts) + # malware specific + mal_key = next( + ( + k + for k in snapshot_counts + if k.lower() in ("malware", "malicious") + ), + None, + ) + if mal_key: + mcounts = snapshot_counts[mal_key] + # Reuse binary metrics! + mal_binary = compute_binary_metrics(mcounts) + multi_now["malware_fpr"] = mal_binary["FPR"] + multi_now["malware_fnr"] = mal_binary["FNR"] + multi_now["malware_f1"] = mal_binary["f1"] + else: + multi_now["malware_fpr"] = 0.0 + multi_now["malware_fnr"] = 0.0 + multi_now["malware_f1"] = 0.0 + + # malware specific + mal_key = next( + ( + k + for k in snapshot_counts + if k.lower() in ("malware", "malicious") + ), + None, + ) + if mal_key: + mcounts = snapshot_counts[mal_key] + tp = mcounts.get("TP", 0) + fp = mcounts.get("FP", 0) + tn = mcounts.get("TN", 0) + fn = mcounts.get("FN", 0) + multi_now["malware_fpr"] = ( + (fp / (fp + tn)) if (fp + tn) > 0 else 0.0 + ) + multi_now["malware_fnr"] = ( + (fn / (fn + tp)) if (fn + tp) > 0 else 0.0 + ) + prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0 + rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0 + multi_now["malware_f1"] = ( + (2 * prec * rec / (prec + rec)) if (prec + rec) > 0 else 0.0 + ) + else: + multi_now["malware_fpr"] = 0.0 + multi_now["malware_fnr"] = 0.0 + multi_now["malware_f1"] = 0.0 + + cumul_multi_series.append(multi_now) + + # binary summary + if "binary_summary" in data: + bm = data["binary_summary"] + bm_counts = { + k: int(bm.get(k, 0)) for k in ("TP", "FP", "TN", "FN") + } + else: + mal = pcm.get("Malicious", {}) + tp = int(mal.get("TP", 0)) + fp = int(mal.get("FP", 0)) + fn = int(mal.get("FN", 0)) + tn = 0 + for k in pcm.keys(): + if k.lower() not in ("malware", "malicious"): + tn += int(pcm[k].get("TN", 0)) + bm_counts = {"TP": tp, "FP": fp, "TN": tn, 
"FN": fn} + cumul_binary_series.append(compute_binary_metrics(bm_counts)) + + # class counts TP + FN + counts_dict = { + cls: int( + pcm.get(cls, {}).get("TP", 0) + pcm.get(cls, {}).get("FN", 0) + ) + for cls in class_names + } + cumul_class_counts_series.append(counts_dict) + + total = int(data.get("total_flows", 0)) + cumulative_total_flows.append(total) + + return ( + cumul_per_class_series, + cumul_multi_series, + cumul_binary_series, + cumul_class_counts_series, + cumulative_total_flows, + ) + + +def _choose_sparse_xticks(batch_count, labels): + """ + Always return numeric positions for xticks (0..batch_count-1) as the first + element. The second element is a list of labels where only a limited set + of positions contain text (sparse labels); other positions are "". + + This prevents accidental use of string labels as x coordinates. + """ + # full numeric positions for plotting (monotonic) + positions = list(range(batch_count)) + + if batch_count <= 20: + # keep all labels for small series + return positions, labels + + max_labels = 15 + step = max(1, batch_count // max_labels) + indices = list(range(0, batch_count, step)) + if indices[-1] != batch_count - 1: + indices.append(batch_count - 1) + + sparse_labels = [""] * batch_count + for i in indices: + # guard: labels might be shorter than batch_count + if i < len(labels): + sparse_labels[i] = labels[i] + else: + sparse_labels[i] = str(i) + + # NOTE: first element is the full numeric positions (not the sparse indices) + return positions, sparse_labels + + +def plot_counts_series( + series_of_dicts, outpath, title, xlabels=None, xlabel="Index" +): + if series_of_dicts is None or not series_of_dicts: + # print("[INFO] No data to plot for", title) + return + classes = list(next(iter(series_of_dicts)).keys()) + values_per_class = { + c: [entry.get(c, 0) for entry in series_of_dicts] for c in classes + } + n = len(series_of_dicts) + x_positions = list(range(n)) + plt.figure(figsize=(9, 4)) + for cls in classes: + 
def plot_confusion_matrix_from_final(final_per_class, outpath):
    """
    Render the 2x2 Malicious-vs-Benign confusion matrix taken from the
    final snapshot's per-class counts and save it to *outpath*.
    """
    counts = final_per_class.get("Malicious", {})
    true_pos = int(counts.get("TP", 0))
    false_neg = int(counts.get("FN", 0))
    false_pos = int(counts.get("FP", 0))
    true_neg = int(counts.get("TN", 0))

    matrix = np.array([[true_pos, false_neg], [false_pos, true_neg]])
    cell_text = [
        [f"TP\n{true_pos}", f"FN\n{false_neg}"],
        [f"FP\n{false_pos}", f"TN\n{true_neg}"],
    ]

    plt.figure(figsize=(4, 4))
    image = plt.imshow(matrix, interpolation="nearest", cmap="Blues")
    plt.colorbar(image, fraction=0.046, pad=0.04)
    plt.xticks([0, 1], ["Pred Malicious", "Pred Benign"], rotation=45)
    plt.yticks([0, 1], ["True Malicious", "True Benign"])

    # annotate every cell with its name and count
    for row, row_text in enumerate(cell_text):
        for col, text in enumerate(row_text):
            plt.text(col, row, text, ha="center", va="center", color="black")

    plt.title("Confusion matrix (final snapshot)")
    plt.tight_layout()
    plt.savefig(outpath)
    plt.close()
    # print(f"[SAVED] {outpath}")
+ ) + parser.add_argument( + "-f", + "--file", + required=True, + help="Path to testing log file or directory", + ) + parser.add_argument( + "-e", "--exp", required=True, help="Experiment identifier" + ) + parser.add_argument( + "--save_folder", required=False, help="Output folder", default=None + ) + args = parser.parse_args() + + save_folder = args.save_folder + if save_folder is not None: + if not os.path.isdir(save_folder): + raise NotADirectoryError( + f"Output folder does not exist: {save_folder}" + ) + base_dir = ensure_dir(save_folder) + else: + base_dir = ensure_dir("performance_metrics") + + file_path = resolve_testing_log_path(args.file) + + testing_dir = ensure_dir(os.path.join(base_dir, "testing", args.exp)) + print(f"[INFO] Output folder: {testing_dir}") + + entries = read_all_tests(file_path) + if not entries: + print("[ERROR] No testing entries parsed; exiting.") + return + + ( + cumul_per_class_series, + cumul_multi_series, + cumul_binary_series, + cumul_class_counts_series, + cumulative_total_flows, + ) = accumulate_test_metrics_cumulative_snapshots(entries) + n = len(cumul_multi_series) + # print(f"[INFO] Building plots for {n} snapshots") + + # aggregated class counts + xlabels = ( + [str(x) for x in cumulative_total_flows] + if any(cumulative_total_flows) + else [str(i) for i in range(n)] + ) + out_counts = os.path.join( + testing_dir, "class_counts_aggregated_testing.png" + ) + print("[INFO] Plotting aggregated class counts per testing snapshot...") + plot_counts_series( + cumul_class_counts_series, + out_counts, + title="Aggregated class counts\n(Testing snapshot checkpoints)", + xlabels=xlabels, + xlabel="Cumulative flows seen", + ) + + # malware metrics + print( + "[INFO] Plotting malware metrics (FPR, FNR, F1, Accuracy) over snapshots..." 
+ ) + from slips_files.common.ml_modules_utils.base_utils import ( + MALWARE_PLOT_METRICS, + extract_metrics_for_plot, + ) + + malware_metrics_data = [ + extract_metrics_for_plot(m, MALWARE_PLOT_METRICS) + for m in cumul_multi_series + ] + out_malware = os.path.join( + testing_dir, "malware_metrics_aggregated_testing.png" + ) + xvals = ( + cumulative_total_flows + if any(cumulative_total_flows) + else list(range(n)) + ) + plot_major_metrics_together( + malware_metrics_data, + out_malware, + title="Malware metrics (Aggregated)\n(testing snapshots)", + xvals=xvals, + xlabel="Total flows seen", + ) + + # FPR/FNR only + print("[INFO] Saving FPR/FNR-only plot...") + fpr_fnr_series = [ + {"FPR": m.get("malware_fpr", 0), "FNR": m.get("malware_fnr", 0)} + for m in cumul_multi_series + ] + out_fprfnr = os.path.join(testing_dir, "malware_fpr_fnr_over_time.png") + plot_major_metrics_together( + fpr_fnr_series, + out_fprfnr, + title="Malware FPR & FNR over time\n(testing snapshots)", + xvals=xvals, + xlabel="Total flows seen", + ) + + # predicted vs seen + print( + "[INFO] Plotting predicted vs seen counts (per-snapshot) for Malicious & Benign..." 
+ ) + pred_seen_series = [] + for e in entries: + seen = e.get("seen", {}) + pred = e.get("predicted", {}) + pred_seen_series.append( + { + "Seen Malicious": int(seen.get("Malicious", 0)), + "Pred Malicious": int(pred.get("Malicious", 0)), + "Seen Benign": int(seen.get("Benign", 0)), + "Pred Benign": int(pred.get("Benign", 0)), + } + ) + out_predseen = os.path.join( + testing_dir, "predicted_vs_seen_per_snapshot.png" + ) + plot_counts_series( + pred_seen_series, + out_predseen, + title="Predicted vs Seen counts per testing snapshot", + xlabels=xlabels, + xlabel="Snapshot / cumulative flows seen", + ) + + # confusion matrix (final snapshot) + print("[INFO] Plotting final confusion matrix (final snapshot)...") + final_per_class = cumul_per_class_series[-1] + out_cm = os.path.join(testing_dir, "confusion_matrix_final.png") + plot_confusion_matrix_from_final(final_per_class, out_cm) + + # summary + last_multi = cumul_multi_series[-1] + last_binary = cumul_binary_series[-1] + final_per_class_table = cumul_per_class_series[-1] + + # print("[INFO] Writing summary...") + lines = [] + lines.append("\n=== Main final metrics (Aggregated so-far) ===") + lines.append(f"Accuracy: {last_multi.get('accuracy', 0):.4f}") + lines.append( + f"Malware F1: {last_multi.get('malware_f1', 0):.4f}" + ) + lines.append( + f"Malware FPR: {last_multi.get('malware_fpr', 0):.4f}" + ) + lines.append( + f"Malware FNR: {last_multi.get('malware_fnr', 0):.4f}" + ) + lines.append(f"Macro F1: {last_multi.get('macro_f1', 0):.4f}") + lines.append( + f"Precision: {last_binary.get('precision', 0):.4f}" + ) + lines.append(f"Recall: {last_binary.get('recall', 0):.4f}") + + lines.append("\n=== Per-class metrics (final snapshot) ===") + lines.append( + f"{'Class':<15} {'TP':>8} {'TN':>8} {'FP':>8} {'FN':>8} {'Prec':>8} {'Rec':>8} {'F1':>8}" + ) + for cls, m in final_per_class_table.items(): + lines.append( + f"{cls:<15} {m.get('TP', 0):8d} {m.get('TN', 0):8d} {m.get('FP', 0):8d} {m.get('FN', 0):8d} 
{m.get('precision', 0.0):8.4f} {m.get('recall', 0.0):8.4f} {m.get('f1', 0.0):8.4f}" + ) + + lines.append(f"\nSummary for Experiment {args.exp}:") + lines.append(f"Total test snapshots processed: {len(entries)}") + if cumulative_total_flows: + lines.append(f"Total flows processed: {cumulative_total_flows[-1]}") + + summary_text = "\n".join(lines) + summary_path = os.path.join(testing_dir, "summary.txt") + with open(summary_path, "w") as f: + f.write(summary_text) + + # print(f"[SAVED] {summary_path}") + print(summary_text) + + +if __name__ == "__main__": + main() diff --git a/slips_files/common/ml_modules_utils/plot_train_performance.py b/slips_files/common/ml_modules_utils/plot_train_performance.py new file mode 100644 index 0000000000..89131ffbf2 --- /dev/null +++ b/slips_files/common/ml_modules_utils/plot_train_performance.py @@ -0,0 +1,1336 @@ +#!/usr/bin/env python3 +# plot_train_performance.py (drop-in replacement) +import argparse +import os +import traceback +import matplotlib.pyplot as plt + +from slips_files.common.ml_modules_utils.base_utils import ( + compute_binary_metrics, + compute_multi_metrics, + ensure_dir, + parse_training_log_line, + plot_major_metrics_together, +) + + +def resolve_training_log_path(path_arg: str) -> str: + if os.path.isfile(path_arg): + return path_arg + + if not os.path.isdir(path_arg): + raise FileNotFoundError(f"Log file not found: {path_arg}") + + candidates = ["training.log"] + for name in candidates: + candidate = os.path.join(path_arg, name) + if os.path.isfile(candidate): + print(f"[INFO] -f is a directory, using: {candidate}") + return candidate + + prefix_candidates = sorted( + [ + filename + for filename in os.listdir(path_arg) + if filename.startswith("training_") and filename.endswith(".log") + ] + ) + if prefix_candidates: + candidate = os.path.join(path_arg, prefix_candidates[-1]) + print(f"[INFO] -f is a directory, using latest: {candidate}") + return candidate + + raise FileNotFoundError( + f"No training log 
def read_all_batches(logfile):
    """
    Parse every line of a training log into a list of batch dicts.

    Unparsable lines are reported and skipped; "Background"/"bg" classes
    are stripped from the per-class dictionaries before the entry is kept.
    """
    print(f"[INFO] Reading logfile: {logfile}")

    def _drop_background(per_class):
        # remove Background if present
        return {
            name: counts
            for name, counts in per_class.items()
            if name.lower() not in ("background", "bg")
        }

    entries = []
    with open(logfile, "r") as handle:
        for line_no, raw in enumerate(handle):
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                data = parse_training_log_line(stripped)
                if data is None:
                    print(
                        f"[WARN] Skipping unparsable line {line_no}: {stripped[:200]}"
                    )
                    continue
                for key in ("per_class", "training_per_class"):
                    if key in data:
                        data[key] = _drop_background(data[key])
                entries.append(data)
            except Exception:
                print(
                    f"[WARN] Failed to parse line {line_no}: {stripped[:200]}"
                )
                traceback.print_exc()
                continue
    # print(f"[INFO] Parsed {len(entries)} batches from logfile")
    return entries
+ """ + malware_key = None + for cls_name in per_class.keys(): + if cls_name.lower() in ("malware", "malicious"): + malware_key = cls_name + break + + if malware_key and malware_key in per_class: + counts = per_class[malware_key] + binary_metrics = compute_binary_metrics(counts) + malware_metrics = { + "malware_fpr": binary_metrics["FPR"], + "malware_fnr": binary_metrics["FNR"], + "malware_precision": binary_metrics["precision"], + "malware_recall": binary_metrics["recall"], + "malware_f1": binary_metrics["f1"], + "MCC": binary_metrics["mcc"], + "error_rate": binary_metrics["error_rate"], + } + + tp = counts.get("TP", 0) + fp = counts.get("FP", 0) + malware_metrics["malware_fp_over_predicted"] = ( + (fp / (tp + fp)) if (tp + fp) > 0 else 0.0 + ) + else: + malware_metrics = { + "malware_fpr": 0.0, + "malware_fnr": 0.0, + "malware_fp_over_predicted": 0.0, + "malware_precision": 0.0, + "malware_recall": 0.0, + "malware_f1": 0.0, + "MCC": 0.0, + "error_rate": 0.0, + } + + return malware_metrics + + +def process_batch_metrics(per_class, class_names): + batch_metrics_per_class = {} + for cls in class_names: + bin_metrics_per_class = compute_binary_metrics(per_class[cls]) + bin_metrics_per_class.update(per_class[cls]) + batch_metrics_per_class[cls] = bin_metrics_per_class + + batch_multi = compute_multi_metrics(per_class) + batch_multi.update(compute_malware_metrics(per_class)) + + return batch_metrics_per_class, batch_multi + + +def process_cumulative_metrics(cumul_class_counters, class_names): + cumul_metrics_per_class = {} + for cls in class_names: + bin_metrics_per_class = compute_binary_metrics( + cumul_class_counters[cls] + ) + bin_metrics_per_class.update(cumul_class_counters[cls]) + cumul_metrics_per_class[cls] = bin_metrics_per_class + + cumul_multi = compute_multi_metrics(cumul_class_counters) + cumul_multi.update(compute_malware_metrics(cumul_class_counters)) + + return cumul_metrics_per_class, cumul_multi + + +def accumulate_metrics(entries, 
has_validation_data): + """ + Accumulate batch and cumulative metrics. + + - If has_validation_data is False, returns 4 training lists: + (batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train) + + - If has_validation_data is True, returns 8 lists **(validation first, training second)**: + (batch_metrics_per_class_val, + batch_metrics_multi_val, + cumul_metrics_multi_val, + cumul_metrics_per_class_val, + batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train) + """ + print("[INFO] Accumulating batch and cumulative metrics...") + + # training outputs (always used) + batch_metrics_per_class_train = [] + batch_metrics_multi_train = [] + cumul_metrics_multi_train = [] + cumul_metrics_per_class_train = [] + + # validation outputs (only if has_validation_data) + if has_validation_data: + batch_metrics_per_class_val = [] + batch_metrics_multi_val = [] + cumul_metrics_multi_val = [] + cumul_metrics_per_class_val = [] + + if not entries: + # nothing to do; return correct shape + if has_validation_data: + return ( + batch_metrics_per_class_val, + batch_metrics_multi_val, + cumul_metrics_multi_val, + cumul_metrics_per_class_val, + batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train, + ) + else: + return ( + batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train, + ) + + first = entries[0] + class_name_sets = [] + # training_predicted (if present) + if "training_predicted" in first and isinstance( + first["training_predicted"], dict + ): + class_name_sets.append(set(first["training_predicted"].keys())) + # training_per_class (explicit training per-class counts) + if "training_per_class" in first and isinstance( + first["training_per_class"], dict + ): + 
class_name_sets.append(set(first["training_per_class"].keys())) + # per_class (your parser's validation-per-class) + if "per_class" in first and isinstance(first["per_class"], dict): + class_name_sets.append(set(first["per_class"].keys())) + # validation_per_class (in case the parser used that name) + if "validation_per_class" in first and isinstance( + first["validation_per_class"], dict + ): + class_name_sets.append(set(first["validation_per_class"].keys())) + + # union all discovered names; if nothing found, fall back to empty list + if class_name_sets: + class_names = sorted(set().union(*class_name_sets)) + else: + class_names = [] + + # cumulative counters + cumul_class_counters_train = { + cls: {"TP": 0, "FP": 0, "TN": 0, "FN": 0} for cls in class_names + } + if has_validation_data: + cumul_class_counters_val = { + cls: {"TP": 0, "FP": 0, "TN": 0, "FN": 0} for cls in class_names + } + + # iterate entries and accumulate + for data in entries: + # VALIDATION split (expected key: "per_class") + if has_validation_data: + validation_per_class = data.get( + "per_class", + { + cls: {"TP": 0, "FP": 0, "TN": 0, "FN": 0} + for cls in class_names + }, + ) + batch_per_class_val, batch_multi_val = process_batch_metrics( + validation_per_class, class_names + ) + batch_metrics_per_class_val.append(batch_per_class_val) + batch_metrics_multi_val.append(batch_multi_val) + + for cls in class_names: + for k in ("TP", "FP", "TN", "FN"): + cumul_class_counters_val[cls][k] += int( + validation_per_class.get(cls, {}).get(k, 0) + ) + + cumul_per_class_val, cumul_multi_val = process_cumulative_metrics( + cumul_class_counters_val, class_names + ) + cumul_metrics_per_class_val.append(cumul_per_class_val) + cumul_metrics_multi_val.append(cumul_multi_val) + + # TRAINING split (expected key: "training_per_class") + training_per_class = data.get( + "training_per_class", + {cls: {"TP": 0, "FP": 0, "TN": 0, "FN": 0} for cls in class_names}, + ) + batch_per_class_train, batch_multi_train = 
process_batch_metrics( + training_per_class, class_names + ) + batch_metrics_per_class_train.append(batch_per_class_train) + batch_metrics_multi_train.append(batch_multi_train) + + for cls in class_names: + for k in ("TP", "FP", "TN", "FN"): + cumul_class_counters_train[cls][k] += int( + training_per_class.get(cls, {}).get(k, 0) + ) + + cumul_per_class_train, cumul_multi_train = process_cumulative_metrics( + cumul_class_counters_train, class_names + ) + cumul_metrics_per_class_train.append(cumul_per_class_train) + cumul_metrics_multi_train.append(cumul_multi_train) + + # Return order: **validation first** (if present), then training — this matches your plotting code. + if has_validation_data: + return ( + batch_metrics_per_class_val, + batch_metrics_multi_val, + cumul_metrics_multi_val, + cumul_metrics_per_class_val, + batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train, + ) + else: + return ( + batch_metrics_per_class_train, + batch_metrics_multi_train, + cumul_metrics_multi_train, + cumul_metrics_per_class_train, + ) + + +def calculate_class_counts(entries, data_key, class_names): + batch_class_counts = [] + for entry in entries: + if data_key in entry and entry[data_key]: + counts = { + cls: int( + entry[data_key][cls].get("TP", 0) + + entry[data_key][cls].get("FN", 0) + ) + for cls in class_names + } + else: + counts = {cls: 0 for cls in class_names} + batch_class_counts.append(counts) + + cumul_class_counts = {cls: 0 for cls in class_names} + cumul_class_counts_per_batch = [] + for counts in batch_class_counts: + for cls in class_names: + cumul_class_counts[cls] += counts[cls] + cumul_class_counts_per_batch.append(cumul_class_counts.copy()) + + return batch_class_counts, cumul_class_counts_per_batch + + +def _choose_sparse_xticks(batch_count, labels): + """ + Always return numeric positions for xticks (0..batch_count-1) as the first + element. 
The second element is a list of labels where only a limited set + of positions contain text (sparse labels); other positions are "". + + This prevents accidental use of string labels as x coordinates. + """ + # full numeric positions for plotting (monotonic) + positions = list(range(batch_count)) + + if batch_count <= 20: + # keep all labels for small series + return positions, labels + + max_labels = 15 + step = max(1, batch_count // max_labels) + indices = list(range(0, batch_count, step)) + if indices[-1] != batch_count - 1: + indices.append(batch_count - 1) + + sparse_labels = [""] * batch_count + for i in indices: + # guard: labels might be shorter than batch_count + if i < len(labels): + sparse_labels[i] = labels[i] + else: + sparse_labels[i] = str(i) + + # NOTE: first element is the full numeric positions (not the sparse indices) + return positions, sparse_labels + + +def plot_counts_series( + series_of_dicts, outpath, title, xlabels=None, xlabel="Batch" +): + if series_of_dicts is None or not series_of_dicts: + print("[INFO] No data to plot for", title) + return + + classes = list(next(iter(series_of_dicts)).keys()) + values_per_class = { + c: [entry.get(c, 0) for entry in series_of_dicts] for c in classes + } + batch_count = len(series_of_dicts) + x_positions = list(range(batch_count)) + + plt.figure(figsize=(9, 4)) + for cls in classes: + plt.plot(x_positions, values_per_class[cls], label=cls, linewidth=1) + + if xlabels is None: + labels = [str(i) for i in range(batch_count)] + else: + labels = list(xlabels) + + if batch_count >= 10: + cleaned = [] + for lab in labels: + if "\n" in lab: + cleaned.append(lab.split("\n", 1)[0]) + else: + cleaned.append(lab) + labels = cleaned + + idxs, sparse_labels = _choose_sparse_xticks(batch_count, labels) + plt.xticks(idxs, [sparse_labels[i] for i in idxs], rotation=45, ha="right") + + plt.xlabel(xlabel) + plt.ylabel("Count") + max_val = max(v for values in values_per_class.values() for v in values) + top = max_val * 
def get_stepping_sizes(entries, batch_count, size_key):
    """
    Build x-axis tick labels for *batch_count* batches.

    Fewer than 10 batches: every label is "index\\nsize" (size read from
    *size_key* of each entry).  10 to 20 batches: plain indices.  More than
    20: only every step-th index (and the last one) is labelled; the
    remaining positions get empty strings.
    """
    if batch_count < 10:
        return [
            f"{idx}\n{entry.get(size_key, 0)}"
            for idx, entry in enumerate(entries)
        ]

    if batch_count <= 20:
        return [str(idx) for idx in range(batch_count)]

    max_labels = 15
    step = max(1, batch_count // max_labels)
    # label only a sparse subset so the axis stays readable
    return [
        str(idx) if (idx % step == 0 or idx == batch_count - 1) else ""
        for idx, _ in enumerate(entries)
    ]
def plot_comparison_metrics(
    batch_metrics_multi,
    cumul_metrics_multi,
    batch_metrics_multi_training,
    cumul_metrics_multi_training,
    base_dir,
    stepping_total_sizes,
    cumulative_total_sizes,
    batch_count,
):
    """
    Plot validation-vs-training comparison curves for every metric in
    COMPARISON_PLOT_METRICS, once aggregated (into base_dir/aggregated)
    and once per batch (into base_dir/per_batch, filename suffixed
    with "_batch").
    """
    # Import via the package path used by this module's top-level imports;
    # the bare "base_utils" form only resolves when the module's own
    # directory happens to be on sys.path.
    from slips_files.common.ml_modules_utils.base_utils import (
        COMPARISON_PLOT_METRICS,
        extract_comparison_for_plot,
    )

    agg_dir = ensure_dir(os.path.join(base_dir, "aggregated"))
    batch_dir = ensure_dir(os.path.join(base_dir, "per_batch"))

    def _positions_for(size_labels):
        # Pad the size labels up to batch_count, then keep only the numeric
        # x positions; the sparse label list itself is not used here.
        labels = [str(v) for v in size_labels]
        if len(labels) < batch_count:
            labels.extend(str(i) for i in range(len(labels), batch_count))
        positions, _ = _choose_sparse_xticks(batch_count, labels)
        return positions

    # Aggregated plots
    for metric_key, short_title, filename in COMPARISON_PLOT_METRICS:
        combined = [
            extract_comparison_for_plot(
                cumul_metrics_multi[i].get(metric_key, 0),
                cumul_metrics_multi_training[i].get(metric_key, 0),
            )
            for i in range(batch_count)
        ]
        plot_major_metrics_together(
            combined,
            os.path.join(agg_dir, filename),
            title=f"{short_title}\n(Validation vs Training — Aggregated)",
            xvals=_positions_for(cumulative_total_sizes),
            xlabel="Aggregated samples",
        )

    # Per-batch plots
    for metric_key, short_title, filename in COMPARISON_PLOT_METRICS:
        combined = [
            extract_comparison_for_plot(
                batch_metrics_multi[i].get(metric_key, 0),
                batch_metrics_multi_training[i].get(metric_key, 0),
            )
            for i in range(batch_count)
        ]
        plot_major_metrics_together(
            combined,
            os.path.join(batch_dir, filename.replace(".png", "_batch.png")),
            title=f"{short_title}\n(Validation vs Training — Per-batch)",
            xvals=_positions_for(stepping_total_sizes),
            xlabel="Batch",
        )
def plot_malware_fn_rate_comparison(
    cumul_metrics_multi,
    cumul_metrics_multi_training,
    base_dir,
    cumulative_total_sizes,
    batch_count,
):
    """
    Plot the aggregated malware FN-rate curve, validation vs training,
    into base_dir/aggregated/train_val_fn_rate.png.
    """
    # Import via the package path used by this module's top-level imports;
    # the bare "base_utils" form only resolves when the module's own
    # directory happens to be on sys.path.
    from slips_files.common.ml_modules_utils.base_utils import (
        FN_RATE_METRIC,
        extract_comparison_for_plot,
    )

    agg_dir = ensure_dir(os.path.join(base_dir, "aggregated"))
    fn_metric_key, fn_title = FN_RATE_METRIC

    fn_rate_data = [
        extract_comparison_for_plot(
            cumul_metrics_multi[i].get(fn_metric_key, 0),
            cumul_metrics_multi_training[i].get(fn_metric_key, 0),
            f"Validation {fn_title}",
            f"Training {fn_title}",
        )
        for i in range(batch_count)
    ]
    out = os.path.join(agg_dir, "train_val_fn_rate.png")
    plot_major_metrics_together(
        fn_rate_data,
        out,
        title=f"{fn_title}\n(Validation vs Training — Aggregated)",
        xvals=cumulative_total_sizes,
        xlabel="Aggregated samples",
    )
def print_per_class_table(lines, title, cum_metrics_per_class):
    """
    Append a fixed-width per-class confusion/metric table (TP/TN/FP/FN and
    accuracy/precision/recall/F1) under an '=== title ===' header to *lines*.
    """
    lines.append(f"\n=== {title} ===")
    lines.append(
        f"{'Class':<15} {'TP':>8} {'TN':>8} {'FP':>8} {'FN':>8} {'Acc':>8} {'Prec':>8} {'Rec':>8} {'F1':>8}"
    )
    for class_name, metrics in cum_metrics_per_class.items():
        tp = metrics.get("TP", 0)
        tn = metrics.get("TN", 0)
        fp = metrics.get("FP", 0)
        fn = metrics.get("FN", 0)
        acc = metrics.get("accuracy", 0.0)
        prec = metrics.get("precision", 0.0)
        rec = metrics.get("recall", 0.0)
        f1 = metrics.get("f1", 0.0)
        lines.append(
            f"{class_name:<15} {tp:8d} {tn:8d} {fp:8d} {fn:8d} "
            f"{acc:8.4f} {prec:8.4f} {rec:8.4f} {f1:8.4f}"
        )
"last5", "last10", "last20"]: + p = ensure_dir(os.path.join(base_dir, name)) + subs[name] = p + return subs + + +def _plot_lastk_class_counts(series_per_class_k, outpath, title, xlabels=None): + if not series_per_class_k: + print(f"No class-counts to plot for {outpath}") + return + counts_series = [] + for entry in series_per_class_k: + counts_series.append( + { + cls: int(entry[cls].get("TP", 0) + entry[cls].get("FN", 0)) + for cls in entry.keys() + } + ) + plot_counts_series( + counts_series, outpath, title=title, xlabels=xlabels, xlabel="Batch" + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Plot training performance metrics." + ) + parser.add_argument( + "-f", + "--file", + required=True, + help="Path to training log file or directory", + ) + parser.add_argument( + "-e", "--exp", required=True, help="Experiment identifier" + ) + parser.add_argument( + "--save_folder", required=False, help="Output folder", default=None + ) + args = parser.parse_args() + + save_folder = args.save_folder + if save_folder is not None: + if not os.path.isdir(save_folder): + raise NotADirectoryError( + f"Output folder does not exist: {save_folder}" + ) + base_dir = ensure_dir(save_folder) + else: + base_dir = ensure_dir("performance_metrics") + + file_path = resolve_training_log_path(args.file) + + folder_dir = ensure_dir(os.path.join(base_dir, "training", args.exp)) + # print(f"[INFO] Output folder: {folder_dir}") + + entries = read_all_batches(file_path) + if not entries: + print("[ERROR] No entries parsed; exiting.") + return + + has_validation_data = any( + ("per_class" in e and bool(e["per_class"])) + or ("validation_per_class" in e and bool(e["validation_per_class"])) + or (e.get("testing_size", 0) > 0) + for e in entries + ) + + if has_validation_data: + ( + batch_metrics_per_class, + batch_metrics_multi, + cumul_metrics_multi, + cumul_metrics_per_class, + batch_metrics_per_class_training, + batch_metrics_multi_training, + cumul_metrics_multi_training, 
+ cumul_metrics_per_class_training, + ) = accumulate_metrics(entries, has_validation_data) + else: + ( + batch_metrics_per_class, + batch_metrics_multi, + cumul_metrics_multi, + cumul_metrics_per_class, + ) = accumulate_metrics(entries, has_validation_data) + + if has_validation_data: + validation_dir = ensure_dir(os.path.join(folder_dir, "validation")) + train_dir = ensure_dir(os.path.join(folder_dir, "training")) + comparison_dir = ensure_dir(os.path.join(folder_dir, "comparison")) + print(f"Validation plots will be saved to: {validation_dir}") + print(f"Training plots will be saved to: {train_dir}") + print(f"Comparison plots will be saved to: {comparison_dir}") + else: + train_dir = ensure_dir(os.path.join(folder_dir, "training")) + validation_dir = None + comparison_dir = None + print(f"Training plots will be saved to: {train_dir}") + + def get_dir(dt): + if dt == "validation" and validation_dir is not None: + return validation_dir + elif dt == "training": + return train_dir + else: + return train_dir + + class_names = list(entries[0]["training_predicted"].keys()) + batch_count = len(entries) + + # ensure subdirs + if validation_dir: + ensure_plot_subdirs(validation_dir) + ensure_plot_subdirs(train_dir) + if comparison_dir: + ensure_plot_subdirs(comparison_dir) + + # class counts + batch_class_counts, cumul_class_counts_per_batch = calculate_class_counts( + entries, "per_class", class_names + ) + plot_counts_series( + batch_class_counts, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "per_batch", + f"class_counts_batch_{'validation' if has_validation_data else 'training'}.png", + ), + title="Per-batch class counts\n(Number of samples per batch)", + xlabels=[str(i) for i in range(batch_count)], + xlabel="Batch", + ) + plot_counts_series( + cumul_class_counts_per_batch, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "aggregated", + f"class_counts_aggregated_{'validation' if has_validation_data 
else 'training'}.png", + ), + title="Aggregated class counts\n(Total samples seen so far)", + xlabels=[str(i) for i in range(batch_count)], + xlabel="Batch", + ) + + if has_validation_data: + batch_class_counts_training, cumul_class_counts_training_per_batch = ( + calculate_class_counts(entries, "training_per_class", class_names) + ) + plot_counts_series( + batch_class_counts_training, + os.path.join( + get_dir("training"), + "per_batch", + "class_counts_batch_training.png", + ), + title="Per-batch class counts (Training)", + xlabels=[str(i) for i in range(batch_count)], + xlabel="Batch", + ) + plot_counts_series( + cumul_class_counts_training_per_batch, + os.path.join( + get_dir("training"), + "aggregated", + "class_counts_aggregated_training.png", + ), + title="Aggregated class counts (Training)", + xlabels=[str(i) for i in range(batch_count)], + xlabel="Batch", + ) + + # stepping sizes + if has_validation_data: + cumulative_sizes = [] + total = 0 + for entry in entries: + size = entry.get("testing_size", 0) + total += size + cumulative_sizes.append(total) + stepping_sizes = get_stepping_sizes( + entries, batch_count, "testing_size" + ) + else: + cumulative_sizes = [] + total = 0 + for entry in entries: + size = entry.get("training_size", entry.get("testing_size", 0)) + total += size + cumulative_sizes.append(total) + stepping_sizes = get_stepping_sizes( + entries, batch_count, "training_size" + ) + + # malware & accuracy plots + plot_malware_metrics( + batch_metrics_multi, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "per_batch", + f"malware_metrics_batch_{'validation' if has_validation_data else 'training'}.png", + ), + f"Malware metrics (per-batch)\n({'Validation' if has_validation_data else 'Training'})", + stepping_sizes, + "Batch", + ) + plot_malware_metrics( + cumul_metrics_multi, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "aggregated", + f"malware_metrics_aggregated_{'validation' 
if has_validation_data else 'training'}.png", + ), + f"Malware metrics (Aggregated)\n({'Validation' if has_validation_data else 'Training'})", + xvals=cumulative_sizes, + xlabel="Aggregated samples", + ) + plot_accuracy_metrics( + batch_metrics_multi, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "per_batch", + f"accuracy_batch_{'validation' if has_validation_data else 'training'}.png", + ), + f"Benign-Malicious Acc (per-batch)\n({'Validation' if has_validation_data else 'Training'})", + stepping_sizes, + "Batch", + ) + plot_accuracy_metrics( + cumul_metrics_multi, + os.path.join( + get_dir("validation" if has_validation_data else "training"), + "aggregated", + f"accuracy_aggregated_{'validation' if has_validation_data else 'training'}.png", + ), + f"Benign-Malicious Acc (Aggregated)\n({'Validation' if has_validation_data else 'Training'})", + xvals=cumulative_sizes, + xlabel="Aggregated samples", + ) + + # sliding windows and last-k plots + def make_lastk_and_plot( + k, per_class_batch, multi_batch, base_dir, stepping_sizes_all, label + ): + series_per_class_k, series_multi_k, start_idx = ( + sliding_window_aggregated( + per_class_batch, class_names, k, trim_to_full_window=True + ) + ) + if start_idx is None or len(series_multi_k) == 0: + print( + f"[INFO] Not enough batches for last-{k} ({label}), skipping." 
+ ) + return None, None, None + n = len(per_class_batch) + xvals = list(range(start_idx, n)) + folder = os.path.join(base_dir, f"last{k}") + ensure_dir(folder) + plot_malware_metrics( + series_multi_k, + os.path.join(folder, f"malware_metrics_last{k}_{label}.png"), + f"Malware metrics (last-{k})\n({label})", + xvals, + "Batch", + ) + plot_accuracy_metrics( + series_multi_k, + os.path.join(folder, f"accuracy_last{k}_{label}.png"), + f"Benign-Malicious Acc (last-{k})\n({label})", + xvals, + "Batch", + ) + _plot_lastk_class_counts( + series_per_class_k, + os.path.join(folder, f"class_counts_last{k}_{label}.png"), + title=f"Aggregated class counts (last-{k})\n({label})", + xlabels=[str(i) for i in xvals], + ) + return series_per_class_k, series_multi_k, start_idx + + label_val = "validation" if has_validation_data else "training" + last5_per_class_val, last5_multi_val, last5_start = make_lastk_and_plot( + 5, + batch_metrics_per_class, + batch_metrics_multi, + get_dir("validation" if has_validation_data else "training"), + stepping_sizes, + label_val, + ) + last10_per_class_val, last10_multi_val, last10_start = make_lastk_and_plot( + 10, + batch_metrics_per_class, + batch_metrics_multi, + get_dir("validation" if has_validation_data else "training"), + stepping_sizes, + label_val, + ) + last20_per_class_val, last20_multi_val, last20_start = make_lastk_and_plot( + 20, + batch_metrics_per_class, + batch_metrics_multi, + get_dir("validation" if has_validation_data else "training"), + stepping_sizes, + label_val, + ) + + # training-specific plots + if has_validation_data: + cumulative_training_sizes = [] + total_training = 0 + for entry in entries: + size = entry.get("training_size", 0) + total_training += size + cumulative_training_sizes.append(total_training) + stepping_training_sizes = get_stepping_sizes( + entries, batch_count, "training_size" + ) + + plot_malware_metrics( + batch_metrics_multi_training, + os.path.join( + get_dir("training"), + "per_batch", + 
"malware_metrics_batch_training.png", + ), + "Malware metrics (per-batch)\n(Training)", + stepping_training_sizes, + "Batch", + ) + plot_malware_metrics( + cumul_metrics_multi_training, + os.path.join( + get_dir("training"), + "aggregated", + "malware_metrics_aggregated_training.png", + ), + "Malware metrics (Aggregated)\n(Training)", + xvals=cumulative_training_sizes, + xlabel="Aggregated samples", + ) + plot_accuracy_metrics( + batch_metrics_multi_training, + os.path.join( + get_dir("training"), "per_batch", "accuracy_batch_training.png" + ), + "Benign-Malicious Acc (per-batch)\n(Training)", + stepping_training_sizes, + "Batch", + ) + plot_accuracy_metrics( + cumul_metrics_multi_training, + os.path.join( + get_dir("training"), + "aggregated", + "accuracy_aggregated_training.png", + ), + "Benign-Malicious Acc (Aggregated)\n(Training)", + xvals=cumulative_training_sizes, + xlabel="Aggregated samples", + ) + + last5_per_class_train, last5_multi_train, last5_start_train = ( + make_lastk_and_plot( + 5, + batch_metrics_per_class_training, + batch_metrics_multi_training, + get_dir("training"), + stepping_training_sizes, + "training", + ) + ) + last10_per_class_train, last10_multi_train, last10_start_train = ( + make_lastk_and_plot( + 10, + batch_metrics_per_class_training, + batch_metrics_multi_training, + get_dir("training"), + stepping_training_sizes, + "training", + ) + ) + last20_per_class_train, last20_multi_train, last20_start_train = ( + make_lastk_and_plot( + 20, + batch_metrics_per_class_training, + batch_metrics_multi_training, + get_dir("training"), + stepping_training_sizes, + "training", + ) + ) + + # comparison x-axis + batch_total_sizes = [ + entry.get("training_size", 0) + entry.get("testing_size", 0) + for entry in entries + ] + if batch_count < 10: + stepping_total_sizes = [ + f"{i}\n{size}" for i, size in enumerate(batch_total_sizes) + ] + else: + stepping_total_sizes = get_stepping_sizes( + [{"dummy": 0}] * batch_count, batch_count, "dummy" + ) + + 
cumulative_total_sizes = [] + total_so_far = 0 + for size in batch_total_sizes: + total_so_far += size + cumulative_total_sizes.append(total_so_far) + + plot_comparison_metrics( + batch_metrics_multi, + cumul_metrics_multi, + batch_metrics_multi_training, + cumul_metrics_multi_training, + comparison_dir, + stepping_total_sizes, + cumulative_total_sizes, + batch_count, + ) + plot_malware_fn_rate_comparison( + cumul_metrics_multi, + cumul_metrics_multi_training, + comparison_dir, + cumulative_total_sizes, + batch_count, + ) + plot_malware_fp_over_predicted_comparison( + cumul_metrics_multi, + cumul_metrics_multi_training, + comparison_dir, + cumulative_total_sizes, + batch_count, + ) + + # comparison for last-k + def maybe_plot_compare( + last_multi_val, + last_start_val, + last_multi_train, + last_start_train, + kname, + ): + if ( + last_multi_val + and last_start_val is not None + and last_multi_train + and last_start_train is not None + ): + start = max(last_start_val, last_start_train) + offset_val = start - last_start_val + offset_train = start - last_start_train + len_val = len(last_multi_val) - offset_val + len_train = len(last_multi_train) - offset_train + common_len = min(len_val, len_train) + if common_len <= 0: + print(f"No overlapping region for {kname} comparison.") + return + slice_val = last_multi_val[ + offset_val : offset_val + common_len + ] + slice_train = last_multi_train[ + offset_train : offset_train + common_len + ] + xvals = list(range(start, start + common_len)) + base = os.path.join(comparison_dir, kname) + plot_comparison_metrics_for_series( + slice_val, + slice_train, + base, + xvals, + start, + common_len, + kname, + ) + + maybe_plot_compare( + last5_multi_val, + last5_start, + last5_multi_train, + last5_start_train, + "last5", + ) + maybe_plot_compare( + last10_multi_val, + last10_start, + last10_multi_train, + last10_start_train, + "last10", + ) + maybe_plot_compare( + last20_multi_val, + last20_start, + last20_multi_train, + 
last20_start_train, + "last20", + ) + + # summary + lines = [] + if has_validation_data: + print_summary_section( + lines, + "VALIDATION Multi-class (Aggregated)", + cumul_metrics_multi[-1], + ) + print_summary_section( + lines, + "TRAINING Multi-class (Aggregated)", + cumul_metrics_multi_training[-1], + ) + print_per_class_table( + lines, + "Per-class metrics (Aggregated) - VALIDATION", + cumul_metrics_per_class[-1], + ) + print_per_class_table( + lines, + "Per-class metrics (Aggregated) - TRAINING", + cumul_metrics_per_class_training[-1], + ) + else: + print_summary_section( + lines, "TRAINING Multi-class (Aggregated)", cumul_metrics_multi[-1] + ) + print_per_class_table( + lines, + "Per-class metrics (Aggregated) - TRAINING", + cumul_metrics_per_class[-1], + ) + + lines.append(f"\nSummary for Experiment {args.exp}:") + lines.append(f"Total batches processed: {batch_count}") + lines.append( + "Data type: Training/Validation split" + if has_validation_data + else "Data type: Training only" + ) + + summary_txt = "\n".join(lines) + summary_path = os.path.join(folder_dir, "summary.txt") + with open(summary_path, "w") as f: + f.write(summary_txt) + # print(f"[SAVED] {summary_path}") + print(summary_txt) + + +if __name__ == "__main__": + main() diff --git a/slips_files/common/parsers/config_parser.py b/slips_files/common/parsers/config_parser.py index 2bd7fb5e5d..0155dee057 100644 --- a/slips_files/common/parsers/config_parser.py +++ b/slips_files/common/parsers/config_parser.py @@ -6,6 +6,7 @@ import ipaddress from typing import ( List, + Optional, Union, ) import yaml @@ -479,6 +480,226 @@ def data_exfiltration_threshold(self): def get_ml_mode(self): return self.read_configuration("flowmldetection", "mode", "test") + @staticmethod + def _to_bool(value, default: bool) -> bool: + if isinstance(value, bool): + return value + if value is None: + return default + if isinstance(value, (int, float)): + return bool(value) + text = str(value).strip().lower() + if text in 
{"1", "true", "yes", "y", "on"}: + return True + if text in {"0", "false", "no", "n", "off"}: + return False + return default + + def ml_module_mode(self, section: str, default: str = "test") -> str: + value = self.read_configuration(section, "mode", default) + value = str(value).strip().lower() + if value not in ("train", "test"): + return default + return value + + def ml_module_enable_logs( + self, section: str, default: bool = False + ) -> bool: + value = self.read_configuration( + section, + "create_performance_metrics_log_files", + default, + ) + return self._to_bool(value, default) + + def ml_module_validate_on_train( + self, + section: str, + default: bool = True, + ) -> bool: + value = self.read_configuration(section, "validate_on_train", default) + return self._to_bool(value, default) + + def ml_module_validation_percentage( + self, + section: str, + default: float = 0.1, + ) -> float: + value = self.read_configuration( + section, "validation_percentage", default + ) + try: + value = float(value) + except (TypeError, ValueError): + value = default + + if value > 1.0: + value = value / 100.0 + + return min(max(value, 0.0), 0.9) + + def ml_module_training_batch_size( + self, + section: str, + default: int = 50, + ) -> int: + value = self.read_configuration( + section, "training_batch_size", default + ) + try: + value = int(value) + except (TypeError, ValueError): + value = default + return max(1, value) + + def ml_module_seed( + self, + section: str, + default: int = 1111, + ) -> int: + value = self.read_configuration(section, "seed", default) + try: + value = int(value) + except (TypeError, ValueError): + value = default + return value + + def ml_module_train_from_scratch( + self, + section: str, + default: bool = False, + ) -> bool: + value = self.read_configuration(section, "train_from_scratch", default) + return self._to_bool(value, default) + + def ml_module_log_suffix(self, section: str, default: str) -> str: + value = self.read_configuration(section, 
"log_suffix", default) + return str(value).strip() + + def ml_module_test_log_batch_size( + self, + section: str, + default: int, + ) -> int: + value = self.read_configuration( + section, "test_log_batch_size", default + ) + try: + value = int(value) + except (TypeError, ValueError): + value = default + return max(1, value) + + def ml_module_model_load_path(self, section: str, default: str) -> str: + return str( + self.read_configuration(section, "model_load_path", default) + ).strip() + + def ml_module_model_store_path(self, section: str, default: str) -> str: + return str( + self.read_configuration(section, "model_store_path", default) + ).strip() + + def ml_module_preprocess_load_path( + self, section: str, default: str + ) -> str: + return str( + self.read_configuration(section, "preprocess_load_path", default) + ).strip() + + def ml_module_preprocess_store_path( + self, section: str, default: str + ) -> str: + return str( + self.read_configuration(section, "preprocess_store_path", default) + ).strip() + + def ml_module_pca_n_components( + self, + section: str, + default: Optional[int] = None, + ) -> Optional[int]: + value = self.read_configuration(section, "pca_n_components", default) + if value in (None, "", "null", "None"): + return None + try: + n_components = int(value) + except (TypeError, ValueError): + return default + return max(1, n_components) + + def ml_module_pca_batch_size( + self, + section: str, + default: int, + ) -> int: + value = self.read_configuration(section, "pca_batch_size", default) + try: + value = int(value) + except (TypeError, ValueError): + value = default + return max(1, value) + + def ml_module_pca_load_path(self, section: str, default: str) -> str: + return str( + self.read_configuration(section, "pca_load_path", default) + ).strip() + + def ml_module_pca_store_path(self, section: str, default: str) -> str: + return str( + self.read_configuration(section, "pca_store_path", default) + ).strip() + + def 
ml_module_benign_target_value( + self, + section: str, + default: float = 0.0, + ) -> float: + value = self.read_configuration(section, "benign_target_value", None) + if value is None: + value = self.read_configuration( + "flowmldetection", + "benign_target_value", + default, + ) + try: + return float(value) + except (TypeError, ValueError): + return default + + def ml_module_malicious_target_value( + self, + section: str, + default: float = 1.0, + ) -> float: + value = self.read_configuration( + section, "malicious_target_value", None + ) + if value is None: + value = self.read_configuration( + "flowmldetection", + "malicious_target_value", + default, + ) + try: + return float(value) + except (TypeError, ValueError): + return default + + # Legacy flowmldetection wrappers kept for compatibility. + def create_performance_metrics_log_files(self) -> bool: + return self.ml_module_enable_logs("flowmldetection", default=False) + + def validate_on_train(self) -> bool: + return self.ml_module_validate_on_train( + "flowmldetection", default=True + ) + + def flow_ml_detection_training_batch_size(self) -> int: + return self.ml_module_training_batch_size( + "flowmldetection", default=50 + ) + def https_anomaly_training_hours(self) -> int: training_hours = self.read_configuration( "anomaly_detection_https", "training_hours", 24 From 79d24fcb75421e233424f8c7313b7d4c8db9e8f4 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:30:10 +0000 Subject: [PATCH 04/26] unit-tests for all new modules and base class --- .../ml_models/test_ml_base_detection.py | 112 ++++++++ .../modules/ml_models/test_ml_linear_model.py | 62 ++++ .../unit/modules/ml_models/test_ml_modules.py | 265 ++++++++++++++++++ .../modules/ml_models/test_ml_online_model.py | 86 ++++++ 4 files changed, 525 insertions(+) create mode 100644 tests/unit/modules/ml_models/test_ml_base_detection.py create mode 100644 tests/unit/modules/ml_models/test_ml_linear_model.py create mode 100644 
tests/unit/modules/ml_models/test_ml_modules.py create mode 100644 tests/unit/modules/ml_models/test_ml_online_model.py diff --git a/tests/unit/modules/ml_models/test_ml_base_detection.py b/tests/unit/modules/ml_models/test_ml_base_detection.py new file mode 100644 index 0000000000..836bf5eaf8 --- /dev/null +++ b/tests/unit/modules/ml_models/test_ml_base_detection.py @@ -0,0 +1,112 @@ +import numpy +import pandas as pd +import pytest + +from slips_files.common.abstracts.ml_module_base import ( + BENIGN, + MALICIOUS, + MLBaseDetection, +) + + +class _DummyBaseModule(MLBaseDetection): + name = "dummy_ml" + module_key = "dummy_ml" + module_config_section = "dummy_ml" + + def get_default_artifact_paths(self): + return "", "", "", "" + + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + return dataset + + def create_empty_model(self): + return object() + + def create_empty_preprocessor(self): + return object() + + def update_preprocessor(self, x_train: pd.DataFrame): + return None + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + return x_data.to_numpy(dtype=float) + + def fit_incremental_model(self, x_train, y_train, classes=None): + self.fit_calls.append( + { + "x_train": x_train, + "y_train": numpy.asarray(y_train), + "classes": classes, + } + ) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + return numpy.asarray([BENIGN] * len(x_data)) + + def is_preprocessor_initialized(self) -> bool: + return True + + def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + return None + + def run_test_on_flow(self, flow: dict): + return None + + +@pytest.fixture +def base_module(): + module = _DummyBaseModule.__new__(_DummyBaseModule) + module.flows = pd.DataFrame( + { + "dur": [1.0, 2.0], + "proto": [0.0, 1.0], + "sport": [10.0, 11.0], + "dport": [80.0, 443.0], + "spkts": [1.0, 1.0], + "dpkts": [1.0, 1.0], + "sbytes": [100.0, 200.0], + "dbytes": [50.0, 70.0], + "state": [1.0, 1.0], + "bytes": 
[150.0, 270.0], + "pkts": [2.0, 2.0], + "ground_truth_label": [BENIGN, MALICIOUS], + } + ) + module.ground_truth_config_label = BENIGN + module.validate_on_train = False + module.percentage_validation = 0.1 + module.rng = numpy.random.default_rng(123) + module.classifier_initialized = False + module.fit_calls = [] + module.print = lambda *args, **kwargs: None + module._debug_training_dataframe = lambda *args, **kwargs: None + module.store_training_results = lambda **kwargs: None + module.write_to_log = lambda *args, **kwargs: None + module.labeled_counter = 0 + module.training_flows = [] + module.last_number_of_flows_when_trained = 0 + module.preprocessor = object() + return module + + +class TestMLBaseModule: + def test_drop_labels_removes_known_label_columns(self, base_module): + raw = pd.DataFrame( + { + "dur": [1.0], + "ground_truth_label": [BENIGN], + "detailed_ground_truth_label": [BENIGN], + "label": [BENIGN], + "module_labels": [{"m": BENIGN}], + } + ) + cleaned = base_module.drop_labels(raw) + assert list(cleaned.columns) == ["dur"] + + def test_train_default_passes_both_classes_on_first_fit(self, base_module): + base_module._train_default( + sum_labeled_flows=2, last_number_of_flows_when_trained=0 + ) + assert len(base_module.fit_calls) == 1 + assert base_module.fit_calls[0]["classes"] == [MALICIOUS, BENIGN] diff --git a/tests/unit/modules/ml_models/test_ml_linear_model.py b/tests/unit/modules/ml_models/test_ml_linear_model.py new file mode 100644 index 0000000000..4d1812259f --- /dev/null +++ b/tests/unit/modules/ml_models/test_ml_linear_model.py @@ -0,0 +1,62 @@ +import numpy +import pytest +from slips_files.common.abstracts.ml_module_base import BENIGN, MALICIOUS +from modules.ml_linear_model.ml_linear_model import MLLinearModel + + +class _DummySklearnClassifier: + __module__ = "sklearn.linear_model" + + def __init__(self): + self.calls = [] + self._predictions = numpy.asarray([MALICIOUS, BENIGN]) + + def partial_fit(self, x_train, y_train, 
classes=None): + self.calls.append( + { + "x_train": x_train, + "y_train": numpy.asarray(y_train), + "classes": classes, + } + ) + + def predict(self, x_data): + return self._predictions[: len(x_data)] + + +@pytest.fixture +def linear_model(): + model = MLLinearModel.__new__(MLLinearModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummySklearnClassifier() + return model + + +class TestMLLinearModelLabels: + def test_linear_model_fit_uses_categorical_targets_for_sklearn( + self, linear_model + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, MALICIOUS], dtype=object) + linear_model.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + assert len(linear_model.clf.calls) == 1 + assert linear_model.clf.calls[0]["y_train"].tolist() == [ + BENIGN, + MALICIOUS, + ] + assert list(linear_model.clf.calls[0]["classes"]) == [ + MALICIOUS, + BENIGN, + ] + + def test_linear_model_prediction_returns_canonical_labels( + self, linear_model + ): + preds = linear_model.predict_batch( + numpy.array([[1.0, 2.0], [3.0, 4.0]]) + ) + assert preds.tolist() == [MALICIOUS, BENIGN] diff --git a/tests/unit/modules/ml_models/test_ml_modules.py b/tests/unit/modules/ml_models/test_ml_modules.py new file mode 100644 index 0000000000..8c17086b48 --- /dev/null +++ b/tests/unit/modules/ml_models/test_ml_modules.py @@ -0,0 +1,265 @@ +import numpy +import pandas as pd +import pytest + +from slips_files.common.abstracts.ml_module_base import ( + BENIGN, + MALICIOUS, + MLBaseDetection, +) +from modules.ml_online_model.ml_online_model import MLOnlineModel +from modules.ml_linear_model.ml_linear_model import MLLinearModel + + +class _DummyBaseModule(MLBaseDetection): + name = "dummy_ml" + module_key = "dummy_ml" + module_config_section = "dummy_ml" + + def get_default_artifact_paths(self): + return "", "", "", "" + + def process_features(self, 
dataset: pd.DataFrame) -> pd.DataFrame: + return dataset + + def create_empty_model(self): + return object() + + def create_empty_preprocessor(self): + return object() + + def update_preprocessor(self, x_train: pd.DataFrame): + return None + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + return x_data.to_numpy(dtype=float) + + def fit_incremental_model(self, x_train, y_train, classes=None): + self.fit_calls.append( + { + "x_train": x_train, + "y_train": numpy.asarray(y_train), + "classes": classes, + } + ) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + return numpy.asarray([BENIGN] * len(x_data)) + + def is_preprocessor_initialized(self) -> bool: + return True + + def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + return None + + def run_test_on_flow(self, flow: dict): + return None + + +class _DummyOnlineClassifierNumeric: + def __init__(self): + self.learned_targets = [] + self.predictions = [1.0, 0.0] + + def _target_transform(self, y): + return float(y) + + def learn_one(self, x, y): + self.learned_targets.append(y) + + def predict_one(self, x): + return self.predictions.pop(0) + + +class _DummyOnlineClassifierCategorical: + def __init__(self): + self.learned_targets = [] + + def _target_transform(self, y): + return y + + def learn_one(self, x, y): + self.learned_targets.append(y) + + def predict_one(self, x): + return MALICIOUS + + +class _DummySklearnClassifier: + __module__ = "sklearn.linear_model" + + def __init__(self): + self.calls = [] + self._predictions = numpy.asarray([MALICIOUS, BENIGN]) + + def partial_fit(self, x_train, y_train, classes=None): + self.calls.append( + { + "x_train": x_train, + "y_train": numpy.asarray(y_train), + "classes": classes, + } + ) + + def predict(self, x_data): + return self._predictions[: len(x_data)] + + +@pytest.fixture +def base_module(): + module = _DummyBaseModule.__new__(_DummyBaseModule) + module.flows = pd.DataFrame( + { + "dur": [1.0, 2.0], + 
"proto": [0.0, 1.0], + "sport": [10.0, 11.0], + "dport": [80.0, 443.0], + "spkts": [1.0, 1.0], + "dpkts": [1.0, 1.0], + "sbytes": [100.0, 200.0], + "dbytes": [50.0, 70.0], + "state": [1.0, 1.0], + "bytes": [150.0, 270.0], + "pkts": [2.0, 2.0], + "ground_truth_label": [BENIGN, MALICIOUS], + } + ) + module.ground_truth_config_label = BENIGN + module.validate_on_train = False + module.percentage_validation = 0.1 + module.rng = numpy.random.default_rng(123) + module.classifier_initialized = False + module.fit_calls = [] + module.print = lambda *args, **kwargs: None + module._debug_training_dataframe = lambda *args, **kwargs: None + module.store_training_results = lambda **kwargs: None + module.write_to_log = lambda *args, **kwargs: None + module.labeled_counter = 0 + module.training_flows = [] + module.last_number_of_flows_when_trained = 0 + module.preprocessor = object() + return module + + +@pytest.fixture +def online_model_numeric(): + model = MLOnlineModel.__new__(MLOnlineModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummyOnlineClassifierNumeric() + return model + + +@pytest.fixture +def online_model_categorical(): + model = MLOnlineModel.__new__(MLOnlineModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummyOnlineClassifierCategorical() + return model + + +@pytest.fixture +def linear_model(): + model = MLLinearModel.__new__(MLLinearModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummySklearnClassifier() + return model + + +class TestMLBaseModule: + def test_drop_labels_removes_known_label_columns(self, base_module): + raw = pd.DataFrame( + { + "dur": [1.0], + "ground_truth_label": [BENIGN], + "detailed_ground_truth_label": [BENIGN], + "label": [BENIGN], + 
"module_labels": [{"m": BENIGN}], + } + ) + + cleaned = base_module.drop_labels(raw) + + assert list(cleaned.columns) == ["dur"] + + def test_train_default_passes_both_classes_on_first_fit(self, base_module): + base_module._train_default( + sum_labeled_flows=2, last_number_of_flows_when_trained=0 + ) + + assert len(base_module.fit_calls) == 1 + assert base_module.fit_calls[0]["classes"] == [MALICIOUS, BENIGN] + + +class TestMLOfflineOnlineLabels: + def test_online_model_numeric_conversion_for_river( + self, online_model_numeric + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, MALICIOUS], dtype=object) + + online_model_numeric.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + + assert online_model_numeric.clf.learned_targets == [0.0, 1.0] + + def test_online_model_keeps_categorical_when_supported( + self, online_model_categorical + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, MALICIOUS], dtype=object) + + online_model_categorical.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + + assert online_model_categorical.clf.learned_targets == [ + BENIGN, + MALICIOUS, + ] + + def test_online_model_decodes_numeric_predictions( + self, online_model_numeric + ): + preds = online_model_numeric.predict_batch( + numpy.array([[1.0, 2.0], [3.0, 4.0]]) + ) + + assert preds.tolist() == [MALICIOUS, BENIGN] + + +class TestMLLinearModelLabels: + def test_linear_model_fit_uses_categorical_targets_for_sklearn( + self, linear_model + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, MALICIOUS], dtype=object) + + linear_model.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + + assert len(linear_model.clf.calls) == 1 + assert linear_model.clf.calls[0]["y_train"].tolist() == [ + BENIGN, + MALICIOUS, + ] + assert list(linear_model.clf.calls[0]["classes"]) == [ + MALICIOUS, + BENIGN, + ] + + def 
test_linear_model_prediction_returns_canonical_labels( + self, linear_model + ): + preds = linear_model.predict_batch( + numpy.array([[1.0, 2.0], [3.0, 4.0]]) + ) + + assert preds.tolist() == [MALICIOUS, BENIGN] diff --git a/tests/unit/modules/ml_models/test_ml_online_model.py b/tests/unit/modules/ml_models/test_ml_online_model.py new file mode 100644 index 0000000000..17df2c78ea --- /dev/null +++ b/tests/unit/modules/ml_models/test_ml_online_model.py @@ -0,0 +1,86 @@ +import numpy +import pytest +from slips_files.common.abstracts.ml_module_base import BENIGN, MALICIOUS +from modules.ml_online_model.ml_online_model import MLOnlineModel + + +class _DummyOnlineClassifierNumeric: + def __init__(self): + self.learned_targets = [] + self.predictions = [1.0, 0.0] + + def _target_transform(self, y): + return float(y) + + def learn_one(self, x, y): + self.learned_targets.append(y) + + def predict_one(self, x): + return self.predictions.pop(0) + + +class _DummyOnlineClassifierCategorical: + def __init__(self): + self.learned_targets = [] + + def _target_transform(self, y): + return y + + def learn_one(self, x, y): + self.learned_targets.append(y) + + def predict_one(self, x): + return MALICIOUS + + +@pytest.fixture +def online_model_numeric(): + model = MLOnlineModel.__new__(MLOnlineModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummyOnlineClassifierNumeric() + return model + + +@pytest.fixture +def online_model_categorical(): + model = MLOnlineModel.__new__(MLOnlineModel) + model.benign_target_value = 0.0 + model.malicious_target_value = 1.0 + model._label_to_target = {BENIGN: 0.0, MALICIOUS: 1.0} + model.clf = _DummyOnlineClassifierCategorical() + return model + + +class TestMLOfflineOnlineLabels: + def test_online_model_numeric_conversion_for_river( + self, online_model_numeric + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, 
MALICIOUS], dtype=object) + online_model_numeric.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + assert online_model_numeric.clf.learned_targets == [0.0, 1.0] + + def test_online_model_keeps_categorical_when_supported( + self, online_model_categorical + ): + x_train = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + y_train = numpy.array([BENIGN, MALICIOUS], dtype=object) + online_model_categorical.fit_incremental_model( + x_train, y_train, classes=[MALICIOUS, BENIGN] + ) + assert online_model_categorical.clf.learned_targets == [ + BENIGN, + MALICIOUS, + ] + + def test_online_model_decodes_numeric_predictions( + self, online_model_numeric + ): + preds = online_model_numeric.predict_batch( + numpy.array([[1.0, 2.0], [3.0, 4.0]]) + ) + assert preds.tolist() == [MALICIOUS, BENIGN] From ef27f824a94a2fc137bdd06c9a4c2e2ab6788f80 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:31:26 +0000 Subject: [PATCH 05/26] docs for new modules, river in install, training directions --- .secrets.baseline | 4 +-- docs/FAQ.md | 2 +- docs/detection_modules.md | 11 ++++-- docs/features.md | 11 ++++-- docs/installation.md | 3 +- docs/related_repos.md | 1 + docs/training.md | 75 ++++++++++++++------------------------- install/requirements.txt | 1 + 8 files changed, 52 insertions(+), 56 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 42de85b9f8..097a917ad1 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -149,7 +149,7 @@ "filename": "config/slips.yaml", "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, - "line_number": 268 + "line_number": 418 } ], "dataset/test14-malicious-zeek-dir/http.log": [ @@ -7185,5 +7185,5 @@ } ] }, - "generated_at": "2026-03-02T22:46:58Z" + "generated_at": "2026-03-25T16:09:08Z" } diff --git a/docs/FAQ.md b/docs/FAQ.md index 3619ac1b64..ec00a1d671 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -12,7 +12,7 @@ If the tensorflow version you're using isn't compatible with 
your architecture, you will get the "Illegal instruction" error and slips will terminate. To fix this you can disable the modules that use tensorflow by adding -```rnn-cc-detection, flowmldetection``` to the ```disable``` key in ```config/slips.yaml``` +```rnn-cc-detection``` to the ```disable``` key in ```config/slips.yaml``` ## Docker time is not in sync with that of the host diff --git a/docs/detection_modules.md b/docs/detection_modules.md index a12c9538c5..fcdf4b7633 100644 --- a/docs/detection_modules.md +++ b/docs/detection_modules.md @@ -119,8 +119,15 @@ tr:nth-child(even) { ✅ - Flow ML Detection - module to detect malicious flows using machine learning + ml_linear_model + standalone linear sklearn-based module to detect malicious flows using machine learning.<br>
+ This module uses a machine learning model that is the result of training with the Slips-ML-Training-Pipeline. The official models, along with training results, usage instructions, and details on how they were trained, are published in the Stratosphere-ML-trained-models repository. + ✅ + + + ml_online_model + standalone online module to detect malicious flows using machine learning.<br>
+ This module uses a machine learning model that is the result of training with the Slips-ML-Training-Pipeline. The official models, along with training results, usage instructions, and details on how they were trained, are published in the Stratosphere-ML-trained-models repository. ✅ diff --git a/docs/features.md b/docs/features.md index 415c5a7dbd..c01244b13d 100644 --- a/docs/features.md +++ b/docs/features.md @@ -531,8 +531,15 @@ tr:nth-child(even) { ✅ - flowmldetection - module to detect malicious flows using machine learning + ml_linear_model + standalone linear sklearn-based module to detect malicious flows using machine learning.<br>
+ This module uses a machine learning model that is the result of training with the Slips-ML-Training-Pipeline. The official models, along with training results, usage instructions, and details on how they were trained, are published in the Stratosphere-ML-trained-models repository. + ✅ + + + ml_online_model + standalone online module to detect malicious flows using machine learning.<br>
+ This module uses a machine learning model that is the result of training with the Slips-ML-Training-Pipeline. The official models, along with training results, usage instructions, and details on how they were trained, are published in the Stratosphere-ML-trained-models repository. ✅ diff --git a/docs/installation.md b/docs/installation.md index 55bdc1150f..4b183a2e47 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -82,7 +82,8 @@ In addition to the full stratosphereips/slips:latest image, there is now a minim * timeline/ * kalipso/ * p2ptrust/ -* flowmldetection/ +* ml_linear_model/ +* ml_online_model/ * cyst/ * cesnet/ * exporting_alerts/ diff --git a/docs/related_repos.md b/docs/related_repos.md index 30ddd27d44..c97d6e3ca9 100644 --- a/docs/related_repos.md +++ b/docs/related_repos.md @@ -1,3 +1,4 @@ # Related Repositories - [Slips-tools](https://github.com/stratosphereips/Slips-tools): repo is to store all the tools and scripts needeed to test and evaluate Slips +- [pipeline_ml_training_for_SLIPS](https://github.com/stratosphereips/pipeline_ml_training_for_SLIPS): standalone ML training/selection pipeline used to produce and evaluate shipped ML artifacts for Slips modules diff --git a/docs/training.md b/docs/training.md index e2a86674be..9130a54e13 100644 --- a/docs/training.md +++ b/docs/training.md @@ -1,65 +1,44 @@ # Training -Slips has one machine learning module that can be retrained by users. This is done by puttin slips in training mode so you can re-train the machine learning models with your own traffic. By default Slips includes an already trained model with our data, but it is sometimes necessary to adapt it to your own circumstances. +Slips supports ML retraining with per-module train/test switches. Each ML module has its own section in `config/slips.yaml` and can be trained independently. -Until Slips 0.7.3, there is only one module for now that can do this, the one called 'flowmldetection'.
This module analyzes flows one by one, as formatted similarly as in a conn.log Zeek file. This module is enabled by default in testing mode. This module uses by default the SGDClassifier with a linear support vector machine (SVM). The decision to use SVM was done because is one of the few algorithms that can be used for online learning and that can extend a current model with new data. +Current ML modules: -To re-train this machine learning algorithm, you need to do the following: +- `ml_linear_model` +- `ml_online_model` +- `flowmldetection` (legacy module, still available) -1- Edit the config/slips.yaml file to put Slips in train mode. Search the word __train__ in the section __[flowmldetection]__ and uncomment the __mode = train__ and comment __mode = test__. It should look like +## Per-module workflow - [flowmldetection] - # The mode 'train' should be used to tell the flowmldetection module that the flows received are all for training. - # A label should be provided in the [Parameters] section - mode = train +1. Select only the module you want to train and set its section to `mode: train`. +2. Set `parameters.label` (`normal` or `malicious`) for the input you are feeding. +3. Run Slips with your training data (pcap, Zeek directory, or interface). +4. Repeat with additional labeled traffic as needed. +5. Switch the same module back to `mode: test` to use trained artifacts. - # The mode 'test' should be used after training the models, to test in unknown data. - # You should have trained at least once with 'Normal' data and once with 'Malicious' data in order for the test to work. - #mode = test +Example run commands: -2- Establish the general label for all the traffic that you want to re-train with. For now we only support 1 label per file. Search in the [parameters] section and choose the type of traffic you will send to Slips. 
+```bash +./slips.py -c config/slips.yaml -f ~/my-traffic.pcap +./slips.py -c config/slips.yaml -f ~/my-zeek-dir/ +./slips.py -c config/slips.yaml -i eth0 +``` - # Set the label for all the flows that are being read. For now only normal and malware directly. No option for setting labels with a filter - label = normal - #label = malicious - #label = unknown +## Important notes -After this edits, just run Slips as usual with any type of input, for example with a Zeek folder. +- Train/test is module-specific; there is no global ML train mode. +- Keep model load/store paths per module (`ml_linear_model` and `ml_online_model` sections) so custom training does not overwrite shipped artifacts. +- `training_batch_size`, `validate_on_train`, `seed`, and log settings are also module-specific. - ./slips.py -c config/slips.yaml -f ~/my-computer-normal/ -Or with a pcap file. +## Official Models and Training Pipeline - ./slips.py -c config/slips.yaml -f ~/my-computer-normal2.pcap +The official trained models used by SLIPS ML modules are maintained in a separate repository: -3- If you have also malicious traffic, first change the label to malicious in config/slips.yaml +- [Stratosphere-ML-trained-models](https://github.com/stratosphereips/Stratosphere-ML-trained-models): Official, versioned, and evaluated ML models for SLIPS modules (including ml_linear_model and ml_online_model). - # Set the label for all the flows that are being read. For now only normal and malware directly. No option for setting labels with a filter - #label = normal - label = malicious - #label = unknown +The experiment/training pipeline is maintained as a standalone repository: - ./slips.py -c config/slips.yaml -f ~/my-computer-normal2.pcap +- [Slips-ML-Training-Pipeline](https://github.com/stratosphereips/pipeline_ml_training_for_SLIPS): Used to produce and evaluate shipped ML artifacts for SLIPS modules. 
-After this edits, just run Slips as usual with any type of input, for example another pcap - - ./slips.py -c config/slips.yaml -f ~/malware1.pcap - -You can also run slips in an interface and train it directly with your data - - ./slips.py -c config/slips.yaml -i eth0 - -4- Finally to use the model, put back the __test__ mode in the configuration config/slips.yaml - - [flowmldetection] - # The mode 'train' should be used to tell the flowmldetection module that the flows received are all for training. - # A label should be provided in the [Parameters] section - #mode = train - - # The mode 'test' should be used after training the models, to test in unknown data. - # You should have trained at least once with 'Normal' data and once with 'Malicious' data in order for the test to work. - mode = test - -5- Use slips normally in files or interfaces - - ./slips.py -c config/slips.yaml -i eth0 +See also: `docs/related_repos.md` diff --git a/install/requirements.txt b/install/requirements.txt index e52f249fb0..b62edf6ad1 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -16,6 +16,7 @@ validators==0.35.0 ipwhois==1.2.0 matplotlib==3.10.7 scikit_learn +river slackclient==2.9.4 psutil==7.1.3 six==1.17.0 From bfcfa5077b9e25b0b516300d262af1e85bfc1a21 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 15:41:36 +0000 Subject: [PATCH 06/26] solve problems with parsing .labeled file names? --- slips_files/core/input_profilers/zeek.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/slips_files/core/input_profilers/zeek.py b/slips_files/core/input_profilers/zeek.py index f6e0df6646..e15a5583fa 100644 --- a/slips_files/core/input_profilers/zeek.py +++ b/slips_files/core/input_profilers/zeek.py @@ -89,7 +89,9 @@ def remove_subsuffix(self, file_name: str) -> str: # is it something like notice.13:00:00-14:00:00.log? 
splitted_filename = file_name.split(".") - if len(splitted_filename) == 3: + if len(splitted_filename) >= 3: + if splitted_filename[1] == "log": + return splitted_filename[0] + ".log" if splitted_filename[-1] == "log": return splitted_filename[0] + ".log" From a8699051e50fba9f37bd8ad9f57c41ffe470bb86 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 18:55:29 +0000 Subject: [PATCH 07/26] updated model, it was the bad sgt one --- modules/ml_online_model/artifacts/model.bin | 4 ++-- modules/ml_online_model/artifacts/pca.bin | 4 ++-- modules/ml_online_model/artifacts/scaler.bin | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ml_online_model/artifacts/model.bin b/modules/ml_online_model/artifacts/model.bin index 00c4b7c289..f862559d5c 100644 --- a/modules/ml_online_model/artifacts/model.bin +++ b/modules/ml_online_model/artifacts/model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:765897e2613eddf8e9d5208a4f68cfad2e89abae9562b224b2f6ba28c07df666 -size 21516865 +oid sha256:4e295da27653aeeb3fcbc2d5627c8123636e4072ea496b6590b3bd283782f815 +size 9747723 diff --git a/modules/ml_online_model/artifacts/pca.bin b/modules/ml_online_model/artifacts/pca.bin index c183ae9f74..9dd6134c74 100644 --- a/modules/ml_online_model/artifacts/pca.bin +++ b/modules/ml_online_model/artifacts/pca.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b992e7c34b0a25759e995849bad948378728341deabd5413e56b75142c72da42 -size 2114 +oid sha256:eddc29fae03fb9c36c9b66d176e559f1be7674aa2a4c4293b44e8dc070a3c4fb +size 1676 diff --git a/modules/ml_online_model/artifacts/scaler.bin b/modules/ml_online_model/artifacts/scaler.bin index 37726a9685..30dbd6aaa2 100644 --- a/modules/ml_online_model/artifacts/scaler.bin +++ b/modules/ml_online_model/artifacts/scaler.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:440b8454c3c95e7ff1a31148a6af53b4d9ed61ffb807233abca484b4d6c00399 +oid 
sha256:ec083c6a60d6291503fe184dd1ff3fdd5b4e81f09e9178d062a13035d6091615 size 887 From 83de07860826c52c3e15594d2233e32112b2ba51 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 19:25:03 +0000 Subject: [PATCH 08/26] now fetching via LFS for integration tests --- .github/workflows/integration-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2ab0edf93d..9bc67280f2 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -45,6 +45,7 @@ jobs: with: ref: ${{ github.ref }} fetch-depth: 0 + lfs: true - name: Start Redis uses: ./.github/actions/start-redis From d674cdfc44614071f0591525f0274e542ba7fd35 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 19:32:29 +0000 Subject: [PATCH 09/26] now fetching via LFS for integration tests --- .github/workflows/integration-tests.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 9bc67280f2..9f2d2204dd 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -41,6 +41,13 @@ jobs: test_file: ${{ fromJson(needs.list-integration-tests.outputs.test_files) }} steps: + + - name: Install Git LFS + run: | + apt-get update -qq + apt-get install -y git-lfs + git lfs install + - uses: actions/checkout@v5 with: ref: ${{ github.ref }} From 301005cdf472417d80f5fefab215a8d58d1e555e Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 20:05:58 +0000 Subject: [PATCH 10/26] updated yaml with all configs for running new ml models --- .secrets.baseline | 4 +- config/slips.yaml | 132 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 4 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 097a917ad1..044ba0f251 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -149,7 +149,7 @@ "filename": 
"config/slips.yaml", "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, - "line_number": 418 + "line_number": 396 } ], "dataset/test14-malicious-zeek-dir/http.log": [ @@ -7185,5 +7185,5 @@ } ] }, - "generated_at": "2026-03-25T16:09:08Z" + "generated_at": "2026-03-26T20:05:39Z" } diff --git a/config/slips.yaml b/config/slips.yaml index 5e307f143e..373cb8aae3 100644 --- a/config/slips.yaml +++ b/config/slips.yaml @@ -193,8 +193,9 @@ modules: # Add the names of other modules that you want to disable # (they all should be lowercase with no special characters). Example, # threatintelligence, blocking, networkdiscovery, timeline, virustotal, - # rnnccdetection, flowmldetection, updatemanager - disable: [template] + # rnnccdetection, ml_linear_model, ml_online_model, flowmldetection, updatemanager + disable: + - template # For each line in timeline file there is a timestamp. # By default the timestamp is seconds in unix time. However @@ -215,6 +216,133 @@ flowmldetection: # 'Malicious' data in order for the test to work. mode: test + # Write training/testing metrics logs to output dir. + # This affects only logging, not training behavior. + create_performance_metrics_log_files: false + + # Used only when mode: train. + # If true, each training batch is split into train/validation for metrics. + validate_on_train: false + + # Used only when validate_on_train is true. + # Fraction (0.0-1.0) or percent (1-100) of each training batch used for validation. + validation_percentage: 0.1 + + # Number of labeled flows collected before each training/retraining step. + training_batch_size: 500 + + # Reproducibility seed for train/test behavior in this module. + seed: 1111 + + # Global target conversion used by refactored ML modules when a backend + # expects numeric targets (for example some river models). + benign_target_value: 0.0 + malicious_target_value: 1.0 + +############################# +ml_linear_model: + # Standalone sklearn-based ML module. 
+ mode: test + + # Training startup behavior (used only when mode: train): + # false -> warm-start from model_load_path/preprocess_load_path (recommended default) + # true -> ignore load paths and train from scratch + train_from_scratch: false + + # Write training/testing metrics logs to output dir. + # This affects only logging, not training behavior. + create_performance_metrics_log_files: true + + # Used only when mode: train. + # If true, each training batch is split into train/validation for metrics. + validate_on_train: false + + # Used only when validate_on_train is true. + # Fraction (0.0-1.0) or percent (1-100) of each training batch used for validation. + validation_percentage: 0.1 + + # Number of labeled flows collected before each training/retraining step. + training_batch_size: 500 + + # Reproducibility seed for train/test behavior in this module. + seed: 1111 + + # Optional per-module override for target conversion values. + # benign_target_value: 0.0 + # malicious_target_value: 1.0 + + # Separate log names, so they never overwrite other ML module logs. + # Final files are: training_.log and testing_.log + log_suffix: ml_linear_model + + # Used only in mode: test. + # Emit one testing-log snapshot every N processed flows. + test_log_batch_size: 1000 + + # Artifact paths used by modules/ml_linear_model/ml_linear_model.py + # Load paths point to provided reference artifacts for immediate test/warm-start use. + model_load_path: modules/ml_linear_model/artifacts/model.bin + preprocess_load_path: modules/ml_linear_model/artifacts/scaler.bin + pca_load_path: modules/ml_linear_model/artifacts/pca.bin + # Store paths point to custom artifacts so training does not overwrite provided ones. 
+ model_store_path: modules/ml_linear_model/artifacts/model_custom.bin + preprocess_store_path: modules/ml_linear_model/artifacts/scaler_custom.bin + pca_store_path: modules/ml_linear_model/artifacts/pca_custom.bin + +############################ # +ml_online_model: + # Standalone river-based ML module. + mode: test + + # Training startup behavior (used only when mode: train): + # false -> warm-start from model_load_path/preprocess_load_path (recommended default) + # true -> ignore load paths and train from scratch + train_from_scratch: false + + # Write training/testing metrics logs to output dir. + # This affects only logging, not training behavior. + create_performance_metrics_log_files: true + + # Used only when mode: train. + # If true, each training batch is split into train/validation for metrics. + validate_on_train: true + + # Used only when validate_on_train is true. + # Fraction (0.0-1.0) or percent (1-100) of each training batch used for validation. + validation_percentage: 0.1 + + # Number of labeled flows collected before each training/retraining step. + training_batch_size: 500 + + # Reproducibility seed for train/test behavior in this module. + seed: 1111 + + # Optional per-module override for target conversion values. + # benign_target_value: 0.0 + # malicious_target_value: 1.0 + + # Separate log names, so they never overwrite other ML module logs. + # Final files are: training_.log and testing_.log + log_suffix: ml_online_model + + # Used only in mode: test. + # Emit one testing-log snapshot every N processed flows. + test_log_batch_size: 1000 + + # Artifact paths used by modules/ml_online_model/ml_online_model.py + # Load paths point to provided/default artifacts for immediate test/warm-start use. 
+ model_load_path: modules/ml_online_model/artifacts/model.bin + preprocess_load_path: modules/ml_online_model/artifacts/scaler.bin + pca_load_path: modules/ml_online_model/artifacts/pca.bin + # Store paths point to custom artifacts so training does not overwrite provided ones. + model_store_path: modules/ml_online_model/artifacts/model_custom.bin + preprocess_store_path: modules/ml_online_model/artifacts/scaler_custom.bin + pca_store_path: modules/ml_online_model/artifacts/pca_custom.bin + + # PCA training params (used only in mode: train) + pca_n_components: 11 + pca_batch_size: 500 + ############################# anomaly_detection_https: # Number of initial hours used to train the baseline model assuming benign traffic. From b445389c80c51d1760670cd3b2ac9fc83baeb322 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 22:37:01 +0000 Subject: [PATCH 11/26] debugged 5 problems in models. not recognising loaded scaler, bad feature names in sgt, feature processing ifferences to pipeline and small ones --- modules/ml_linear_model/ml_linear_model.py | 148 ++++++++------------- modules/ml_online_model/ml_online_model.py | 137 +++++++------------ 2 files changed, 102 insertions(+), 183 deletions(-) diff --git a/modules/ml_linear_model/ml_linear_model.py b/modules/ml_linear_model/ml_linear_model.py index 417a8cd000..332e64bdc1 100644 --- a/modules/ml_linear_model/ml_linear_model.py +++ b/modules/ml_linear_model/ml_linear_model.py @@ -151,31 +151,25 @@ def get_dummy_flows(self) -> dict: def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: try: - cols = [ - "proto", - "dport", - "sport", - "dur", - "pkts", - "spkts", - "bytes", - "sbytes", - "state", - ] - for col in cols: - if col in dataset.columns: - try: - dataset[col] = dataset[col].astype("float64") - except (ValueError, AttributeError): - pass + dataset = dataset.copy() + + # normalize proto to lowercase string before filtering + if "proto" in dataset.columns: + dataset["proto"] = ( + 
dataset["proto"].astype(str).str.strip().str.lower() + ) - to_discard = ["arp", "ARP", "icmp", "igmp", "ipv6-icmp", ""] - for proto in to_discard: - dataset = dataset[dataset.proto != proto] + # filter unsupported protocols + discard_set = {"arp", "icmp", "igmp", "ipv6-icmp", ""} + if "proto" in dataset.columns: + dataset = dataset[ + ~dataset["proto"].fillna("").isin(discard_set) + ] if dataset.empty: return dataset + # drop non-feature columns to_drop = [ "appproto", "daddr", @@ -191,65 +185,35 @@ def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: "flow_source", "interface", ] - for field in to_drop: - try: - dataset = dataset.drop(field, axis=1) - except (ValueError, KeyError): - pass - - dataset["state"] = dataset.apply( - lambda row: self.db.get_final_state_from_flags( - row["state"], (row["spkts"] + row["dpkts"]) - ), - axis=1, - ) + dataset = dataset.drop(columns=to_drop, errors="ignore") - dataset.state = dataset.state.str.replace( - r"(^.*Not Established.*$)", "0", regex=True - ) - dataset.state = dataset.state.str.replace( - r"(^.*Established.*$)", "1", regex=True - ) - dataset.state = dataset.state.astype("float64") - - dataset.proto = dataset.proto.str.lower() - dataset.proto = dataset.proto.str.replace( - r"(^.*tcp.*$)", "0", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*udp.*$)", "1", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*icmp.*$)", "2", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*icmp-ipv6.*$)", "3", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*arp.*$)", "4", regex=True - ) + # coerce base numeric fields before deriving from them + for col in ["sbytes", "dbytes", "spkts", "dpkts"]: + if col not in dataset.columns: + dataset[col] = 0.0 + dataset[col] = pd.to_numeric( + dataset[col], errors="coerce" + ).fillna(0.0) + # derived columns dataset["bytes"] = dataset["sbytes"] + dataset["dbytes"] dataset["pkts"] = dataset["spkts"] + 
dataset["dpkts"] - fields_to_convert_to_float = [ - dataset.proto, - dataset.dport, - dataset.sport, - dataset.dur, - dataset.pkts, - dataset.spkts, - dataset.bytes, - dataset.sbytes, - dataset.state, - ] - for field in fields_to_convert_to_float: - try: - field = field.astype("float64") - dataset[field.name] = field - except (ValueError, AttributeError): - pass + # encode proto via shared base class static + if "proto" in dataset.columns: + dataset["proto"] = dataset["proto"].apply(self._encode_proto) + # encode state via shared base class static + dataset["state"] = dataset.apply( + lambda row: self._infer_state( + str(row.get("state", "")), + row.get("spkts", 0.0), + row.get("dpkts", 0.0), + ), + axis=1, + ) + + # enforce feature order and float64, fill missing with 0.0 feature_order = [ "dur", "proto", @@ -274,18 +238,18 @@ def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: for col in feature_order: if col not in dataset.columns: dataset[col] = 0.0 - - for col in feature_order: - dataset[col] = pd.to_numeric( - dataset[col], errors="coerce" - ).fillna(0.0) + dataset[col] = ( + pd.to_numeric(dataset[col], errors="coerce") + .fillna(0.0) + .astype("float64") + ) existing_label_cols = [ col for col in label_cols if col in dataset.columns ] dataset = dataset[feature_order + existing_label_cols] - return dataset + except Exception: self.print("Error in process_features()") self.print(traceback.format_exc(), 0, 1) @@ -302,12 +266,6 @@ def create_empty_model(self): def create_empty_preprocessor(self): return StandardScaler() - def _is_scaler_initialized(self) -> bool: - return ( - hasattr(self.preprocessor, "mean_") - and self.preprocessor.mean_ is not None - ) - def is_preprocessor_initialized(self) -> bool: return self._is_scaler_initialized() and self._is_pca_initialized() @@ -431,16 +389,6 @@ def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: preds = self.clf.predict(x_data) return numpy.asarray([self._decode_target(pred) for pred in 
preds]) - @staticmethod - def _normalize_label(label): - if isinstance(label, str): - normalized = label.strip().lower() - if normalized in {"benign", "normal"}: - return BENIGN - if normalized in {"malicious", "malware"}: - return MALICIOUS - return label - def _guess_numeric_targets(self) -> bool: module_name = getattr(self.clf.__class__, "__module__", "") if module_name.startswith("sklearn."): @@ -454,6 +402,16 @@ def _guess_numeric_targets(self) -> bool: return True return False + @staticmethod + def _normalize_label(label): + if isinstance(label, str): + normalized = label.strip().lower() + if normalized in {"benign", "normal"}: + return BENIGN + if normalized in {"malicious", "malware"}: + return MALICIOUS + return label + def _encode_targets( self, targets: numpy.ndarray, numeric_targets: bool ) -> numpy.ndarray: diff --git a/modules/ml_online_model/ml_online_model.py b/modules/ml_online_model/ml_online_model.py index 25260e0fdd..28de8cf590 100644 --- a/modules/ml_online_model/ml_online_model.py +++ b/modules/ml_online_model/ml_online_model.py @@ -155,31 +155,25 @@ def get_dummy_flows(self) -> dict: def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: try: - cols = [ - "proto", - "dport", - "sport", - "dur", - "pkts", - "spkts", - "bytes", - "sbytes", - "state", - ] - for col in cols: - if col in dataset.columns: - try: - dataset[col] = dataset[col].astype("float64") - except (ValueError, AttributeError): - pass + dataset = dataset.copy() + + # normalize proto to lowercase string before filtering + if "proto" in dataset.columns: + dataset["proto"] = ( + dataset["proto"].astype(str).str.strip().str.lower() + ) - to_discard = ["arp", "ARP", "icmp", "igmp", "ipv6-icmp", ""] - for proto in to_discard: - dataset = dataset[dataset.proto != proto] + # filter unsupported protocols + discard_set = {"arp", "icmp", "igmp", "ipv6-icmp", ""} + if "proto" in dataset.columns: + dataset = dataset[ + ~dataset["proto"].fillna("").isin(discard_set) + ] if 
dataset.empty: return dataset + # drop non-feature columns to_drop = [ "appproto", "daddr", @@ -195,65 +189,35 @@ def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: "flow_source", "interface", ] - for field in to_drop: - try: - dataset = dataset.drop(field, axis=1) - except (ValueError, KeyError): - pass + dataset = dataset.drop(columns=to_drop, errors="ignore") - dataset["state"] = dataset.apply( - lambda row: self.db.get_final_state_from_flags( - row["state"], (row["spkts"] + row["dpkts"]) - ), - axis=1, - ) - - dataset.state = dataset.state.str.replace( - r"(^.*Not Established.*$)", "0", regex=True - ) - dataset.state = dataset.state.str.replace( - r"(^.*Established.*$)", "1", regex=True - ) - dataset.state = dataset.state.astype("float64") - - dataset.proto = dataset.proto.str.lower() - dataset.proto = dataset.proto.str.replace( - r"(^.*tcp.*$)", "0", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*udp.*$)", "1", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*icmp.*$)", "2", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*icmp-ipv6.*$)", "3", regex=True - ) - dataset.proto = dataset.proto.str.replace( - r"(^.*arp.*$)", "4", regex=True - ) + # coerce base numeric fields before deriving from them + for col in ["sbytes", "dbytes", "spkts", "dpkts"]: + if col not in dataset.columns: + dataset[col] = 0.0 + dataset[col] = pd.to_numeric( + dataset[col], errors="coerce" + ).fillna(0.0) + # derived columns dataset["bytes"] = dataset["sbytes"] + dataset["dbytes"] dataset["pkts"] = dataset["spkts"] + dataset["dpkts"] - fields_to_convert_to_float = [ - dataset.proto, - dataset.dport, - dataset.sport, - dataset.dur, - dataset.pkts, - dataset.spkts, - dataset.bytes, - dataset.sbytes, - dataset.state, - ] - for field in fields_to_convert_to_float: - try: - field = field.astype("float64") - dataset[field.name] = field - except (ValueError, AttributeError): - pass + # encode proto via shared base class 
static + if "proto" in dataset.columns: + dataset["proto"] = dataset["proto"].apply(self._encode_proto) + # encode state via shared base class static + dataset["state"] = dataset.apply( + lambda row: self._infer_state( + str(row.get("state", "")), + row.get("spkts", 0.0), + row.get("dpkts", 0.0), + ), + axis=1, + ) + + # enforce feature order and float64, fill missing with 0.0 feature_order = [ "dur", "proto", @@ -278,11 +242,11 @@ def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: for col in feature_order: if col not in dataset.columns: dataset[col] = 0.0 - - for col in feature_order: - dataset[col] = pd.to_numeric( - dataset[col], errors="coerce" - ).fillna(0.0) + dataset[col] = ( + pd.to_numeric(dataset[col], errors="coerce") + .fillna(0.0) + .astype("float64") + ) existing_label_cols = [ col for col in label_cols if col in dataset.columns @@ -290,6 +254,7 @@ def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: dataset = dataset[feature_order + existing_label_cols] return dataset + except Exception: self.print("Error in process_features()") self.print(traceback.format_exc(), 0, 1) @@ -311,12 +276,6 @@ def create_empty_model(self): def create_empty_preprocessor(self): return StandardScaler() - def _is_scaler_initialized(self) -> bool: - return ( - hasattr(self.preprocessor, "mean_") - and self.preprocessor.mean_ is not None - ) - def _is_pca_initialized(self) -> bool: return self.pca is not None and hasattr(self.pca, "components_") @@ -407,7 +366,8 @@ def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: self._fit_pca_next_transform = False if self._is_pca_initialized(): - return self.pca.transform(x_scaled) + transformed = self.pca.transform(x_scaled) + return transformed raise ValueError( "PCA is required but not initialized. 
" @@ -417,7 +377,7 @@ def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: @staticmethod def _row_to_dict(row: numpy.ndarray) -> dict: - return {f"f{i}": float(value) for i, value in enumerate(row)} + return {i: float(value) for i, value in enumerate(row)} @staticmethod def _normalize_label(label): @@ -476,12 +436,13 @@ def fit_incremental_model( def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: preds = [] - for row in x_data: + for i, row in enumerate(x_data): pred = self.clf.predict_one(self._row_to_dict(row)) if pred is None: preds.append(BENIGN) continue - preds.append(self._decode_target(pred)) + decoded = self._decode_target(pred) + preds.append(decoded) return numpy.asarray(preds) def store_model(self): From 6ab651da6e81a876ecad650d73e20ff8aedab9fb Mon Sep 17 00:00:00 2001 From: jsvobo Date: Thu, 26 Mar 2026 22:59:32 +0000 Subject: [PATCH 12/26] broader definition of BG label to catch different versions --- slips_files/common/abstracts/ml_module_base.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/slips_files/common/abstracts/ml_module_base.py b/slips_files/common/abstracts/ml_module_base.py index 23f5b53d47..d43a75c5d1 100644 --- a/slips_files/common/abstracts/ml_module_base.py +++ b/slips_files/common/abstracts/ml_module_base.py @@ -25,7 +25,7 @@ Victim, ) -BACKGROUND = "background" +BACKGROUND = "Background" BENIGN = "Benign" MALICIOUS = "Malicious" @@ -417,7 +417,11 @@ def filter_labels(y_true, y_pred): def store_testing_results(self, original_label, predicted_label): """Accumulate online test metrics and flush snapshots in configured batches.""" - if original_label == BACKGROUND: + if original_label in [ + BACKGROUND, + BACKGROUND.upper(), + BACKGROUND.lower(), + ]: return if not hasattr(self, "malware_metrics"): @@ -977,7 +981,11 @@ def main(self): self.ground_truth_config_label ) - if self.flow["ground_truth_label"] in [BACKGROUND]: + if self.flow["ground_truth_label"] in [ + 
BACKGROUND, + BACKGROUND.upper(), + BACKGROUND.lower(), + ]: return if self.mode == "train": From a819b0d0b5fc0031e9145294488854766144ccc9 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Fri, 27 Mar 2026 08:44:11 +0000 Subject: [PATCH 13/26] added common functions to base ml class --- .../common/abstracts/ml_module_base.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/slips_files/common/abstracts/ml_module_base.py b/slips_files/common/abstracts/ml_module_base.py index d43a75c5d1..5cd55d6132 100644 --- a/slips_files/common/abstracts/ml_module_base.py +++ b/slips_files/common/abstracts/ml_module_base.py @@ -1009,3 +1009,42 @@ def main(self): if "tw_closed" in self.channels and (msg := self.get_msg("tw_closed")): self.handle_tw_closed(msg) + + def _infer_state(self, state: str, spkts: float, dpkts: float) -> float: + pkts = int(float(spkts or 0) + float(dpkts or 0)) + pre = state.split("_")[0] + st = state.lower() + if "new" in st or st == "established": + return 1.0 + if "closed" in st or st == "not established": + return 0.0 + if state in ("S0", "REJ", "RSTOS0", "RSTRH", "SH", "SHR"): + return 0.0 + if state in ("S1", "SF", "S2", "S3", "RSTO", "RSTP", "OTH"): + return 1.0 + if "S" in pre and "A" in pre: + return 1.0 + if "PA" in pre: + return 1.0 + if any(x in pre for x in ("ECO", "ECR", "URH", "URP")): + return 1.0 + if "EST" in pre: + return 1.0 + if "RST" in pre or "FIN" in pre: + return 0.0 if pkts <= 3 else 1.0 + return 0.0 + + def _encode_proto(self, proto: str) -> float: + proto_map = { + "tcp": 0.0, + "udp": 1.0, + "icmp-ipv6": 3.0, + "icmp": 2.0, + "arp": 4.0, + } + return proto_map.get(str(proto).strip().lower(), 0.0) + + def _is_scaler_initialized(self) -> bool: + """Works for StandardScaler, MinMaxScaler, RobustScaler, etc.""" + attrs = ["mean_", "scale_", "var_", "data_min_", "data_max_"] + return any(hasattr(self.preprocessor, attr) for attr in attrs) From 0a0b38b5678adc4b4ee75c08b419bbfeff850a35 Mon Sep 17 00:00:00 2001 From: alya 
Date: Tue, 31 Mar 2026 01:12:05 +0200 Subject: [PATCH 14/26] ml_module_base.py: close log_file on shutdown --- .pre-commit-config.yaml | 2 +- slips_files/common/abstracts/ml_module_base.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 231dad04a4..b85a1b0fa4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: args: ['--line-length' , '79'] language_version: python3.12.3 # excludes formatting slips_files/common/imports.py - exclude: (imports|conftest.py|tests/*.py) + exclude: (imports|conftest\.py|^tests/.*\.py$) - repo: https://github.com/adrienverge/yamllint.git rev: v1.35.1 diff --git a/slips_files/common/abstracts/ml_module_base.py b/slips_files/common/abstracts/ml_module_base.py index 5cd55d6132..9f1d54aceb 100644 --- a/slips_files/common/abstracts/ml_module_base.py +++ b/slips_files/common/abstracts/ml_module_base.py @@ -891,6 +891,7 @@ def shutdown_gracefully(self): if self.log_file is not None: self.log_file.flush() + self.log_file.close() def last_training_in_window(self): """Optionally train on residual labeled flows before window/module ends.""" From 5c6ca2cd31c49aae60f886e19f9ef2c6735bf1e8 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Sun, 12 Apr 2026 15:42:46 +0200 Subject: [PATCH 15/26] moved ml template into the proper place --- modules/template/ml_backend_template.py | 69 +++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 modules/template/ml_backend_template.py diff --git a/modules/template/ml_backend_template.py b/modules/template/ml_backend_template.py new file mode 100644 index 0000000000..2d2766d8fe --- /dev/null +++ b/modules/template/ml_backend_template.py @@ -0,0 +1,69 @@ +from typing import Any, Optional + +import numpy +import pandas as pd + +from slips_files.common.abstracts.ml_module_base import MLBaseDetection + + +# New backend checklist: +# - Copy this file to modules//.py +# - Rename class, 
module_key, and module_config_section +# - Set artifact paths in slips.yaml for your backend +# - Implement all NotImplementedError methods + + +class MLBackendTemplate(MLBaseDetection): + name = "ML backend template" + description = "Skeleton backend for a standalone ML flow detector" + authors = ["Your Name"] + module_key = "ml_template" + module_config_section = "ml_template" + # Add a dedicated EvidenceType for your ML module in + # slips_files/core/structures/evidence.py and set it here. + malicious_flow_evidence_type = None + + def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: + return dataset + + def create_empty_model(self) -> Any: + raise NotImplementedError( + "Return an untrained backend model instance." + ) + + def create_empty_preprocessor(self) -> Any: + raise NotImplementedError("Return an untrained preprocessor or None.") + + def update_preprocessor(self, x_train: pd.DataFrame): + raise NotImplementedError( + "Incrementally fit/update preprocessing on x_train." + ) + + def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: + raise NotImplementedError( + "Convert features to model-ready numpy array." + ) + + def fit_incremental_model( + self, + x_train: numpy.ndarray, + y_train: numpy.ndarray, + classes: Optional[list] = None, + ): + raise NotImplementedError( + "Incrementally train model on current batch." + ) + + def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: + raise NotImplementedError("Return batch predictions for x_data.") + + def is_preprocessor_initialized(self) -> bool: + raise NotImplementedError( + "Return True when preprocessor can transform data." 
+ ) + + def train(self, sum_labeled_flows): + return self._train_default(sum_labeled_flows) + + def run_test_on_flow(self, flow: dict): + return self._test_default(flow) From 31878dc3b227eb575d9c08fad6f2c51bde56813c Mon Sep 17 00:00:00 2001 From: jsvobo Date: Sun, 12 Apr 2026 15:44:53 +0200 Subject: [PATCH 16/26] updated docs on new modules to reflect ML module creation + adding new evidence --- docs/create_new_module.md | 101 ++++++++++++- slips_files/common/abstracts/README.md | 50 ------- .../common/abstracts/ml_backend_template.py | 76 ---------- .../common/abstracts/ml_module_base.py | 141 ++++++------------ 4 files changed, 148 insertions(+), 220 deletions(-) delete mode 100644 slips_files/common/abstracts/README.md delete mode 100644 slips_files/common/abstracts/ml_backend_template.py diff --git a/docs/create_new_module.md b/docs/create_new_module.md index 62b1fc2820..93a81d0a7a 100644 --- a/docs/create_new_module.md +++ b/docs/create_new_module.md @@ -2,6 +2,19 @@ # How to Create a New Slips Module +## Table of Contents + +- [Detection module](#detection-module) +- [Creating a Module](#creating-a-module) +- [ML module](#ml-module) +- [Evidence setup](#evidence-setup-required) +- [Conclusion](#conclusion) +- [Complete Code](#complete-code) +- [Final Notes](#final-notes) + + +## Detection module + ## What is SLIPS and why are modules useful @@ -337,8 +350,92 @@ Using develop - 9f5f9412a3c941b3146d92c8cb2f1f12aab3699e - 2022-06-02 16:51:43.9 title="Testing The Module"> +## ML module + +Shared infrastructure for standalone ML modules (for example `ml_linear_model`, `ml_online_model`) lives in `slips_files/common/abstracts/ml_module_base.py`. + +### Template location + +- New backend template: `modules/template/ml_backend_template.py` + +### How to add a new ML backend + +1. Create a new module folder under `modules/` with matching file name (required by Slips discovery), e.g. `modules/ml_xxx/ml_xxx.py`. +2. 
Copy `modules/template/ml_backend_template.py` into your module and adapt. +3. Implement a class inheriting `MLBaseDetection`. +4. Set class metadata: `name`, `description`, `authors`, `module_key`, `module_config_section`. +5. Implement all abstract methods. + +### Required abstract methods + +- `process_features(self, dataset: pd.DataFrame) -> pd.DataFrame` +- `create_empty_model(self) -> Any` +- `create_empty_preprocessor(self) -> Any` +- `update_preprocessor(self, x_train: pd.DataFrame)` +- `transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray` +- `fit_incremental_model(self, x_train: numpy.ndarray, y_train: numpy.ndarray, classes: Optional[list] = None)` +- `predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray` +- `is_preprocessor_initialized(self) -> bool` +- `train(self, sum_labeled_flows)` +- `run_test_on_flow(self, flow: dict)` + +### Evidence setup + +- Add a dedicated `EvidenceType` for your ML module in `slips_files/core/structures/evidence.py`. Do not reuse another module's evidence type. +- Set `malicious_flow_evidence_type` in your module class to that dedicated type. +- Use `Attacker` with `IoCType.IP` for the detected source when the source is the attacker. +- Set `Victim` only when your detection semantics include a victim. If present, victim must be an IP (`IoCType.IP`). +- For attacker-only evidence (no victim semantics), do not invent a victim. 
+ +Example for individual ML modules: + +```python +# modules/ml_linear_model/ml_linear_model.py +import slips_files.common.abstracts.ml_module_base as ml_base + +class MLLinearModel(ml_base.MLBaseDetection): + malicious_flow_evidence_type = ( + ml_base.EvidenceType.ML_LINEAR_MALICIOUS_FLOW + ) + + +# modules/ml_online_model/ml_online_model.py +import slips_files.common.abstracts.ml_module_base as ml_base + +class MLOnlineModel(ml_base.MLBaseDetection): + malicious_flow_evidence_type = ( + ml_base.EvidenceType.ML_ONLINE_MALICIOUS_FLOW + ) +``` + +In `MLBaseDetection.set_evidence_malicious_flow()` the default flow semantics are: + +- `attacker`: source IP (`saddr`) as `IoCType.IP` +- `victim`: destination IP (`daddr`) as `IoCType.IP` + +Use this only when the detection is truly source-attacker to destination-victim. If your detection does not have a victim, create evidence without `Victim`. + +### Config contract + +Add a section in `config/slips.yaml` matching `module_config_section` with: + +- `mode`, `training_batch_size`, `seed` +- `create_performance_metrics_log_files`, `log_suffix`, `test_log_batch_size` +- `model_load_path`, `model_store_path`, `preprocess_load_path`, `preprocess_store_path` + +Optional backend-specific keys (for example PCA) should be read in the child class. + +### Train/test workflow + +Each ML module has its own independent `mode` (`train` or `test`) and artifact paths in `config/slips.yaml`. + +- Test provided models: set that module section to `mode: test`. +- Train custom models without overwriting defaults: set `mode: train`, keep `*_store_path` on custom files. +- Test custom models: switch `*_load_path` to custom artifact files and set `mode: test`. 
+ + -### Conclusion +## Conclusion Due to the high modularity of slips, adding a new slips module is as easy as modifying a few lines in our template module, and slips handles running @@ -640,7 +737,7 @@ Feel free to join our [Discord server](https://discord.gg/zu5HwMFy5C) and ask qu PRs and Issues are welcomed in our repo. -### Conclusion +## Final Notes Adding a new feature to SLIPS is an easy task. The template is ready for everyone to use and there is not much to learn about Slips to be able to write a module. diff --git a/slips_files/common/abstracts/README.md b/slips_files/common/abstracts/README.md deleted file mode 100644 index 5ee21db9d5..0000000000 --- a/slips_files/common/abstracts/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# ML module base workflow - -Shared infrastructure for standalone ML modules (for example `ml_linear_model`, `ml_online_model`) lives in `slips_files/common`. - -## Folder purpose - -- `ml_module_base.py`: common runtime loop, buffering, config wiring, model I/O, evidence emission. -- `ml_backend_template.py`: copy/adapt this skeleton when creating a new backend. -- `../ml_modules_utils/base_utils.py`: metrics parsing/computation for logs/plots. -- `../ml_modules_utils/plot_train_performance.py`, `../ml_modules_utils/plot_testing_performance.py`: log-based visualization helpers. - -## How to add a new model backend - -1. Create a new module folder under `modules/` with matching file name (required by Slips discovery), e.g. `modules/ml_xxx/ml_xxx.py`. -2. Quick start: copy `slips_files/common/abstracts/ml_backend_template.py` into your module and adapt. -3. Implement a class inheriting `MLBaseDetection`. -4. Set class metadata: `name`, `description`, `authors`, `module_key`, `module_config_section`. -5. Implement required abstract methods/signatures. 
- -## Required method signatures - -- `get_default_artifact_paths(self) -> Tuple[str, str, str, str]` -- `process_features(self, dataset: pd.DataFrame) -> pd.DataFrame` -- `create_empty_model(self) -> Any` -- `create_empty_preprocessor(self) -> Any` -- `update_preprocessor(self, x_train: pd.DataFrame)` -- `transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray` -- `fit_incremental_model(self, x_train: numpy.ndarray, y_train: numpy.ndarray, classes: Optional[list] = None)` -- `predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray` -- `is_preprocessor_initialized(self) -> bool` -- `train(self, sum_labeled_flows, last_number_of_flows_when_trained)` -- `run_test_on_flow(self, flow: dict)` - -## Config contract - -Add a section in `config/slips.yaml` matching `module_config_section` with: - -- `mode`, `training_batch_size`, `seed` -- `create_performance_metrics_log_files`, `log_suffix`, `test_log_batch_size` -- `model_load_path`, `model_store_path`, `preprocess_load_path`, `preprocess_store_path` - -Optional backend-specific keys (for example PCA) should be read in the child class. - -## Train/test workflow - -Each ML module has its own independent `mode` (`train` or `test`) and artifact paths in `config/slips.yaml`. - -- Test provided models: set that module section to `mode: test`. -- Train custom models without overwriting defaults: set `mode: train`, keep `*_store_path` on custom files. -- Test custom models: switch `*_load_path` to custom artifact files and set `mode: test`. 
diff --git a/slips_files/common/abstracts/ml_backend_template.py b/slips_files/common/abstracts/ml_backend_template.py deleted file mode 100644 index 3af27963b6..0000000000 --- a/slips_files/common/abstracts/ml_backend_template.py +++ /dev/null @@ -1,76 +0,0 @@ -from typing import Any, Optional, Tuple - -import numpy -import pandas as pd - -from slips_files.common.abstracts.ml_module_base import MLBaseDetection - - -# New backend checklist: -# - Copy this file to modules//.py -# - Rename class, module_key, and module_config_section -# - Set artifact default paths for your backend -# - Implement all NotImplementedError methods - - -class MLBackendTemplate(MLBaseDetection): - name = "ML backend template" - description = "Skeleton backend for a standalone ML flow detector" - authors = ["Your Name"] - module_key = "ml_template" - module_config_section = "ml_template" - - def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: - return ( - "./modules/ml_template/artifacts/model.bin", - "./modules/ml_template/artifacts/preprocess.bin", - "./modules/ml_template/artifacts/model.bin", - "./modules/ml_template/artifacts/preprocess.bin", - ) - - def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: - return dataset - - def create_empty_model(self) -> Any: - raise NotImplementedError( - "Return an untrained backend model instance." - ) - - def create_empty_preprocessor(self) -> Any: - raise NotImplementedError("Return an untrained preprocessor or None.") - - def update_preprocessor(self, x_train: pd.DataFrame): - raise NotImplementedError( - "Incrementally fit/update preprocessing on x_train." - ) - - def transform_features(self, x_data: pd.DataFrame) -> numpy.ndarray: - raise NotImplementedError( - "Convert features to model-ready numpy array." - ) - - def fit_incremental_model( - self, - x_train: numpy.ndarray, - y_train: numpy.ndarray, - classes: Optional[list] = None, - ): - raise NotImplementedError( - "Incrementally train model on current batch." 
- ) - - def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: - raise NotImplementedError("Return batch predictions for x_data.") - - def is_preprocessor_initialized(self) -> bool: - raise NotImplementedError( - "Return True when preprocessor can transform data." - ) - - def train(self, sum_labeled_flows, last_number_of_flows_when_trained): - return self._train_default( - sum_labeled_flows, last_number_of_flows_when_trained - ) - - def run_test_on_flow(self, flow: dict): - return self._test_default(flow) diff --git a/slips_files/common/abstracts/ml_module_base.py b/slips_files/common/abstracts/ml_module_base.py index 9f1d54aceb..65807319b2 100644 --- a/slips_files/common/abstracts/ml_module_base.py +++ b/slips_files/common/abstracts/ml_module_base.py @@ -1,10 +1,11 @@ import json +import ipaddress import os import pickle import random import traceback from abc import ABC, abstractmethod -from typing import Any, Optional, Tuple +from typing import Any, Optional import numpy import pandas as pd @@ -47,6 +48,7 @@ class MLBaseDetection(IModule, ABC): authors = ["Jan Svoboda"] module_key = "ml_module" module_config_section = "ml_module" + malicious_flow_evidence_type = None def subscribe_to_channels(self): self.c1 = self.db.subscribe("new_flow") @@ -59,9 +61,13 @@ def init(self): """Initialize channels, config, reproducibility, artifact paths, and logging.""" self.fieldseparator = self.db.get_field_separator() + if not isinstance(self.malicious_flow_evidence_type, EvidenceType): + raise ValueError( + "ML modules must define malicious_flow_evidence_type as a module-specific EvidenceType." 
+ ) + self.read_configuration() - self.last_number_of_flows_when_trained = 0 self.classifier_initialized = False self.all_classes = [MALICIOUS, BENIGN] @@ -71,63 +77,41 @@ def init(self): self.last_closed_twid = None conf = ConfigParser() - ( - default_model_load, - default_preprocess_load, - default_model_store, - default_preprocess_store, - ) = self.get_default_artifact_paths() - section = self.module_config_section configured_model_load = conf.ml_module_model_load_path( section, - default_model_load, + None, ) configured_preprocess_load = conf.ml_module_preprocess_load_path( section, - default_preprocess_load, + None, ) configured_model_store = conf.ml_module_model_store_path( section, - default_model_store, + None, ) configured_preprocess_store = conf.ml_module_preprocess_store_path( section, - default_preprocess_store, + None, ) - key_upper = self.module_key.upper() - configured_seed = conf.ml_module_seed(section, default=self.seed) - self.seed = int( - os.getenv( - f"SLIPS_{key_upper}_SEED", - os.getenv("SLIPS_FLOW_ML_SEED", str(configured_seed)), - ) - ) + self.seed = int(configured_seed) random.seed(self.seed) numpy.random.seed(self.seed) self.rng = numpy.random.default_rng(self.seed) self.model_load_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_MODEL_LOAD_PATH", explicit_path=configured_model_load, - fallback_env_var="SLIPS_FLOW_ML_MODEL_LOAD_PATH", ) self.preprocess_load_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PREPROCESS_LOAD_PATH", explicit_path=configured_preprocess_load, - fallback_env_var="SLIPS_FLOW_ML_PREPROCESS_LOAD_PATH", ) self.model_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_MODEL_STORE_PATH", explicit_path=configured_model_store, - fallback_env_var="SLIPS_FLOW_ML_MODEL_STORE_PATH", ) self.preprocess_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PREPROCESS_STORE_PATH", explicit_path=configured_preprocess_store, - 
fallback_env_var="SLIPS_FLOW_ML_PREPROCESS_STORE_PATH", ) configured_test_log_batch_size = conf.ml_module_test_log_batch_size( @@ -135,26 +119,14 @@ def init(self): default=self.batch_size, ) self.testing_log_batch_size = max( - 1, - int( - os.getenv( - f"SLIPS_{key_upper}_TEST_LOG_BATCH_SIZE", - os.getenv( - "SLIPS_FLOW_ML_TEST_LOG_BATCH_SIZE", - str(configured_test_log_batch_size), - ), - ) - ), + 1, int(configured_test_log_batch_size) ) configured_log_suffix = conf.ml_module_log_suffix( section, default=self.module_key, ) - self.log_suffix = os.getenv( - f"SLIPS_{key_upper}_LOG_SUFFIX", - os.getenv("SLIPS_FLOW_ML_LOG_SUFFIX", configured_log_suffix), - ) + self.log_suffix = configured_log_suffix # Backward compatibility for existing sklearn-specific references. self.scaler_load_path = self.preprocess_load_path @@ -164,16 +136,19 @@ def init(self): def resolve_artifact_path( self, - env_var: str, - explicit_path: str, + explicit_path: Optional[str], + env_var: Optional[str] = None, fallback_env_var: Optional[str] = None, ) -> str: - """Resolve artifact path from env/config and normalize relative paths.""" - path = os.getenv(env_var) - if path is None and fallback_env_var: - path = os.getenv(fallback_env_var) - if path is None: - path = explicit_path + """Resolve artifact path from config and normalize relative paths.""" + _ = env_var + _ = fallback_env_var + if explicit_path is None or str(explicit_path).strip() == "": + raise ValueError( + "Missing ML artifact path in slips.yaml. " + "Set model/preprocess load/store paths in the module config section." + ) + path = str(explicit_path) if os.path.isabs(path): return path return os.path.join(".", path.lstrip("./")) @@ -267,15 +242,6 @@ def write_to_log(self, message: str): except Exception as exc: self.print(f"Error writing to log: {exc}", 0, 1) - @abstractmethod - def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: - """ - Return backend default artifact paths. 
- - Returns: - model_load_path, preprocess_load_path, model_store_path, preprocess_store_path. - """ - @abstractmethod def process_features(self, dataset: pd.DataFrame) -> pd.DataFrame: """Convert raw flow dataframe to backend-ready numeric feature dataframe.""" @@ -325,7 +291,6 @@ def is_preprocessor_initialized(self) -> bool: def train( self, sum_labeled_flows, - last_number_of_flows_when_trained, ): """Backend train entrypoint; typically delegates to `_train_default`.""" @@ -557,9 +522,7 @@ def _debug_training_dataframe( 1, ) - def _train_default( - self, sum_labeled_flows, last_number_of_flows_when_trained - ): + def _train_default(self, sum_labeled_flows): """Shared incremental training flow used by backend `train` hooks.""" if self.flows is None or self.flows.empty: self.print("No flows to train on. Skipping training.", 0, 1) @@ -675,7 +638,6 @@ def _train_default( self.print(traceback.format_exc(), 0, 1) self.write_to_log("Error occurred during training.") - self.last_number_of_flows_when_trained = self.labeled_counter self.labeled_counter = 0 self.training_flows = [] @@ -713,16 +675,9 @@ def _test_default(self, flow: dict): pred[0], ) - def process_training_flows(self, last_number_of_flows_when_trained): + def process_training_flows(self): """Build and preprocess one training batch from buffered labeled flows.""" try: - if last_number_of_flows_when_trained is None: - last_number_of_flows_when_trained = 0 - else: - last_number_of_flows_when_trained = int( - last_number_of_flows_when_trained - ) - new_flows = self.training_flows if len(new_flows) > self.batch_size: self.print( @@ -848,29 +803,40 @@ def read_model(self): def set_evidence_malicious_flow(self, flow: dict, twid: str): """Emit Slips evidence object when a flow is predicted as malicious.""" + try: + src_ip = str(ipaddress.ip_address(flow["saddr"])) + dst_ip = str(ipaddress.ip_address(flow["daddr"])) + except (ValueError, KeyError) as exc: + self.print( + f"Skipping ML evidence with invalid 
attacker/victim IPs: {exc}", + 0, + 1, + ) + return + confidence = 0.1 description = ( f"Flow with malicious characteristics by ML. Src IP" - f" {flow['saddr']}:{flow['sport']} to " - f"{flow['daddr']}:{flow['dport']}" + f" {src_ip}:{flow['sport']} to " + f"{dst_ip}:{flow['dport']}" ) twid_number = int(twid.replace("timewindow", "")) evidence = Evidence( - evidence_type=EvidenceType.MALICIOUS_FLOW, + evidence_type=self.malicious_flow_evidence_type, attacker=Attacker( direction=Direction.SRC, ioc_type=IoCType.IP, - value=flow["saddr"], + value=src_ip, ), victim=Victim( direction=Direction.DST, ioc_type=IoCType.IP, - value=flow["daddr"], + value=dst_ip, ), threat_level=ThreatLevel.LOW, confidence=confidence, description=description, - profile=ProfileID(ip=flow["saddr"]), + profile=ProfileID(ip=src_ip), timewindow=TimeWindow(twid_number), uid=[flow["uid"]], timestamp=flow["starttime"], @@ -891,7 +857,6 @@ def shutdown_gracefully(self): if self.log_file is not None: self.log_file.flush() - self.log_file.close() def last_training_in_window(self): """Optionally train on residual labeled flows before window/module ends.""" @@ -910,14 +875,11 @@ def last_training_in_window(self): self.print( f"Training on the last {flows_left} flows in the window", 0, 1 ) - self.process_training_flows(self.last_number_of_flows_when_trained) + self.process_training_flows() self.print( f"Size of the last training batch: {len(self.flows)}", 0, 1 ) - self.train( - self.labeled_counter, - self.last_number_of_flows_when_trained, - ) + self.train(self.labeled_counter) else: self.print( f"Not enough flows to finalize training. 
" @@ -997,13 +959,8 @@ def main(self): if self.labeled_counter < self.minimum_labels_to_retrain: return - self.process_training_flows( - self.last_number_of_flows_when_trained - ) - self.train( - self.labeled_counter, - self.last_number_of_flows_when_trained, - ) + self.process_training_flows() + self.train(self.labeled_counter) elif self.mode == "test": self.run_test_on_flow(self.flow) From 7d3da71653ebe9974757ab003b1b0022e930a792 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Sun, 12 Apr 2026 15:47:22 +0200 Subject: [PATCH 17/26] updated readme and removed env variables from modules --- modules/ml_linear_model/README.md | 4 +- modules/ml_linear_model/ml_linear_model.py | 47 ++++------------------ modules/ml_online_model/README.md | 4 +- modules/ml_online_model/ml_online_model.py | 39 ++++-------------- 4 files changed, 20 insertions(+), 74 deletions(-) diff --git a/modules/ml_linear_model/README.md b/modules/ml_linear_model/README.md index 5c34f77ae5..1c92e9e6a0 100644 --- a/modules/ml_linear_model/README.md +++ b/modules/ml_linear_model/README.md @@ -67,7 +67,7 @@ python3 slips_files/common/ml_modules_utils/plot_testing_performance.py -f path/ ## Creating your own ML module To create a new ML module, see: -- [slips_files/common/abstracts/README.md](../../slips_files/common/abstracts/README.md) +- [docs/create_new_module.md#ml-module](../../docs/create_new_module.md#ml-module) - [docs/create_new_module.md](../../docs/create_new_module.md) These documents explain the base class, required methods, and configuration for new modules. @@ -86,7 +86,7 @@ For reproducibility, keep `seed` fixed in `config/slips.yaml`. ## Train/test (module-specific) -Canonical workflow is in `slips_files/common/abstracts/README.md`. +Canonical workflow is in `docs/create_new_module.md#ml-module`. 
`ml_linear_model`-specific paths: diff --git a/modules/ml_linear_model/ml_linear_model.py b/modules/ml_linear_model/ml_linear_model.py index 332e64bdc1..cc9fdc7d04 100644 --- a/modules/ml_linear_model/ml_linear_model.py +++ b/modules/ml_linear_model/ml_linear_model.py @@ -1,7 +1,7 @@ import traceback import warnings -from typing import Optional, Tuple import os +from typing import Optional import pickle import numpy @@ -23,25 +23,6 @@ def warn(*args, **kwargs): warnings.warn = warn -# --------------------------------------------------------------------------- -# Default artifact paths for linear sklearn model + preprocessor. -# Override at runtime with environment variables: -# - SLIPS_ML_LINEAR_MODEL_MODEL_LOAD_PATH -# - SLIPS_ML_LINEAR_MODEL_PREPROCESS_LOAD_PATH -# - SLIPS_ML_LINEAR_MODEL_MODEL_STORE_PATH -# - SLIPS_ML_LINEAR_MODEL_PREPROCESS_STORE_PATH -# --------------------------------------------------------------------------- -DEFAULT_MODEL_LOAD_PATH = "./modules/ml_linear_model/artifacts/model.bin" -DEFAULT_PREPROCESS_LOAD_PATH = "./modules/ml_linear_model/artifacts/scaler.bin" -DEFAULT_MODEL_STORE_PATH = ( - "./modules/ml_linear_model/artifacts/model_custom.bin" -) -DEFAULT_PREPROCESS_STORE_PATH = ( - "./modules/ml_linear_model/artifacts/scaler_custom.bin" -) -DEFAULT_PCA_LOAD_PATH = "./modules/ml_linear_model/artifacts/pca.bin" -DEFAULT_PCA_STORE_PATH = "./modules/ml_linear_model/artifacts/pca_custom.bin" - class MLLinearModel(ml_base.MLBaseDetection): name = "ml_linear_model" @@ -49,6 +30,9 @@ class MLLinearModel(ml_base.MLBaseDetection): authors = ["Jan Svoboda"] module_key = "ml_linear_model" module_config_section = "ml_linear_model" + malicious_flow_evidence_type = ( + ml_base.EvidenceType.ML_LINEAR_MALICIOUS_FLOW + ) def init(self): super().init() @@ -57,26 +41,21 @@ def init(self): conf = ConfigParser() section = self.module_config_section - key_upper = self.module_key.upper() configured_pca_load = conf.ml_module_pca_load_path( section, - 
DEFAULT_PCA_LOAD_PATH, + None, ) configured_pca_store = conf.ml_module_pca_store_path( section, - DEFAULT_PCA_STORE_PATH, + None, ) self.pca_load_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PCA_LOAD_PATH", explicit_path=configured_pca_load, - fallback_env_var="SLIPS_FLOW_ML_PCA_LOAD_PATH", ) self.pca_store_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PCA_STORE_PATH", explicit_path=configured_pca_store, - fallback_env_var="SLIPS_FLOW_ML_PCA_STORE_PATH", ) self.pca_n_components = conf.ml_module_pca_n_components( @@ -102,14 +81,6 @@ def init(self): MALICIOUS: self.malicious_target_value, } - def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: - return ( - DEFAULT_MODEL_LOAD_PATH, - DEFAULT_PREPROCESS_LOAD_PATH, - DEFAULT_MODEL_STORE_PATH, - DEFAULT_PREPROCESS_STORE_PATH, - ) - def _add_dummy_flows(self): self.dummy_malicious_flow = numpy.array( [ @@ -466,10 +437,8 @@ def read_model(self): self.pca = self._create_incremental_pca() - def train(self, sum_labeled_flows, last_number_of_flows_when_trained): - self._train_default( - sum_labeled_flows, last_number_of_flows_when_trained - ) + def train(self, sum_labeled_flows): + self._train_default(sum_labeled_flows) def run_test_on_flow(self, flow: dict): self._test_default(flow) diff --git a/modules/ml_online_model/README.md b/modules/ml_online_model/README.md index 7af90cf9ce..7468c894e4 100644 --- a/modules/ml_online_model/README.md +++ b/modules/ml_online_model/README.md @@ -10,7 +10,7 @@ This module provides a River-based flow model for SLIPS. ## Train/test (module-specific) -Canonical workflow is in `slips_files/common/abstracts/README.md`. +Canonical workflow is in `docs/create_new_module.md#ml-module`. 
`ml_online_model`-specific paths: @@ -115,7 +115,7 @@ python3 slips_files/common/ml_modules_utils/plot_testing_performance.py -f path/ ## Creating your own ML module To create a new ML module, see: -- [slips_files/common/abstracts/README.md](../../slips_files/common/abstracts/README.md) +- [docs/create_new_module.md#ml-module](../../docs/create_new_module.md#ml-module) - [docs/create_new_module.md](../../docs/create_new_module.md) These documents explain the base class, required methods, and configuration for new modules. diff --git a/modules/ml_online_model/ml_online_model.py b/modules/ml_online_model/ml_online_model.py index 28de8cf590..cd2dafa3a0 100644 --- a/modules/ml_online_model/ml_online_model.py +++ b/modules/ml_online_model/ml_online_model.py @@ -1,7 +1,7 @@ import traceback import warnings -from typing import Optional, Tuple import os +from typing import Optional import pickle import numpy @@ -22,17 +22,6 @@ def warn(*args, **kwargs): warnings.warn = warn -DEFAULT_MODEL_LOAD_PATH = "./modules/ml_online_model/artifacts/model.bin" -DEFAULT_PREPROCESS_LOAD_PATH = "./modules/ml_online_model/artifacts/scaler.bin" -DEFAULT_MODEL_STORE_PATH = ( - "./modules/ml_online_model/artifacts/model_custom.bin" -) -DEFAULT_PREPROCESS_STORE_PATH = ( - "./modules/ml_online_model/artifacts/scaler_custom.bin" -) -DEFAULT_PCA_LOAD_PATH = "./modules/ml_online_model/artifacts/pca.bin" -DEFAULT_PCA_STORE_PATH = "./modules/ml_online_model/artifacts/pca_custom.bin" - class _FallbackRiverModel: def __init__(self): @@ -53,6 +42,9 @@ class MLOnlineModel(ml_base.MLBaseDetection): authors = ["Jan Svoboda"] module_key = "ml_online_model" module_config_section = "ml_online_model" + malicious_flow_evidence_type = ( + ml_base.EvidenceType.ML_ONLINE_MALICIOUS_FLOW + ) def init(self): super().init() @@ -61,26 +53,21 @@ def init(self): conf = ConfigParser() section = self.module_config_section - key_upper = self.module_key.upper() configured_pca_load = conf.ml_module_pca_load_path( section, - 
DEFAULT_PCA_LOAD_PATH, + None, ) configured_pca_store = conf.ml_module_pca_store_path( section, - DEFAULT_PCA_STORE_PATH, + None, ) self.pca_load_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PCA_LOAD_PATH", explicit_path=configured_pca_load, - fallback_env_var="SLIPS_FLOW_ML_PCA_LOAD_PATH", ) self.pca_store_path = self.resolve_artifact_path( - env_var=f"SLIPS_{key_upper}_PCA_STORE_PATH", explicit_path=configured_pca_store, - fallback_env_var="SLIPS_FLOW_ML_PCA_STORE_PATH", ) self.pca_n_components = conf.ml_module_pca_n_components( @@ -106,14 +93,6 @@ def init(self): MALICIOUS: self.malicious_target_value, } - def get_default_artifact_paths(self) -> Tuple[str, str, str, str]: - return ( - DEFAULT_MODEL_LOAD_PATH, - DEFAULT_PREPROCESS_LOAD_PATH, - DEFAULT_MODEL_STORE_PATH, - DEFAULT_PREPROCESS_STORE_PATH, - ) - def _add_dummy_flows(self): self.dummy_malicious_flow = numpy.array( [ @@ -476,10 +455,8 @@ def read_model(self): self.pca = self._create_incremental_pca() - def train(self, sum_labeled_flows, last_number_of_flows_when_trained): - self._train_default( - sum_labeled_flows, last_number_of_flows_when_trained - ) + def train(self, sum_labeled_flows): + self._train_default(sum_labeled_flows) def run_test_on_flow(self, flow: dict): self._test_default(flow) From 1f99ea4729bb9f4edff24e5379f91fc87b02b27f Mon Sep 17 00:00:00 2001 From: jsvobo Date: Sun, 12 Apr 2026 15:49:58 +0200 Subject: [PATCH 18/26] unit tests for the new ml modules updated --> removed unused variable int he function calls --- tests/unit/modules/ml_models/test_ml_base_detection.py | 5 ++--- tests/unit/modules/ml_models/test_ml_modules.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/unit/modules/ml_models/test_ml_base_detection.py b/tests/unit/modules/ml_models/test_ml_base_detection.py index 836bf5eaf8..069866ca52 100644 --- a/tests/unit/modules/ml_models/test_ml_base_detection.py +++ b/tests/unit/modules/ml_models/test_ml_base_detection.py @@ 
-47,7 +47,7 @@ def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: def is_preprocessor_initialized(self) -> bool: return True - def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + def train(self, sum_labeled_flows): return None def run_test_on_flow(self, flow: dict): @@ -85,7 +85,6 @@ def base_module(): module.write_to_log = lambda *args, **kwargs: None module.labeled_counter = 0 module.training_flows = [] - module.last_number_of_flows_when_trained = 0 module.preprocessor = object() return module @@ -106,7 +105,7 @@ def test_drop_labels_removes_known_label_columns(self, base_module): def test_train_default_passes_both_classes_on_first_fit(self, base_module): base_module._train_default( - sum_labeled_flows=2, last_number_of_flows_when_trained=0 + sum_labeled_flows=2 ) assert len(base_module.fit_calls) == 1 assert base_module.fit_calls[0]["classes"] == [MALICIOUS, BENIGN] diff --git a/tests/unit/modules/ml_models/test_ml_modules.py b/tests/unit/modules/ml_models/test_ml_modules.py index 8c17086b48..49c0f3fffa 100644 --- a/tests/unit/modules/ml_models/test_ml_modules.py +++ b/tests/unit/modules/ml_models/test_ml_modules.py @@ -49,7 +49,7 @@ def predict_batch(self, x_data: numpy.ndarray) -> numpy.ndarray: def is_preprocessor_initialized(self) -> bool: return True - def train(self, sum_labeled_flows, last_number_of_flows_when_trained): + def train(self, sum_labeled_flows): return None def run_test_on_flow(self, flow: dict): @@ -136,7 +136,6 @@ def base_module(): module.write_to_log = lambda *args, **kwargs: None module.labeled_counter = 0 module.training_flows = [] - module.last_number_of_flows_when_trained = 0 module.preprocessor = object() return module @@ -189,7 +188,7 @@ def test_drop_labels_removes_known_label_columns(self, base_module): def test_train_default_passes_both_classes_on_first_fit(self, base_module): base_module._train_default( - sum_labeled_flows=2, last_number_of_flows_when_trained=0 + sum_labeled_flows=2 ) assert 
len(base_module.fit_calls) == 1 From 584445e3c5b47ec357ebd96bab14fe309ca233f5 Mon Sep 17 00:00:00 2001 From: jsvobo Date: Sun, 12 Apr 2026 15:50:14 +0200 Subject: [PATCH 19/26] new evidence types for each ml module --- slips_files/core/structures/evidence.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/slips_files/core/structures/evidence.py b/slips_files/core/structures/evidence.py index b05b45c5fe..e8b8b3e67f 100644 --- a/slips_files/core/structures/evidence.py +++ b/slips_files/core/structures/evidence.py @@ -86,6 +86,8 @@ class EvidenceType(Enum): SMTP_LOGIN_BRUTEFORCE = auto() MALICIOUS_SSL_CERT = auto() MALICIOUS_FLOW = auto() + ML_LINEAR_MALICIOUS_FLOW = auto() + ML_ONLINE_MALICIOUS_FLOW = auto() SUSPICIOUS_USER_AGENT = auto() EMPTY_CONNECTIONS = auto() INCOMPATIBLE_USER_AGENT = auto() From 7220daa9872958f22c3e7825f624b208094265ee Mon Sep 17 00:00:00 2001 From: jsvobo Date: Mon, 13 Apr 2026 14:44:24 +0200 Subject: [PATCH 20/26] secret baseline regenerated --- .secrets.baseline | 426 ++++++++++++++++++++++------------------------ 1 file changed, 206 insertions(+), 220 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 997c3e42f3..dbcddcd9af 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -149,7 +149,7 @@ "filename": "config/slips.yaml", "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, - "line_number": 412 + "line_number": 419 } ], "dataset/test14-malicious-zeek-dir/http.log": [ @@ -5365,845 +5365,831 @@ "is_verified": false, "line_number": 705 }, - { - "type": "Hex High Entropy String", - "filename": "dataset/test9-mixed-zeek-dir/files.log", - "hashed_secret": "2e7dcd2ccf9d6d430fea6ac98ffc1b9d42f7f65d", - "is_verified": false, - "line_number": 711 - }, - { - "type": "Hex High Entropy String", - "filename": "dataset/test9-mixed-zeek-dir/files.log", - "hashed_secret": "9e2f3fa6cb3139cf9bbfecddd2be60592f7491af", - "is_verified": false, - "line_number": 711 - }, { "type": "Hex High Entropy 
String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "0135de5ac17ed5236e1a82a8887bbf1e2e7ad181", "is_verified": false, - "line_number": 712 + "line_number": 711 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e48868449fa2b1eb09a81e3122a0e200ce455fcd", "is_verified": false, - "line_number": 712 + "line_number": 711 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "49f903dad4fd2fe36ff840593ebe5a5db9efda11", "is_verified": false, - "line_number": 714 + "line_number": 713 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "504342ba596fa724ee131daa699b1d91cccf31d2", "is_verified": false, - "line_number": 714 + "line_number": 713 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "0122eae225e93a080e38cdfdd3b9383a940816f4", "is_verified": false, - "line_number": 718 + "line_number": 717 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "c0b651b2d8f8b8fbaa692960471efd10c87f6eaa", "is_verified": false, - "line_number": 718 + "line_number": 717 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "016d3288d50e871ec5cfac882eb5f64eacd565da", "is_verified": false, - "line_number": 724 + "line_number": 723 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "88e10725567d7907c8d8637d4fc40a7c9f8d5738", "is_verified": false, - "line_number": 724 + "line_number": 723 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "be592e5f782f9d6768ff513fb4e888c323c95794", "is_verified": false, - "line_number": 733 + "line_number": 732 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d2ece94611876b26a3730ba39a3e8575bbd58b29", "is_verified": false, - "line_number": 733 + "line_number": 732 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ab50308b64efaa28005bf4377036d14c648fbb26", "is_verified": false, - "line_number": 734 + "line_number": 733 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f5dd2504ca1677707cb687397fe01f98a1c08fab", "is_verified": false, - "line_number": 734 + "line_number": 733 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "86360885827733cbc8053fa49759b7d8895cb5a0", "is_verified": false, - "line_number": 735 + "line_number": 734 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b6c25851e11397eb3fc81273997ac5717fdeed59", "is_verified": false, - "line_number": 735 + "line_number": 734 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "884bcfe83836a3f814af90419d6fa44ac0f73e4c", "is_verified": false, - "line_number": 745 + "line_number": 744 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "de68b580d56f9cb0cb45636915b9de91559f202c", "is_verified": false, - "line_number": 745 + "line_number": 744 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "5b1edf8342ae3375fa1330ed75ec5e7797f53a3a", "is_verified": false, - "line_number": 751 + "line_number": 750 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a79bc0ed7ed14de358946cb1ce3dbcdff090777f", "is_verified": false, - "line_number": 751 + "line_number": 750 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "85a1e96fab0340c9f957471d497a84f618a2e444", "is_verified": false, - "line_number": 752 + "line_number": 751 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "efec807123b0681cb299c3693f2f6e8bec49326c", "is_verified": false, - "line_number": 752 + "line_number": 751 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "70fe648317e187f485af174df5c8dd386f0daa9a", "is_verified": false, - "line_number": 753 + "line_number": 752 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f7fc7e144342e9b8434806aabef7a2f23d762272", "is_verified": false, - "line_number": 753 + "line_number": 752 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "64ca326b8fcd5df5907e3c3b449ba562f27d252c", "is_verified": false, - "line_number": 754 + "line_number": 753 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f48a765b0acf94fc63293b47c9137a3de75ebcb6", "is_verified": false, - "line_number": 754 + "line_number": 753 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "346685a6018c2c491de868d0be7650a96aa0ae28", "is_verified": false, - "line_number": 755 + "line_number": 754 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e77e5537bfe212b745ae24419890deff5fece50d", "is_verified": false, - "line_number": 755 + "line_number": 754 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a12ac0eebc6ae1baf022dec2b3903ac069f86578", "is_verified": false, - "line_number": 760 + "line_number": 759 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ac81aa25967bd0dd82a6c092ca462d2a244a044b", "is_verified": false, - "line_number": 760 + "line_number": 759 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "8a451db871c50764f3b8106eec5ce26961018958", "is_verified": false, - "line_number": 761 + "line_number": 760 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f654a748eca4bd1fc0756acd7e26db31b861e29d", "is_verified": false, - "line_number": 761 + "line_number": 760 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "cf379622c9811fe604a0e89c921d6a85f469097b", "is_verified": false, - "line_number": 762 + "line_number": 761 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "df3708f6c0f441394e1831ac6f12aa3e807c1acf", "is_verified": false, - "line_number": 762 + "line_number": 761 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "43a706e4bf89b60a10fbff59218cb118a1270547", "is_verified": false, - "line_number": 783 + "line_number": 782 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "4b70550293dbe88ea61438ffd70b0d8843afa1b1", "is_verified": false, - "line_number": 783 + "line_number": 782 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a8722fde111ee1b4416c2339ddb5204ab9cab779", "is_verified": false, - "line_number": 784 + "line_number": 783 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e458026d5fc0194eeabe65008d6a5a16b0732dff", "is_verified": false, - "line_number": 784 + "line_number": 783 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "c811787dfe9223a372cfbb82e6b8226648d43fbf", "is_verified": false, - "line_number": 785 + "line_number": 784 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "da744e4f2477fc3b45a045fd57bd4af86d0cc3da", "is_verified": false, - "line_number": 785 + "line_number": 784 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "aa97f1be1c4e8c20b062d751347fea8cecf50f42", "is_verified": false, - "line_number": 786 + "line_number": 785 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b9fd5c57504715da2ee39bc1c22add54ccb54110", "is_verified": false, - "line_number": 786 + "line_number": 785 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "0f96272328093b3a917faf219a133985888cbbda", "is_verified": false, - "line_number": 787 + "line_number": 786 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "cb1819efe3b9255409b9eb92af5b371937e90f1b", "is_verified": false, - "line_number": 787 + "line_number": 786 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ca7f34c33d36658bb64e5c6e6c5ddce16a721349", "is_verified": false, - "line_number": 803 + "line_number": 802 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d9d8a5e72bc0b2674951fb15ebaf9381cb175f3f", "is_verified": false, - "line_number": 803 + "line_number": 802 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "2cf8da58ededcd24e8d972f2ac091c913ef143db", "is_verified": false, - "line_number": 805 + "line_number": 804 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b7bd16890928c2af829e2b6ddec5a74bab246838", "is_verified": false, - "line_number": 805 + "line_number": 804 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "2e1950f45fd6f1cd6e0eb90b7453f1f42f31b1f6", "is_verified": false, - "line_number": 806 + "line_number": 805 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e5cdf15a35f51762d1b0916260de05b520b8031d", "is_verified": false, - "line_number": 806 + "line_number": 805 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "1f7d497f701c66cc51f009d7d9ff7cd74cdfb29a", "is_verified": false, - "line_number": 807 + "line_number": 806 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d484216fe0269eee8eff27c0e3c7689b0c8322f1", "is_verified": false, - "line_number": 807 + "line_number": 806 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "04554029367a20bfc0f0a40df2e33ff430807e6a", "is_verified": false, - "line_number": 808 + "line_number": 807 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "3e69cc53ccd14ed34521220dc0da95190983439f", "is_verified": false, - "line_number": 808 + "line_number": 807 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "4e7a99f1495e023c5ab99e10e55606233f8cd976", "is_verified": false, - "line_number": 813 + "line_number": 812 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "5a32b30681213ecdd57afa81a3c91c7e965523c1", "is_verified": false, - "line_number": 813 + "line_number": 812 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d88e692967c36f2b73a3b71312ec877692c1c05e", "is_verified": false, - "line_number": 817 + "line_number": 816 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "fa0a362a98ac102a1f73c9281f731474ec599aab", "is_verified": false, - "line_number": 817 + "line_number": 816 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "05e4cd48e8f6443a3e97cb31a2ca5c6fa4b7b896", "is_verified": false, - "line_number": 818 + "line_number": 817 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "cd9229a36451f2aa996811d77b56912c584bd474", "is_verified": false, - "line_number": 818 + "line_number": 817 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "3b0aba910edbe716482c94591ce409f335ad8d5f", "is_verified": false, - "line_number": 819 + "line_number": 818 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "6916c4db5296a464a6fc9308878869881951ca84", "is_verified": false, - "line_number": 819 + "line_number": 818 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "3bd557233052edf9246be00b5d391eb0fd5a1caa", "is_verified": false, - "line_number": 823 + "line_number": 822 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ad1931614c872fe11eb3ec8c252337f83b8720ad", "is_verified": false, - "line_number": 823 + "line_number": 822 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "48e4199ad69e4d0224771556251b4bbb55edbfb3", "is_verified": false, - "line_number": 824 + "line_number": 823 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e04ab8db0c01dfea66946c9aaa91d22b375aff61", "is_verified": false, - "line_number": 824 + "line_number": 823 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "60fa3bd739ad68f1f5a381b6ee27f127c0ba32ad", "is_verified": false, - "line_number": 827 + "line_number": 826 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e96fce94c758e3a85900f76fc057ff3724071844", "is_verified": false, - "line_number": 827 + "line_number": 826 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "5b47541012e8f2755e345222478f2583dd8ac22f", "is_verified": false, - "line_number": 829 + "line_number": 828 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d958e6b8bb9d19f867db4ae63095b2dc336dfca2", "is_verified": false, - "line_number": 829 + "line_number": 828 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "7801e816d029ff12c3e8c9be1a203001d3e72566", "is_verified": false, - "line_number": 836 + "line_number": 835 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e8aa939e8f12b33b7e02703cd4a1a720f6271d92", "is_verified": false, - "line_number": 836 + "line_number": 835 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "0db71c6d3ca3091c1e0553d441bea452e860e373", "is_verified": false, - "line_number": 840 + "line_number": 839 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f0d20428987080d04fc01a28ce6230e9383e01d6", "is_verified": false, - "line_number": 840 + "line_number": 839 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ea6f7fb73d4b29787b64e646a5c24bc038974c24", "is_verified": false, - "line_number": 842 + "line_number": 841 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "fd6342ff46ffd226ab011dda7dfbabaa936a7454", "is_verified": false, - "line_number": 842 + "line_number": 841 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a941e691d5afb02087f212aafb96f0ff381013a9", "is_verified": false, - "line_number": 846 + "line_number": 845 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "cdc017091b65aa223da7676a79499431ce22ca92", "is_verified": false, - "line_number": 846 + "line_number": 845 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "29f577087c91ce4cac58125b385e9b6b56bd3948", "is_verified": false, - "line_number": 848 + "line_number": 847 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "3d7479c30d90d38511d5f5d5b5eb86477a4861c5", "is_verified": false, - "line_number": 848 + "line_number": 847 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "48f21e2e3cd237dc0fe7dc7fb479a6d8db35154e", "is_verified": false, - "line_number": 854 + "line_number": 853 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b66f6f577a43b3ba50aebb02d8fea8bf38767380", "is_verified": false, - "line_number": 854 + "line_number": 853 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ae6e7a1df413e4ff565bc1c8314d1ab51a8d42ef", "is_verified": false, - "line_number": 856 + "line_number": 855 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b024ebf502aca28c5cff54c6fa8abec3cc5bd53a", "is_verified": false, - "line_number": 856 + "line_number": 855 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "6da56e48511e6fdfbf4519fbd5fabe75e7abfd8e", "is_verified": false, - "line_number": 859 + "line_number": 858 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a37fb57cac54e4bfd401b8f744b6d835f618565c", "is_verified": false, - "line_number": 859 + "line_number": 858 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "61e1aee596b0cfbb2a9c3ab7c9e1fea0356f8dfb", "is_verified": false, - "line_number": 873 + "line_number": 872 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a3b427fa51843a81806c9819cad6123117d64d45", "is_verified": false, - "line_number": 873 + "line_number": 872 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "7f00582e3698cfde33cb176ec7ef90724460ec6d", "is_verified": false, - "line_number": 878 + "line_number": 877 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "af3820ff0a884c252eb6bf2d2eb9b207652a0951", "is_verified": false, - "line_number": 878 + "line_number": 877 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "46dbd04b9455a3d4c670ae9c430fe65532bce655", "is_verified": false, - "line_number": 880 + "line_number": 879 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "bf7ab2fb8ce43fd3fd03ec6558fc376fcd25c573", "is_verified": false, - "line_number": 880 + "line_number": 879 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "c637f4a40dc091e3dfeeb98127c5eb7c06feba4d", "is_verified": false, - "line_number": 881 + "line_number": 880 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "dc19d4073c3611195c7519b465b413fa901a9ae4", "is_verified": false, - "line_number": 881 + "line_number": 880 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "3c4d6702e0c7866383b386eca52bbe0fffbce0f3", "is_verified": false, - "line_number": 882 + "line_number": 881 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ede30abda8d269bb05c75c2286f5f12f66c3360f", "is_verified": false, - "line_number": 882 + "line_number": 881 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "7914b28a9505ee65fe5a50dc058f322a069781ed", "is_verified": false, - "line_number": 893 + "line_number": 892 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ffcd7655917ab1469744c4d79c3bf1270ce94a20", "is_verified": false, - "line_number": 893 + "line_number": 892 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "2d2c64b88205b797d38815c8f7ebda62059b8660", "is_verified": false, - "line_number": 895 + "line_number": 894 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "e533630a6c19676d53cd29301357edee1d943481", "is_verified": false, - "line_number": 895 + "line_number": 894 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ca5296128b3fa72795bd19f04427ea24526f0105", "is_verified": false, - "line_number": 896 + "line_number": 895 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "ec18126931229f89429bcf972094132ae2105422", "is_verified": false, - "line_number": 896 + "line_number": 895 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "63a999310c82808f4d3b8808f5957f207bea438d", "is_verified": false, - "line_number": 898 + "line_number": 897 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "bfc851fb4908464b9cded648831010c1d205c1f9", "is_verified": false, - "line_number": 898 + "line_number": 897 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "9ae9ab78e654fdfae0bd41e85cd55e51987acfc6", "is_verified": false, - "line_number": 903 + "line_number": 902 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "d3264a7cc9d82a156a6680125df87e806e73b8e3", "is_verified": false, - "line_number": 903 + "line_number": 902 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "6ec87d1b26bd1031be0985df77158ce86037a640", "is_verified": false, - "line_number": 904 + "line_number": 903 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "a6c09a4674feeedeb31bddde1ec843f6c605f3d7", "is_verified": false, - "line_number": 904 + "line_number": 903 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "0f8966c5b70f83947fb4a238ae475b6c6db5f0f2", "is_verified": false, - "line_number": 913 + "line_number": 912 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "73f0301c448e6e4efe86910313ba0f8e0a15bb4a", "is_verified": false, - "line_number": 913 + "line_number": 912 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "4344ea2555a802a8b97ebd67e5ad36d7c6167a86", "is_verified": false, - "line_number": 914 + "line_number": 913 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "f4ff617bd75911c38fd7101293f979acead6400e", "is_verified": false, - "line_number": 914 + "line_number": 913 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "be3bef15bc7bed6bafcb9179169b4832fb564227", "is_verified": false, - "line_number": 915 + "line_number": 914 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "cca8cdd6f518b2b879b3197195dac4e3042a1ad3", "is_verified": false, - "line_number": 915 + "line_number": 914 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "83c1003f406f34fba4d6279a948fee3abc802884", "is_verified": false, - "line_number": 916 + "line_number": 915 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "c28b4da54bc68775fa8e2e0ea98f852eb49d5870", "is_verified": false, - "line_number": 916 + "line_number": 915 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "4b7d6ec00d8e651549d3e939e4d3ebff394b7655", "is_verified": false, - "line_number": 919 + "line_number": 918 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "c95063c24bdf721f80f88bce7c52565e574c3de9", "is_verified": false, - "line_number": 919 + "line_number": 918 }, { "type": "Hex High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "8a16496a48de4448d6a430af9f8d756b41690e19", "is_verified": false, - "line_number": 920 + "line_number": 919 }, { "type": "Hex High Entropy String", "filename": 
"dataset/test9-mixed-zeek-dir/files.log", "hashed_secret": "b9c473ba6eac8ef97783fe05078383c757919085", "is_verified": false, - "line_number": 920 + "line_number": 919 } ], "dataset/test9-mixed-zeek-dir/http.log": [ @@ -6401,14 +6387,14 @@ "filename": "dataset/test9-mixed-zeek-dir/http.log", "hashed_secret": "631d18d0259084c9784b44eedaf1667a3470fea6", "is_verified": false, - "line_number": 511 + "line_number": 510 }, { "type": "Base64 High Entropy String", "filename": "dataset/test9-mixed-zeek-dir/http.log", "hashed_secret": "37522b5b749c6ceaa3820e74a0924758772a8793", "is_verified": false, - "line_number": 514 + "line_number": 513 } ], "dataset/test9-mixed-zeek-dir/ssl.log": [ @@ -6827,14 +6813,14 @@ "filename": "docs/detection_modules.md", "hashed_secret": "2e621bc4ae7af0e821c2a7f45b1e9ff83780ff3e", "is_verified": false, - "line_number": 448 + "line_number": 494 }, { "type": "Hex High Entropy String", "filename": "docs/detection_modules.md", "hashed_secret": "b6aa435b77ffc6bb2f5c3a1647eb4a5e45a316ee", "is_verified": false, - "line_number": 449 + "line_number": 495 } ], "docs/features.md": [ @@ -6843,14 +6829,14 @@ "filename": "docs/features.md", "hashed_secret": "2e621bc4ae7af0e821c2a7f45b1e9ff83780ff3e", "is_verified": false, - "line_number": 643 + "line_number": 729 }, { "type": "Hex High Entropy String", "filename": "docs/features.md", "hashed_secret": "b6aa435b77ffc6bb2f5c3a1647eb4a5e45a316ee", "is_verified": false, - "line_number": 644 + "line_number": 730 } ], "docs/feel_project.md": [ @@ -6933,246 +6919,246 @@ "line_number": 676 } ], - "tests/integration_tests/fides_config.yaml": [ + "tests/integration/config/fides_config.yaml": [ { "type": "Secret Keyword", - "filename": "tests/integration_tests/fides_config.yaml", + "filename": "tests/integration/config/fides_config.yaml", "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, "line_number": 199 }, { "type": "Secret Keyword", - "filename": 
"tests/integration_tests/fides_config.yaml", + "filename": "tests/integration/config/fides_config.yaml", "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", "is_verified": false, "line_number": 323 } ], - "tests/integration_tests/test.yaml": [ + "tests/integration/config/slips_iris_main.yaml": [ { "type": "Secret Keyword", - "filename": "tests/integration_tests/test.yaml", - "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", + "filename": "tests/integration/config/slips_iris_main.yaml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", "is_verified": false, - "line_number": 170 + "line_number": 36 }, { "type": "Secret Keyword", - "filename": "tests/integration_tests/test.yaml", - "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "filename": "tests/integration/config/slips_iris_main.yaml", + "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, - "line_number": 275 + "line_number": 100 } ], - "tests/integration_tests/test2.yaml": [ + "tests/integration/config/test.yaml": [ { "type": "Secret Keyword", - "filename": "tests/integration_tests/test2.yaml", - "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", + "filename": "tests/integration/config/test.yaml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", "is_verified": false, - "line_number": 201 + "line_number": 37 }, { "type": "Secret Keyword", - "filename": "tests/integration_tests/test2.yaml", - "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "filename": "tests/integration/config/test.yaml", + "hashed_secret": "4cac50cee3ad8e462728e711eac3e670753d5016", "is_verified": false, - "line_number": 325 + "line_number": 103 } ], - "tests/test_circllu.py": [ + "tests/unit/modules/flowalerts/test_set_evidence.py": [ { "type": "Hex High Entropy String", - "filename": "tests/test_circllu.py", - "hashed_secret": "125fbc14773f228e72f16d55be21bad750d30b19", + "filename": 
"tests/unit/modules/flowalerts/test_set_evidence.py", + "hashed_secret": "2e621bc4ae7af0e821c2a7f45b1e9ff83780ff3e", "is_verified": false, - "line_number": 78 + "line_number": 1421 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/unit/modules/flowalerts/test_set_evidence.py", + "hashed_secret": "b6aa435b77ffc6bb2f5c3a1647eb4a5e45a316ee", + "is_verified": false, + "line_number": 1428 } ], - "tests/test_go_director.py": [ + "tests/unit/modules/p2ptrust/trust/test_go_director.py": [ { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "c7e0bc120ee16cfa95983e550301dd6555b23ea2", "is_verified": false, "line_number": 33 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "b586d0c2cdebc6defd0f81605610399ae8aaeed8", "is_verified": false, "line_number": 34 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "35f29e14d00c0fc6f35a1587c68889c112e24bcc", "is_verified": false, "line_number": 35 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "9afc0822c443ae9cecbb373900429e890da8e231", "is_verified": false, "line_number": 44 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "675c1e48dfa313a3cebe293cfc950c4ae2618d7f", "is_verified": false, "line_number": 45 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": 
"a73d04a4137a59599d3ff20111762f1cddc8c470", "is_verified": false, "line_number": 46 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "5fe39eccf20874c5a842ed8bd7238d65a07bc053", "is_verified": false, "line_number": 96 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "165ace5923919eea8e15c4ad9279f68f35d4b5b8", "is_verified": false, "line_number": 97 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "9e039b5e2fb24f9d8feddf914c6dac07cb1eca66", "is_verified": false, "line_number": 98 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "dd31e02ba5fefdf6409c0cd23aaf360287626787", "is_verified": false, "line_number": 99 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "ef475528c602bfac33d4198ba750deb1e3fb904e", "is_verified": false, "line_number": 119 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "e486019fb66365e852c77158645768bb5038da87", "is_verified": false, "line_number": 120 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "032ab16f80631bb3df49b7b941bc95f187e893db", "is_verified": false, "line_number": 150 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": 
"tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "de261b4d0d37516dccffd67dcf0e216723ee3790", "is_verified": false, "line_number": 151 }, { "type": "Base64 High Entropy String", - "filename": "tests/test_go_director.py", + "filename": "tests/unit/modules/p2ptrust/trust/test_go_director.py", "hashed_secret": "81a86724010e489b2063575bfc9439a2c9ba45b5", "is_verified": false, "line_number": 152 } ], - "tests/test_set_evidence.py": [ - { - "type": "Hex High Entropy String", - "filename": "tests/test_set_evidence.py", - "hashed_secret": "2e621bc4ae7af0e821c2a7f45b1e9ff83780ff3e", - "is_verified": false, - "line_number": 1394 - }, + "tests/unit/modules/threat_intelligence/test_circllu.py": [ { "type": "Hex High Entropy String", - "filename": "tests/test_set_evidence.py", - "hashed_secret": "b6aa435b77ffc6bb2f5c3a1647eb4a5e45a316ee", + "filename": "tests/unit/modules/threat_intelligence/test_circllu.py", + "hashed_secret": "125fbc14773f228e72f16d55be21bad750d30b19", "is_verified": false, - "line_number": 1401 + "line_number": 78 } ], - "tests/test_slips_utils.py": [ + "tests/unit/modules/threat_intelligence/test_threat_intelligence.py": [ { "type": "Hex High Entropy String", - "filename": "tests/test_slips_utils.py", - "hashed_secret": "0142af6be109425ab73fc66b36d8981ec919ba0b", + "filename": "tests/unit/modules/threat_intelligence/test_threat_intelligence.py", + "hashed_secret": "125fbc14773f228e72f16d55be21bad750d30b19", "is_verified": false, - "line_number": 20 + "line_number": 639 }, { "type": "Hex High Entropy String", - "filename": "tests/test_slips_utils.py", - "hashed_secret": "10470c3b4b1fed12c3baac014be15fac67c6e815", + "filename": "tests/unit/modules/threat_intelligence/test_threat_intelligence.py", + "hashed_secret": "47784580758b20256793a484ce89c74d6724936c", "is_verified": false, - "line_number": 66 + "line_number": 664 }, { - "type": "Basic Auth Credentials", - "filename": "tests/test_slips_utils.py", - "hashed_secret": 
"5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", + "type": "Hex High Entropy String", + "filename": "tests/unit/modules/threat_intelligence/test_threat_intelligence.py", + "hashed_secret": "7d7c596baa46487dce0e2036e14982612f6b50da", "is_verified": false, - "line_number": 510 + "line_number": 1221 } ], - "tests/test_threat_intelligence.py": [ - { - "type": "Hex High Entropy String", - "filename": "tests/test_threat_intelligence.py", - "hashed_secret": "125fbc14773f228e72f16d55be21bad750d30b19", - "is_verified": false, - "line_number": 638 - }, + "tests/unit/modules/update_manager/test_update_file_manager.py": [ { "type": "Hex High Entropy String", - "filename": "tests/test_threat_intelligence.py", - "hashed_secret": "47784580758b20256793a484ce89c74d6724936c", + "filename": "tests/unit/modules/update_manager/test_update_file_manager.py", + "hashed_secret": "2431dcd348f1cc7e2d70c13eed1df1ee77452bfb", "is_verified": false, - "line_number": 663 + "line_number": 321 }, { "type": "Hex High Entropy String", - "filename": "tests/test_threat_intelligence.py", - "hashed_secret": "7d7c596baa46487dce0e2036e14982612f6b50da", + "filename": "tests/unit/modules/update_manager/test_update_file_manager.py", + "hashed_secret": "13603b78502e7568249304e035f904029e4c81c6", "is_verified": false, - "line_number": 1220 + "line_number": 791 } ], - "tests/test_update_file_manager.py": [ + "tests/unit/slips_files/common/test_slips_utils.py": [ { "type": "Hex High Entropy String", - "filename": "tests/test_update_file_manager.py", - "hashed_secret": "2431dcd348f1cc7e2d70c13eed1df1ee77452bfb", + "filename": "tests/unit/slips_files/common/test_slips_utils.py", + "hashed_secret": "0142af6be109425ab73fc66b36d8981ec919ba0b", "is_verified": false, - "line_number": 322 + "line_number": 20 }, { "type": "Hex High Entropy String", - "filename": "tests/test_update_file_manager.py", - "hashed_secret": "13603b78502e7568249304e035f904029e4c81c6", + "filename": "tests/unit/slips_files/common/test_slips_utils.py", 
+ "hashed_secret": "10470c3b4b1fed12c3baac014be15fac67c6e815", "is_verified": false, - "line_number": 791 + "line_number": 112 + }, + { + "type": "Basic Auth Credentials", + "filename": "tests/unit/slips_files/common/test_slips_utils.py", + "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", + "is_verified": false, + "line_number": 560 } ], "webinterface/templates/app.html": [ @@ -7185,5 +7171,5 @@ } ] }, - "generated_at": "2026-03-30T22:35:13Z" + "generated_at": "2026-04-13T12:41:15Z" } From e36a4879eb7dc409f606fd1ffa28e5fe4aee14a9 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 14 Apr 2026 22:06:47 +0200 Subject: [PATCH 21/26] Add logs for each step of the evidence handle shutdown_gracefully() --- managers/process_manager.py | 17 +++++++++-------- slips_files/core/evidence_handler.py | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/managers/process_manager.py b/managers/process_manager.py index 2c50143570..6a27b574de 100644 --- a/managers/process_manager.py +++ b/managers/process_manager.py @@ -596,7 +596,8 @@ def wait_for_processes_to_finish( self, processes_to_wait_for: List[Process] ) -> List[Process]: """ - :param processes_to_wait_for: list of PIDs to wait for + :param processes_to_wait_for: list of PIDs to wait for, if one of + them is joined, a msg will be printed :return: list of PIDs that still are not done yet """ alive_processes: List[Process] = [] @@ -690,7 +691,6 @@ def shutdown_interactive( returns 2 lists of alive children """ # wait for the processes to be killed first as long as they want - # maximum time to wait is timeout_seconds alive_processes = self.wait_for_processes_to_finish(to_kill_first) if alive_processes: # update the list of processes to kill first with only the ones @@ -702,11 +702,11 @@ def shutdown_interactive( # to join() em yet self.warn_about_pending_modules(alive_processes + to_kill_last) return to_kill_first, to_kill_last - else: - # all of them are killed - to_kill_first = [] - # tell evidence 
to stop since all the modules are done - self.evidence_handler_termination_event.set() + + # all of them are killed + to_kill_first = [] + # tell evidence to stop since all the modules are done + self.evidence_handler_termination_event.set() alive_processes = self.wait_for_processes_to_finish(to_kill_last) if alive_processes: @@ -835,7 +835,8 @@ def shutdown_gracefully(self): self.termination_event.set() try: - # Wait timeout_seconds for all the processes to finish + # Wait up to timeout_seconds for all the processes to + # finish while time.time() - method_start_time < timeout: ( to_kill_first, diff --git a/slips_files/core/evidence_handler.py b/slips_files/core/evidence_handler.py index 40e8d9cf0c..e0d1f310b1 100644 --- a/slips_files/core/evidence_handler.py +++ b/slips_files/core/evidence_handler.py @@ -98,13 +98,16 @@ def read_configuration(self): ) def shutdown_gracefully(self): + self.print("Stopping all workers.", log_to_logfiles_only=True) self.stop_evidence_workers() self.logger_stop_signal.set() + self.print("Stopping the logger thread.", log_to_logfiles_only=True) try: self.logger_thread.join(timeout=5) except Exception: pass + self.print("Stopping the used queues.", log_to_logfiles_only=True) used_queues = [ self.evidence_worker_queue, self.evidence_logger_q, @@ -113,6 +116,7 @@ def shutdown_gracefully(self): for q in used_queues: q.cancel_join_thread() q.close() + self.print("Done shutting down gracefully.") def stop_evidence_workers(self): for _ in self.evidence_worker_child_processes: From ac5cc81803e7a531eb4291034c6dccc3d0e52942 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 14 Apr 2026 22:16:34 +0200 Subject: [PATCH 22/26] temporarily disable integration tests output dir cleanup to be able to check CI artifacts --- tests/integration/test_dataset.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 13f46f835f..88605cb002 100644 --- 
a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -4,6 +4,7 @@ This file tests all kinds of input in our dataset/ It checks a random evidence and the total number of profiles in every file """ +from pathlib import PosixPath from tests.common_test_utils import ( run_slips, @@ -21,6 +22,11 @@ alerts_file = "alerts.log" + +def cleanup(dir: PosixPath): + return + shutil.rmtree(dir) + @pytest.mark.parametrize( "binetflow_path, expected_profiles, expected_evidence, output_dir, redis_port", [ @@ -91,7 +97,7 @@ def test_binetflow( log_file = output_dir / "alerts" / alerts_file assert is_evidence_present(log_file, expected_evidence) is True - shutil.rmtree(output_dir) + cleanup(output_dir) @pytest.mark.parametrize( @@ -134,7 +140,7 @@ def test_suricata(suricata_path, output_dir, redis_port, expected_evidence): log_file = output_dir / "alerts" / alerts_file assert any(is_evidence_present(log_file, ev) for ev in expected_evidence) - shutil.rmtree(output_dir) + cleanup(output_dir) @pytest.mark.skipif( @@ -173,4 +179,4 @@ def test_nfdump(nfdump_path, output_dir, redis_port): # log_file = os.path.join(output_dir, alerts_file) # assert is_evidence_present(log_file, expected_evidence) == True - shutil.rmtree(output_dir) + cleanup(output_dir) From 685b067d36e075ad02d9ef358afe41d6e15bb7be Mon Sep 17 00:00:00 2001 From: alya Date: Wed, 15 Apr 2026 00:10:50 +0200 Subject: [PATCH 23/26] Fix github CI uploading the wrong integration tests output dir --- .github/workflows/integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 25e4eac083..9c4b7f88c8 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -68,4 +68,4 @@ jobs: # Replaces slashes with underscores for valid artifact naming name: ${{ github.run_id }}-${{ strategy.job-index }}-integration-output path: | - output/integration + 
output/integration_tests From 0875b91c9b2fb8487190c24dbe97ddfccb9f9f69 Mon Sep 17 00:00:00 2001 From: alya Date: Wed, 15 Apr 2026 00:55:54 +0200 Subject: [PATCH 24/26] log the number of started profiler workers the input process is sending a "stop" msg to. (main race condition suspect) --- slips_files/core/input/input.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/slips_files/core/input/input.py b/slips_files/core/input/input.py index de594dc0ec..676c10e2ea 100644 --- a/slips_files/core/input/input.py +++ b/slips_files/core/input/input.py @@ -131,8 +131,8 @@ def mark_self_as_done_processing(self): # ok this very terrible solution is to prevent the race condition # that happens when the analyzed file is extremely small, that the # input reads it, sends to the profiler queue, and reaches here, - # before the workers all start!! so we end up sending 0 stop msgs - # because 0 workers has started. this race condition causes slips + # before all the profiler workers even start!! so we end up sending 0 + # stop msgs because 0 workers has started. this race condition causes slips # to stay up forever waiting for stop msgs that will never be recvd # in the profiler. # this says " if the input took less than 3mins to reach this line, @@ -149,6 +149,12 @@ def mark_self_as_done_processing(self): log_to_logfiles_only=True, ) time.sleep(20) + started_workers: int = self.db.get_profiler_workers_started() + self.print( + f"Done giving slips time to start all profilers. 
" + f"started_workers: {started_workers}", + log_to_logfiles_only=True, + ) started_workers: int = self.db.get_profiler_workers_started() self.print( From 4737bcfd3ecd61fa2ec3ad3dbd04133f03028f56 Mon Sep 17 00:00:00 2001 From: alya Date: Wed, 15 Apr 2026 20:29:50 +0200 Subject: [PATCH 25/26] let profiler.py handle the shutdown of all the profiler workers --- slips_files/core/input/input.py | 41 --------------------------------- slips_files/core/profiler.py | 22 ++++++++++++++++-- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/slips_files/core/input/input.py b/slips_files/core/input/input.py index 676c10e2ea..8a3f4c07fd 100644 --- a/slips_files/core/input/input.py +++ b/slips_files/core/input/input.py @@ -2,8 +2,6 @@ # SPDX-License-Identifier: GPL-2.0-only # Stratosphere Linux IPS. A machine-learning Intrusion Detection System # Copyright (C) 2021 Sebastian Garcia -import time - # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 @@ -128,45 +126,6 @@ def mark_self_as_done_processing(self): log_to_logfiles_only=True, ) - # ok this very terrible solution is to prevent the race condition - # that happens when the analyzed file is extremely small, that the - # input reads it, sends to the profiler queue, and reaches here, - # before all the profiler workers even start!! so we end up sending 0 - # stop msgs because 0 workers has started. this race condition causes slips - # to stay up forever waiting for stop msgs that will never be recvd - # in the profiler. 
- # this says " if the input took less than 3mins to reach this line, - # give slips extra 10s justt o make sure profilers are started - # before sending the stop msgs" - max_time_slips_can_take_to_start_all_processes = 60 * 3 - if ( - time.time() - < float(self.db.get_slips_start_time()) - + max_time_slips_can_take_to_start_all_processes - ): - self.print( - "Giving Slips time to start all profilers.", - log_to_logfiles_only=True, - ) - time.sleep(20) - started_workers: int = self.db.get_profiler_workers_started() - self.print( - f"Done giving slips time to start all profilers. " - f"started_workers: {started_workers}", - log_to_logfiles_only=True, - ) - - started_workers: int = self.db.get_profiler_workers_started() - self.print( - f"Sending {started_workers} stop " - f"signals for the profiler workers.", - log_to_logfiles_only=True, - ) - for _ in range(started_workers): - self.profiler_queue.put("stop") - - # this has to be done after the sentinel is put in the queue, - # or else we'll have a deadlock when slips is stopping if self.is_input_done_event is not None: self.is_input_done_event.set() diff --git a/slips_files/core/profiler.py b/slips_files/core/profiler.py index c23c0ac227..8defe5f8c1 100644 --- a/slips_files/core/profiler.py +++ b/slips_files/core/profiler.py @@ -170,11 +170,17 @@ def stop_profiler_workers(self): """ wait as long as needed foreach worker to stop """ - # ensure we don't block forever waiting for workers that will never - # receive the stop sentinel if self.is_input_done_event is not None: self.is_input_done_event.wait() + workers_count = len(self.profiler_child_processes) + self.print( + f"Sending {workers_count} stop signals for the profiler workers.", + log_to_logfiles_only=True, + ) + for _ in range(workers_count): + self.profiler_queue.put("stop") + for process in self.profiler_child_processes: try: process.join() @@ -211,6 +217,13 @@ def get_msg_from_queue(self, q: multiprocessing.Queue): except Exception: return None + def 
is_done_receiving_input(self) -> bool: + """Return True when input has signaled that no more flows will arrive.""" + return ( + self.is_input_done_event is not None + and self.is_input_done_event.is_set() + ) + def start_profiler_worker(self, worker_id: int = None): """starts A profiler worker for faster processing of the flows""" worker_name = f"profiler_worker_process_{worker_id}" @@ -324,6 +337,9 @@ def _check_if_high_throughput_and_add_workers(self): Checks for input and profile flows/sec imbalance and adds more profiler workers if needed. """ + if self.is_done_receiving_input(): + return + if self.max_workers_started(): return @@ -387,6 +403,8 @@ def main(self): msg = None while not msg: msg = self.get_msg_from_queue(self.profiler_queue) + if not msg and self.is_done_receiving_input(): + return time.sleep(0.1) self.input_handler_obj = self.get_handler_obj(msg) From 8c5be25b0e01f0abf11ca5e79862d46e946d9ad6 Mon Sep 17 00:00:00 2001 From: alya Date: Wed, 15 Apr 2026 20:29:58 +0200 Subject: [PATCH 26/26] update unit tests --- .../unit/slips_files/core/input/test_input.py | 17 ++++++ tests/unit/slips_files/core/test_profiler.py | 56 ++++++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/tests/unit/slips_files/core/input/test_input.py b/tests/unit/slips_files/core/input/test_input.py index 4db7e39cfb..053662beda 100644 --- a/tests/unit/slips_files/core/input/test_input.py +++ b/tests/unit/slips_files/core/input/test_input.py @@ -396,6 +396,23 @@ def test_give_profiler(line, input_type, expected_line, expected_input_type): assert line_sent["input_type"] == expected_input_type +def test_mark_self_as_done_processing_signals_eof_without_worker_stops(): + """Test input signals EOF without sending profiler worker sentinels.""" + input_process = ModuleFactory().create_input_obj("", InputType.STDIN) + input_process.profiler_queue = Mock() + input_process.done_processing = Mock() + input_process.is_input_done_event = Mock() + 
input_process.is_profiler_done_event = Mock() + + type(input_process).mark_self_as_done_processing(input_process) + + input_process.is_input_done_event.set.assert_called_once() + input_process.is_profiler_done_event.wait.assert_called_once() + input_process.done_processing.release.assert_called_once() + input_process.profiler_queue.put.assert_not_called() + input_process.db.get_profiler_workers_started.assert_not_called() + + def test_get_file_handle_existing_file(tmp_path): """ Test that the get_file_handle method correctly diff --git a/tests/unit/slips_files/core/test_profiler.py b/tests/unit/slips_files/core/test_profiler.py index a101defab1..f7dc794389 100644 --- a/tests/unit/slips_files/core/test_profiler.py +++ b/tests/unit/slips_files/core/test_profiler.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0-only """Unit tests for the profiler core process.""" -from unittest.mock import Mock, patch +from unittest.mock import Mock, call, patch import pytest from tests.module_factory import ModuleFactory @@ -111,6 +111,60 @@ def test_shutdown_gracefully(monkeypatch): profiler.mark_self_as_done_processing.assert_called_once() +def test_stop_profiler_workers_sends_stop_per_started_worker(): + """Test profiler owns worker stop sentinels and joins each worker.""" + profiler = ModuleFactory().create_profiler_obj() + workers = [Mock(), Mock(), Mock()] + profiler.profiler_child_processes = workers + profiler.profiler_queue = Mock() + profiler.is_input_done_event = Mock() + + profiler.stop_profiler_workers() + + profiler.is_input_done_event.wait.assert_called_once() + assert profiler.profiler_queue.put.call_count == len(workers) + profiler.profiler_queue.put.assert_has_calls([call("stop")] * len(workers)) + for worker in workers: + worker.join.assert_called_once() + assert profiler.did_all_workers_stop.is_set() + + +@pytest.mark.parametrize( + "event, expected", + [ + (None, False), + (Mock(is_set=Mock(return_value=False)), False), + 
(Mock(is_set=Mock(return_value=True)), True), + ], +) +def test_is_done_receiving_input(event, expected): + """Test profiler detects the input EOF event state.""" + profiler = ModuleFactory().create_profiler_obj() + profiler.is_input_done_event = event + + assert profiler.is_done_receiving_input() is expected + + +def test_main_returns_when_input_done_before_first_msg(): + """Test profiler exits when input ends without sending any flows.""" + profiler = ModuleFactory().create_profiler_obj() + profiler.get_msg_from_queue = Mock(return_value=None) + profiler.is_done_receiving_input = Mock(return_value=True) + + assert profiler.main() is None + + +def test_high_throughput_check_skips_new_workers_after_input_done(): + """Test profiler does not add workers after input has ended.""" + profiler = ModuleFactory().create_profiler_obj() + profiler.is_done_receiving_input = Mock(return_value=True) + profiler.max_workers_started = Mock() + + profiler._check_if_high_throughput_and_add_workers() + + profiler.max_workers_started.assert_not_called() + + def test_notify_observers_no_observers(): profiler = ModuleFactory().create_profiler_obj() test_msg = {"action": "test"}