From 48ffcbdcbca9368bfd37eef4bcc8a3b27c04de50 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Wed, 13 Dec 2023 16:15:45 -0500 Subject: [PATCH] Fixes to modular DFP examples and benchmarks (#1429) - Fix modular DFP benchmark and examples after `DFPArgParser`updates from PR #1245 - Shorten `load` file paths in example control messages - Doc fixes Fixes #1431 Fixes #1432 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1429 --- .../10_modular_pipeline_digital_fingerprinting.md | 8 ++++---- .../production/morpheus/benchmarks/README.md | 8 ++++---- .../benchmarks/benchmark_conf_generator.py | 2 ++ .../control_messages/azure_payload_inference.json | 2 +- .../azure_payload_load_train_inference.json | 4 ++-- .../azure_payload_load_training.json | 2 +- .../control_messages/azure_payload_lti.json | 4 ++-- .../control_messages/azure_payload_training.json | 2 +- .../azure_streaming_inference.json | 2 +- .../control_messages/azure_streaming_lti.json | 4 ++-- .../azure_streaming_training.json | 2 +- .../control_messages/duo_payload_inference.json | 2 +- .../duo_payload_load_train_inference.json | 4 ++-- .../control_messages/duo_payload_lti.json | 4 ++-- .../control_messages/duo_payload_only_load.json | 2 +- .../control_messages/duo_payload_training.json | 2 +- .../control_messages/duo_streaming_inference.json | 2 +- .../control_messages/duo_streaming_lti.json | 4 ++-- .../control_messages/duo_streaming_only_load.json | 2 +- .../control_messages/duo_streaming_payload.json | 4 ++-- .../control_messages/duo_streaming_training.json | 2 +- .../dfp_integrated_training_batch_pipeline.py | 14 ++++++++++---- .../dfp_integrated_training_streaming_pipeline.py | 15 +++++++++++---- 23 files changed, 56 insertions(+), 41 deletions(-) diff --git a/docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md b/docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md index 408d6e6804..e30952dd41 100644 --- a/docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md +++ b/docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md @@ -527,7 +527,7 @@ From the `examples/digital_fingerprinting/production` dir, run: ```bash docker compose run morpheus_pipeline bash ``` -To run the DFP pipelines with the example datasets within the container, run: +To run the DFP pipelines with the example datasets within the container, run the following from `examples/digital_fingerprinting/production/morpheus`: * Duo Training Pipeline ```bash @@ -560,7 +560,7 @@ To run the DFP pipelines with the example datasets within the container, run: --start_time "2022-08-01" \ --duration "60d" \ --train_users generic \ - --input_file "./control_messages/duo_payload_load_train_inference.json" + --input_file "./control_messages/duo_payload_load_train_inference.json" ``` * Azure Training Pipeline @@ -594,7 +594,7 @@ To run the DFP pipelines with the example datasets within the container, run: --start_time "2022-08-01" \ --duration "60d" \ --train_users generic \ - --input_file "./control_messages/azure_payload_load_train_inference.json" + --input_file "./control_messages/azure_payload_load_train_inference.json" ``` ### Output Fields @@ -615,4 +615,4 @@ In addition to this, for each input feature the following output fields will exi | `_z_loss` | FLOAT | The loss z-score | | `_pred` | FLOAT | The predicted value | -Refer to [DFPInferenceStage](6_digital_fingerprinting_reference.md#inference-stage-dfpinferencestage) for more on these fields. \ No newline at end of file +Refer to [DFPInferenceStage](6_digital_fingerprinting_reference.md#inference-stage-dfpinferencestage) for more on these fields. diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md index cad1fe96d6..4ff9f77e99 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md @@ -103,11 +103,11 @@ To ensure the [file_to_df_loader.py](../../../../../morpheus/loaders/file_to_df_ export MORPHEUS_FILE_DOWNLOAD_TYPE=dask ``` -Benchmarks for an individual workflow can be run using the following in your dev container: +Benchmarks for an individual workflow can be run from `examples/digital_fingerprinting/production/morpheus` in your dev container: ``` -pytest -s --log-level=WARN --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_dfp_pipeline.py:: +pytest -s --log-level=WARN --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave benchmarks/test_bench_e2e_dfp_pipeline.py:: ``` The `-s` option allows outputs of pipeline execution to be displayed so you can ensure there are no errors while running your benchmarks. @@ -137,12 +137,12 @@ The `--benchmark-warmup` and `--benchmark-warmup-iterations` options are used to For example, to run E2E benchmarks on the DFP training (modules) workflow on the azure logs: ``` -pytest -s --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_dfp_pipeline.py::test_dfp_modules_azure_payload_lti_e2e +pytest -s --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave benchmarks/test_bench_e2e_dfp_pipeline.py::test_dfp_modules_azure_payload_lti_e2e ``` To run E2E benchmarks on all workflows: ``` -pytest -s --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave test_bench_e2e_dfp_pipeline.py +pytest -s --benchmark-enable --benchmark-warmup=on --benchmark-warmup-iterations=1 --benchmark-autosave benchmarks/test_bench_e2e_dfp_pipeline.py ``` Here are the benchmark comparisons for individual tests. When the control message type is "payload", the rolling window stage is bypassed, whereas when it is "streaming", the windows are created with historical data. diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py index d86ddbc660..0cb187a51b 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py @@ -161,6 +161,8 @@ def get_module_conf(self): source=(self.source), tracking_uri=mlflow.get_tracking_uri(), silence_monitors=True, + mlflow_experiment_name_formatter=self._get_experiment_name_formatter(), + mlflow_model_name_formatter=self._get_model_name_formatter(), train_users='generic') dfp_arg_parser.init() config_generator = ConfigGenerator(self.pipe_config, dfp_arg_parser, self.get_schema()) diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_inference.json index a258cf3fd5..f8d2b9ed3a 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-inference-data/*.json" + "../../../data/dfp/azure-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_train_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_train_inference.json index 0286129151..bf0a3771ef 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_train_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_train_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, @@ -28,7 +28,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-inference-data/*.json" + "../../../data/dfp/azure-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_training.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_training.json index d6e028d4eb..dad09e6062 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_training.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_load_training.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_lti.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_lti.json index 97053f82a4..1b1e226145 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_lti.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_lti.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, @@ -34,7 +34,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-inference-data/*.json" + "../../../data/dfp/azure-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_training.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_training.json index d6e028d4eb..dad09e6062 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_training.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_payload_training.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_inference.json index d122241e8c..9c5d889d5c 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-inference-data/*.json" + "../../../data/dfp/azure-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_lti.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_lti.json index f798a1a475..7a28c85d73 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_lti.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_lti.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, @@ -34,7 +34,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-inference-data/*.json" + "../../../data/dfp/azure-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_training.json b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_training.json index 8397489cb2..882b23a0a3 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_training.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/azure_streaming_training.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/azure-training-data/*.json" + "../../../data/dfp/azure-training-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_inference.json index 7b4bb9672a..ebd6669be7 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_load_train_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_load_train_inference.json index e378d82760..e30f66ccfe 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_load_train_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_load_train_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, @@ -28,7 +28,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_lti.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_lti.json index 07c69233ba..382ff22808 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_lti.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_lti.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, @@ -34,7 +34,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_only_load.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_only_load.json index 3a214ea810..ed69cbf5af 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_only_load.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_only_load.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } } diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_training.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_training.json index df21751d36..2b9995875f 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_training.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_payload_training.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_inference.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_inference.json index fc1db57669..2b033b9d7c 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_inference.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_inference.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_lti.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_lti.json index 91d41cad22..d74bae952b 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_lti.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_lti.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, @@ -34,7 +34,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_only_load.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_only_load.json index 28b2a2f7f1..205842d72b 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_only_load.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_only_load.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } } diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_payload.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_payload.json index ab5dadf0e5..3daf318961 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_payload.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_payload.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, @@ -28,7 +28,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-inference-data/*.json" + "../../../data/dfp/duo-inference-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_training.json b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_training.json index 73cc9046d9..4486149127 100644 --- a/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_training.json +++ b/examples/digital_fingerprinting/production/morpheus/control_messages/duo_streaming_training.json @@ -7,7 +7,7 @@ "properties": { "loader_id": "fsspec", "files": [ - "../../../../examples/data/dfp/duo-training-data/*.json" + "../../../data/dfp/duo-training-data/*.json" ] } }, diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py index c18da19ee4..a1ab2cb9c6 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py @@ -103,12 +103,14 @@ help=("The MLflow tracking URI to connect to the tracking backend.")) @click.option('--mlflow_experiment_name_template', type=str, - default="dfp/{source}/training/{reg_model_name}", - help="The MLflow experiment name template to use when logging experiments. ") + default=None, + help=("The MLflow experiment name template to use when logging experiments." + "If None, defaults to dfp/source/training/{reg_model_name}")) @click.option('--mlflow_model_name_template', type=str, - default="DFP-{source}-{user_id}", - help="The MLflow model name template to use when logging models. ") + default=None, + help=("The MLflow model name template to use when logging models." + "If None, defaults to DFP-source-{user_id}")) @click.option("--disable_pre_filtering", is_flag=True, help=("Enabling this option will skip pre-filtering of json messages. " @@ -140,6 +142,10 @@ def run_pipeline(source: str, if (skip_user and only_user): logging.error("Option --skip_user and --only_user are mutually exclusive. Exiting") + if mlflow_experiment_name_template is None: + mlflow_experiment_name_template = f'dfp/{source}/training/' + '{reg_model_name}' + if mlflow_model_name_template is None: + mlflow_model_name_template = f'DFP-{source}-' + '{user_id}' dfp_arg_parser = DFPArgParser(skip_user, only_user, start_time, diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py index e60792d6d3..29e7893d04 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py @@ -103,12 +103,14 @@ help=("The MLflow tracking URI to connect to the tracking backend.")) @click.option('--mlflow_experiment_name_template', type=str, - default="dfp/{source}/training/{reg_model_name}", - help="The MLflow experiment name template to use when logging experiments. ") + default=None, + help=("The MLflow experiment name template to use when logging experiments." + "If None, defaults to dfp/source/training/{reg_model_name}")) @click.option('--mlflow_model_name_template', type=str, - default="DFP-{source}-{user_id}", - help="The MLflow model name template to use when logging models. ") + default=None, + help=("The MLflow model name template to use when logging models." + "If None, defaults to DFP-source-{user_id}")) @click.option('--bootstrap_servers', type=str, default="localhost:9092", @@ -152,6 +154,11 @@ def run_pipeline(source: str, if (skip_user and only_user): logging.error("Option --skip_user and --only_user are mutually exclusive. Exiting") + if mlflow_experiment_name_template is None: + mlflow_experiment_name_template = f'dfp/{source}/training/' + '{reg_model_name}' + if mlflow_model_name_template is None: + mlflow_model_name_template = f'DFP-{source}-' + '{user_id}' + dfp_arg_parser = DFPArgParser(skip_user, only_user, start_time,