From a343c77b9fbc0a1f6e32de5b009d7d781788f7c5 Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Wed, 23 Aug 2023 12:00:22 -0400 Subject: [PATCH] Fixing linting errors which could not be resolved in 23.07 (#1082) There were too many linting errors to fix in 23.07 burndown so a PR has been created to fix this in 23.11. Closes #1050 Authors: - Michael Demoret (https://github.com/mdemoret-nv) - David Gardner (https://github.com/dagardner-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) - Tad ZeMicheal (https://github.com/tzemicheal) URL: https://github.com/nv-morpheus/Morpheus/pull/1082 --- docker/conda/environments/cuda11.8_dev.yml | 4 +- .../abp_pcap_preprocessing.py | 15 ++- .../recipient_features_stage.py | 2 +- .../{ => _lib}/pass_thru.py | 8 +- .../developer_guide/3_simple_cpp_stage/run.py | 3 +- .../morpheus/benchmarks/conftest.py | 8 +- .../benchmarks/test_bench_e2e_dfp_pipeline.py | 16 +-- .../dfp/modules/dfp_postprocessing.py | 2 +- .../dfp/stages/dfp_file_batcher_stage.py | 2 +- .../dfp/stages/dfp_inference_stage.py | 8 +- .../dfp/stages/dfp_mlflow_model_writer.py | 26 ++-- .../dfp/stages/dfp_postprocessing_stage.py | 2 +- .../dfp/stages/dfp_rolling_window_stage.py | 6 +- .../dfp/stages/dfp_split_users_stage.py | 3 +- .../morpheus/dfp/stages/dfp_training.py | 4 +- .../morpheus/dfp/utils/dfp_arg_parser.py | 8 +- .../morpheus/dfp/utils/model_cache.py | 37 +++--- .../morpheus/dfp/utils/user_model_manager.py | 15 ++- .../dfp_integrated_training_batch_pipeline.py | 6 +- ..._integrated_training_streaming_pipeline.py | 6 +- .../stages/graph_sage_stage.py | 7 +- examples/log_parsing/inference.py | 32 ++--- examples/log_parsing/postprocessing.py | 4 +- .../stages/create_features.py | 4 +- .../stages/preprocessing.py | 2 +- .../fraud-detection-models/training.py | 36 +++--- .../phish_bert_training_script.py | 6 +- .../root-cause-models/root_cause_bert.py | 6 +- .../sid-minibert-20230424-script.py | 12 +- morpheus.code-workspace | 10 +- morpheus/cli/register_stage.py | 76 +++++++----- morpheus/io/deserializers.py | 8 +- morpheus/messages/memory/tensor_memory.py | 4 +- morpheus/messages/multi_message.py | 2 +- morpheus/messages/multi_tensor_message.py | 4 +- morpheus/models/dfencoder/__init__.py | 2 +- morpheus/modules/filter_detections.py | 2 +- morpheus/parsers/splunk_notable_parser.py | 6 +- morpheus/parsers/url_parser.py | 2 +- morpheus/pipeline/pipeline.py | 94 ++++++++------- morpheus/pipeline/receiver.py | 8 +- morpheus/pipeline/single_port_stage.py | 14 ++- morpheus/pipeline/stream_wrapper.py | 24 ++-- morpheus/stages/general/trigger_stage.py | 4 - morpheus/stages/inference/inference_stage.py | 24 ++-- .../stages/input/autoencoder_source_stage.py | 17 +-- .../control_message_file_source_stage.py | 2 +- morpheus/stages/input/kafka_source_stage.py | 15 ++- .../stages/output/compare_dataframe_stage.py | 16 +-- .../stages/output/write_to_kafka_stage.py | 8 +- .../postprocess/filter_detections_stage.py | 11 +- .../stages/postprocess/ml_flow_drift_stage.py | 4 +- .../stages/postprocess/serialize_stage.py | 18 ++- .../stages/postprocess/timeseries_stage.py | 50 ++++---- morpheus/stages/preprocess/train_ae_stage.py | 8 +- morpheus/utils/module_utils.py | 8 +- morpheus/utils/monitor_utils.py | 41 ++++--- pyproject.toml | 27 ++++- tests/conftest.py | 104 ++++++++++++++-- tests/dfencoder/test_autoencoder.py | 113 +++++++++--------- .../test_dfencoder_distributed_e2e.py | 5 +- tests/dfencoder/test_dfencoder_e2e.py | 9 +- 
.../developer_guide/test_pass_thru.py | 21 ++-- .../test_dfp_file_batcher_stage.py | 17 +-- .../test_dfp_inference_stage.py | 10 +- .../test_dfp_mlflow_model_writer.py | 24 ++-- .../test_dfp_postprocessing_stage.py | 7 +- .../test_dfp_rolling_window_stage.py | 10 +- .../test_dfp_split_users_stage.py | 18 +-- .../test_dfp_training.py | 4 +- .../test_dfp_viz_postproc.py | 10 +- .../gnn_fraud_detection_pipeline/conftest.py | 16 ++- .../test_graph_construction_stage.py | 24 ++-- .../test_graph_sage_stage.py | 39 +++--- tests/examples/log_parsing/conftest.py | 4 +- tests/examples/log_parsing/test_inference.py | 5 +- tests/examples/log_parsing/test_pipe.py | 2 +- .../examples/ransomware_detection/conftest.py | 5 +- .../test_preprocessing.py | 20 +++- tests/io/test_loader_registry.py | 4 +- tests/modules/test_from_control_message.py | 8 +- tests/test_add_classifications_stage.py | 26 ++-- tests/test_add_classifications_stage_pipe.py | 4 +- tests/test_add_scores_stage.py | 20 ++-- tests/test_add_scores_stage_pipe.py | 4 +- tests/test_appshield_source_stage.py | 7 +- tests/test_concat_df.py | 5 +- tests/test_conftest.py | 33 +++-- tests/test_deserialize_stage_pipe.py | 3 +- tests/test_file_in_out.py | 1 + tests/test_inference_worker.py | 26 ++-- tests/test_ip.py | 50 ++++---- tests/test_kafka_source_stage_pipe.py | 4 +- tests/test_linear_modules_stage.py | 2 +- tests/test_multi_message.py | 38 ++++-- tests/test_multi_port_modules_stage.py | 3 +- tests/test_multi_port_pipeline.py | 2 +- tests/test_phishing.py | 2 +- tests/test_phishing_kafka.py | 6 +- tests/test_pipe_viz.py | 1 + tests/test_preallocation_pipe.py | 14 +-- tests/test_sid_kafka.py | 6 +- tests/test_url_parser.py | 48 ++++---- tests/utils/inference_worker.py | 7 ++ tests/utils/stages/conv_msg.py | 8 +- tests/utils/test_directories.py | 2 +- 106 files changed, 919 insertions(+), 681 deletions(-) rename examples/developer_guide/3_simple_cpp_stage/{ => _lib}/pass_thru.py (89%) diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index ab8cf4faa2..cea252c9a4 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -82,8 +82,8 @@ dependencies: - python-confluent-kafka=1.9.2 - python-graphviz - python=3.10 - - pytorch=2.0.1 - pytorch-cuda + - pytorch=2.0.1 - rapidjson=1.1.0 - scikit-build=0.17.1 - scikit-learn=1.2.2 @@ -91,8 +91,8 @@ dependencies: - sphinx_rtd_theme - sqlalchemy<2.0 # 2.0 is incompatible with pandas=1.3 - sysroot_linux-64=2.17 - - tritonclient=2.26 # Required by NvTabular, force the version, so we get protobufs compatible with 4.21 - tqdm=4 + - tritonclient=2.26 # Required by NvTabular, force the version, so we get protobufs compatible with 4.21 - typing_utils=0.1 - watchdog=2.1 - yapf=0.40.1 diff --git a/examples/abp_pcap_detection/abp_pcap_preprocessing.py b/examples/abp_pcap_detection/abp_pcap_preprocessing.py index 9986ec5c9b..4519c516f9 100644 --- a/examples/abp_pcap_detection/abp_pcap_preprocessing.py +++ b/examples/abp_pcap_detection/abp_pcap_preprocessing.py @@ -21,7 +21,6 @@ import cudf -import morpheus._lib.stages as _stages from morpheus.cli.register_stage import register_stage from morpheus.common import TypeId from morpheus.config import Config @@ -98,17 +97,17 @@ def pre_process_batch(x: MultiMessage, fea_len: int, fea_cols: typing.List[str], df["timestamp"] = x.get_meta("timestamp").astype("int64") def round_time_kernel(timestamp, rollup_time, secs): - for i, ts in enumerate(timestamp): - x = ts % secs + for i, 
time in enumerate(timestamp): + x = time % secs y = 1 - (x / secs) delta = y * secs - rollup_time[i] = ts + delta + rollup_time[i] = time + delta df = df.apply_rows( round_time_kernel, incols=["timestamp"], - outcols=dict(rollup_time=np.int64), - kwargs=dict(secs=60000000), + outcols={"rollup_time": np.int64}, + kwargs={"secs": 60000000}, ) df["rollup_time"] = cudf.to_datetime(df["rollup_time"], unit="us").dt.strftime("%Y-%m-%d %H:%M") @@ -149,7 +148,7 @@ def round_time_kernel(timestamp, rollup_time, secs): # syn/all - Number of flows with SYN flag to all flows # fin/all - Number of flows with FIN flag to all flows for col in ["rst", "syn", "fin"]: - dst_col = "{}/all".format(col) + dst_col = f"{col}/all" grouped_df[dst_col] = grouped_df[col] / grouped_df["all"] # Adding index column to retain the order of input messages. @@ -197,4 +196,4 @@ def _get_preprocess_fn(self) -> typing.Callable[[MultiMessage], MultiInferenceMe req_cols=self.req_cols) def _get_preprocess_node(self, builder: mrc.Builder): - return _stages.AbpPcapPreprocessingStage(builder, self.unique_name) + raise NotImplementedError("C++ node not implemented for this stage") diff --git a/examples/developer_guide/2_1_real_world_phishing/recipient_features_stage.py b/examples/developer_guide/2_1_real_world_phishing/recipient_features_stage.py index 1b456f4905..0e933e909c 100644 --- a/examples/developer_guide/2_1_real_world_phishing/recipient_features_stage.py +++ b/examples/developer_guide/2_1_real_world_phishing/recipient_features_stage.py @@ -45,7 +45,7 @@ def __init__(self, config: Config, sep_token: str = '[SEP]'): if config.mode != PipelineModes.NLP: raise RuntimeError("RecipientFeaturesStage must be used in a pipeline configured for NLP") - if len(sep_token): + if len(sep_token) > 0: self._sep_token = sep_token else: raise ValueError("sep_token cannot be an empty string") diff --git a/examples/developer_guide/3_simple_cpp_stage/pass_thru.py b/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.py similarity index 89% rename from examples/developer_guide/3_simple_cpp_stage/pass_thru.py rename to examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.py index 12de92f7b1..18042750ac 100644 --- a/examples/developer_guide/3_simple_cpp_stage/pass_thru.py +++ b/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.py @@ -18,18 +18,17 @@ import mrc from mrc.core import operators as ops -from _lib import morpheus_example as morpheus_example_cpp from morpheus.cli.register_stage import register_stage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair -@register_stage("pass-thru") +@register_stage("pass-thru-cpp") class PassThruStage(SinglePortStage): @property def name(self) -> str: - return "pass-thru" + return "pass-thru-cpp" def accepted_types(self) -> typing.Tuple: return (typing.Any, ) @@ -43,6 +42,9 @@ def on_data(self, message: typing.Any): def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: if self._build_cpp_node(): + from . 
import morpheus_example as morpheus_example_cpp + + # pylint: disable=c-extension-no-member node = morpheus_example_cpp.PassThruStage(builder, self.unique_name) else: node = builder.make_node(self.unique_name, ops.map(self.on_data)) diff --git a/examples/developer_guide/3_simple_cpp_stage/run.py b/examples/developer_guide/3_simple_cpp_stage/run.py index 1a9bcfc90b..a01d5fa161 100755 --- a/examples/developer_guide/3_simple_cpp_stage/run.py +++ b/examples/developer_guide/3_simple_cpp_stage/run.py @@ -17,8 +17,7 @@ import logging import os -from pass_thru import PassThruStage - +from _lib.pass_thru import PassThruStage from morpheus.config import Config from morpheus.pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/conftest.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/conftest.py index ef3ea99924..649c4666be 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/conftest.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/conftest.py @@ -28,7 +28,7 @@ def get_json_lines_count(filename): return len(lines) -def pytest_benchmark_update_json(config, benchmarks, output_json): +def pytest_benchmark_update_json(_, __, output_json): curr_dir = path.dirname(path.abspath(__file__)) @@ -58,9 +58,9 @@ def pytest_benchmark_update_json(config, benchmarks, output_json): elif "glob_path" in PIPELINES_CONF[bench["name"]]: source_files_glob = path.join(curr_dir, PIPELINES_CONF[bench["name"]]["glob_path"]) - for fn in glob.glob(source_files_glob): - line_count += get_json_lines_count(fn) - byte_count += path.getsize(fn) + for filename in glob.glob(source_files_glob): + line_count += get_json_lines_count(filename) + byte_count += path.getsize(filename) elif "message_path" in PIPELINES_CONF[bench["name"]]: source_message_glob = path.join(curr_dir, PIPELINES_CONF[bench["name"]]["message_path"]) for message_fn in glob.glob(source_message_glob): diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py index 18a9768344..7a7a4ffc6d 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py @@ -20,7 +20,7 @@ import typing import boto3 -import dfp.modules # noqa: F401 +import dfp.modules # noqa: F401 # pylint:disable=unused-import import pytest from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage @@ -35,13 +35,13 @@ from dfp.utils.regex_utils import iso_date_regex from dfp.utils.schema_utils import Schema -import morpheus.loaders # noqa: F401 -import morpheus.modules # noqa: F401 +import morpheus.loaders # noqa: F401 # pylint:disable=unused-import +import morpheus.modules # noqa: F401 # pylint:disable=unused-import from benchmarks.benchmark_conf_generator import BenchmarkConfGenerator from benchmarks.benchmark_conf_generator import load_json from benchmarks.benchmark_conf_generator import set_mlflow_tracking_uri -from morpheus._lib.common import FileTypes -from morpheus._lib.common import FilterSource +from morpheus.common import FileTypes +from morpheus.common import FilterSource from morpheus.config import Config from morpheus.pipeline.linear_pipeline import LinearPipeline 
from morpheus.pipeline.pipeline import Pipeline @@ -77,7 +77,7 @@ def remove_cache(cache_dir: str): def dfp_modules_pipeline(pipe_config: Config, - modules_conf: typing.Dict[str, any], + modules_conf: typing.Dict[str, typing.Any], filenames: typing.List[str], reuse_cache=False): @@ -99,7 +99,7 @@ def dfp_modules_pipeline(pipe_config: Config, def dfp_training_pipeline_stages(pipe_config: Config, - stages_conf: typing.Dict[str, any], + stages_conf: typing.Dict[str, typing.Any], source_schema: DataFrameInputSchema, preprocess_schema: DataFrameInputSchema, filenames: typing.List[str], @@ -150,7 +150,7 @@ def dfp_training_pipeline_stages(pipe_config: Config, def dfp_inference_pipeline_stages(pipe_config: Config, - stages_conf: typing.Dict[str, any], + stages_conf: typing.Dict[str, typing.Any], source_schema: DataFrameInputSchema, preprocess_schema: DataFrameInputSchema, filenames: typing.List[str], diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py b/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py index 8002c1ec70..76b2a6db1b 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py @@ -53,7 +53,7 @@ def process_events(message: MultiAEMessage): # df = message.get_meta() # df['event_time'] = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') # df.replace(np.nan, 'NaN', regex=True, inplace=True) - # TODO figure out why we are not able to set meta for a whole dataframe, but works for single column. + # TODO(Devin): figure out why we are not able to set meta for a whole dataframe, but works for single column. # message.set_meta(None, df) message.set_meta("event_time", datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py index f48417ceb1..271acc4833 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py @@ -28,7 +28,7 @@ from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") TimestampFileObj = namedtuple("TimestampFileObj", ["timestamp", "file_object"]) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py index 9dacbde662..2bab6dadcd 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py @@ -30,7 +30,7 @@ from ..utils.model_cache import ModelCache from ..utils.model_cache import ModelManager -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class DFPInferenceStage(SinglePortStage): @@ -93,12 +93,12 @@ def on_data(self, message: MultiDFPMessage) -> MultiDFPMessage: model_cache = self.get_model(user_id) if (model_cache is None): - raise RuntimeError("Could not find model for user {}".format(user_id)) + raise RuntimeError(f"Could not find model for user {user_id}") 
loaded_model = model_cache.load_model(self._client) - except Exception: # TODO - logger.exception("Error trying to get model") + except Exception: + logger.exception("Error trying to get model", exc_info=True) return None post_model_time = time.time() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py index 8daf3f167b..240a329065 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py @@ -45,7 +45,7 @@ # Setup conda environment conda_env = { 'channels': ['defaults', 'conda-forge'], - 'dependencies': ['python={}'.format('3.10'), 'pip'], + 'dependencies': ['python=3.10', 'pip'], 'pip': ['mlflow'], 'name': 'mlflow-env' } @@ -131,7 +131,8 @@ def _apply_model_permissions(self, reg_model_name: str): get_registered_model_response = requests.get(url=get_registered_model_url, headers=headers, - params={"name": reg_model_name}) + params={"name": reg_model_name}, + timeout=10) registered_model_response = get_registered_model_response.json() @@ -150,7 +151,8 @@ def _apply_model_permissions(self, reg_model_name: str): requests.patch(url=patch_registered_model_permissions_url, headers=headers, - json=patch_registered_model_permissions_body) + json=patch_registered_model_permissions_body, + timeout=10) except Exception: logger.exception("Error occurred trying to apply model permissions to model: %s", @@ -194,14 +196,14 @@ def on_data(self, message: MultiAEMessage): metrics_dict: typing.Dict[str, float] = {} # Add info on the embeddings - for k, v in model.categorical_fts.items(): - embedding = v.get("embedding", None) + for key, value in model.categorical_fts.items(): + embedding = value.get("embedding", None) if (embedding is None): continue - metrics_dict[f"embedding-{k}-num_embeddings"] = embedding.num_embeddings - metrics_dict[f"embedding-{k}-embedding_dim"] = embedding.embedding_dim + metrics_dict[f"embedding-{key}-num_embeddings"] = embedding.num_embeddings + metrics_dict[f"embedding-{key}-embedding_dim"] = embedding.embedding_dim mlflow.log_metrics(metrics_dict) @@ -252,12 +254,12 @@ def on_data(self, message: MultiAEMessage): } # Now create the model version - mv = client.create_model_version(name=reg_model_name, - source=model_src, - run_id=run.info.run_id, - tags=tags) + model_version = client.create_model_version(name=reg_model_name, + source=model_src, + run_id=run.info.run_id, + tags=tags) - logger.debug("ML Flow model upload complete: %s:%s:%s", user, reg_model_name, mv.version) + logger.debug("ML Flow model upload complete: %s:%s:%s", user, reg_model_name, model_version.version) except Exception: logger.exception("Error uploading model to ML Flow", exc_info=True) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py index 7d9002ba22..a6d17e2cf5 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py @@ -28,7 +28,7 @@ from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = 
logging.getLogger(f"morpheus.{__name__}") class DFPPostprocessingStage(SinglePortStage): diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py index 089ce07acb..1e31fd305a 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py @@ -31,7 +31,7 @@ from ..utils.cached_user_window import CachedUserWindow from ..utils.logging_timer import log_time -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class DFPRollingWindowStage(SinglePortStage): @@ -124,8 +124,8 @@ def _build_window(self, message: DFPMessageMeta) -> MultiDFPMessage: if (not user_cache.append_dataframe(incoming_df=incoming_df)): # Then our incoming dataframe wasnt even covered by the window. Generate warning - logger.warn(("Incoming data preceeded existing history. " - "Consider deleting the rolling window cache and restarting.")) + logger.warning(("Incoming data preceeded existing history. " + "Consider deleting the rolling window cache and restarting.")) return None # Exit early if we dont have enough data diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py index 06cdbc4074..e37688c8df 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py @@ -31,7 +31,7 @@ from ..messages.multi_dfp_message import DFPMessageMeta from ..utils.logging_timer import log_time -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class DFPSplitUsersStage(SinglePortStage): @@ -114,6 +114,7 @@ def extract_users(self, message: DataFrameType) -> typing.List[DFPMessageMeta]: if (self._include_individual): + # pylint: disable=unnecessary-comprehension split_dataframes.update({ username: user_df for username, diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py index 2269a71f32..78486fc29d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py @@ -28,7 +28,7 @@ from ..messages.multi_dfp_message import MultiDFPMessage -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class DFPTraining(SinglePortStage): @@ -71,7 +71,7 @@ def __init__(self, c: Config, model_kwargs: dict = None, epochs=30, validation_s self._epochs = epochs - if (validation_size >= 0.0 and validation_size < 1.0): + if (0.0 <= validation_size < 1.0): self._validation_size = validation_size else: raise ValueError(f"validation_size={validation_size} should be a positive float in the (0, 1) range") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py b/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py index aca6e8a760..dffe16e33c 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py +++ 
b/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py @@ -68,7 +68,7 @@ def __init__(self, self._model_name_formatter = f"DFP-{source}-" + "{user_id}" self._experiment_name_formatter = f"dfp/{source}/training/" + "{reg_model_name}" - def verify_init(func): + def verify_init(self, func): def wrapper(self, *args, **kwargs): if not self._initialized: @@ -134,7 +134,7 @@ def experiment_name_formatter(self): return self._experiment_name_formatter def _set_include_generic(self): - self._include_generic = self._train_users == "all" or self._train_users == "generic" + self._include_generic = self._train_users in ('all', 'generic') def _set_include_individual(self): self._include_individual = self._train_users != "generic" @@ -150,9 +150,9 @@ def _create_time_fields(self, duration) -> TimeFields: end_time = self._start_time + duration - tf = TimeFields(self._start_time, end_time) + time_fields = TimeFields(self._start_time, end_time) - return tf + return time_fields def _set_mlflow_tracking_uri(self): if self._tracking_uri is None: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py b/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py index 3378d5a98d..2a0da79752 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py @@ -29,7 +29,7 @@ from .logging_timer import log_time -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") @contextmanager @@ -92,7 +92,7 @@ def last_used(self): def last_checked(self): return self._last_checked - def load_model(self, client) -> AutoEncoder: + def load_model(self, _) -> AutoEncoder: now = datetime.now() @@ -164,13 +164,12 @@ def load_model_cache(self, client) -> ModelCache: model_cache = self._manager.load_model_cache(client=client, reg_model_name=self._reg_model_name) if (model_cache is None): - raise RuntimeError("Model was found but now no longer exists. Model: {}".format( - self._reg_model_name)) + raise RuntimeError(f"Model was found but now no longer exists. Model: {self._reg_model_name}") return model_cache - except TimeoutError: + except TimeoutError as e: logger.error("Deadlock detected while loading model cache. Please report this to the developers.") - raise RuntimeError("Deadlock detected while loading model cache") + raise RuntimeError("Deadlock detected while loading model cache") from e class ModelManager: @@ -230,9 +229,9 @@ def _model_exists(self, reg_model_name: str) -> bool: # Save the update time self._existing_models_updated = now - except TimeoutError: + except TimeoutError as e: logger.error("Deadlock detected checking for new models. 
Please report this to the developers.") - raise RuntimeError("Deadlock detected checking for new models") + raise RuntimeError("Deadlock detected checking for new models") from e except Exception: logger.exception("Exception occurred when querying the list of available models", exc_info=True) raise @@ -242,7 +241,10 @@ def _model_exists(self, reg_model_name: str) -> bool: def user_id_to_model(self, user_id: str): return user_to_model_name(user_id=user_id, model_name_formatter=self._model_name_formatter) - def load_user_model(self, client, user_id: str, fallback_user_ids: typing.List[str] = []) -> ModelCache: + def load_user_model(self, client, user_id: str, fallback_user_ids: typing.List[str] = None) -> ModelCache: + + if (fallback_user_ids is None): + fallback_user_ids = [] # First get the UserModel user_model_cache = self.load_user_model_cache(user_id=user_id, fallback_user_ids=fallback_user_ids) @@ -311,17 +313,20 @@ def load_model_cache(self, client: MlflowClient, reg_model_name: str) -> ModelCa # Check if we need to push out a cache entry if (len(self._model_cache) > self._model_cache_size_max): - time_sorted = sorted([(k, v) for k, v in self._model_cache.items()], key=lambda x: x[1].last_used) + time_sorted = sorted(list(self._model_cache.items()), key=lambda x: x[1].last_used) to_delete = time_sorted[0][0] self._model_cache.pop(to_delete) return model_cache - except TimeoutError: + except TimeoutError as e: logger.error("Deadlock when trying to acquire model cache lock") - raise RuntimeError("Deadlock when trying to acquire model cache lock") + raise RuntimeError("Deadlock when trying to acquire model cache lock") from e + + def load_user_model_cache(self, user_id: str, fallback_user_ids: typing.List[str] = None) -> UserModelMap: + if (fallback_user_ids is None): + fallback_user_ids = [] - def load_user_model_cache(self, user_id: str, fallback_user_ids: typing.List[str] = []) -> UserModelMap: try: with timed_acquire(self._user_model_cache_lock, timeout=1.0): @@ -331,6 +336,6 @@ def load_user_model_cache(self, user_id: str, fallback_user_ids: typing.List[str fallback_user_ids=fallback_user_ids) return self._user_model_cache[user_id] - except TimeoutError: - logger.error("Deadlock when trying to acquire user model cache lock") - raise RuntimeError("Deadlock when trying to acquire user model cache lock") + except TimeoutError as e: + logger.error("Deadlock when trying to acquire user model cache lock", exc_info=True) + raise RuntimeError("Deadlock when trying to acquire user model cache lock") from e diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py b/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py index 9f09971c38..f13340aeaf 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py @@ -22,7 +22,7 @@ from morpheus.models.dfencoder import AutoEncoder from morpheus.utils.seed import manual_seed -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class DFPDataLoader: @@ -92,26 +92,29 @@ class InsufficientDataError(RuntimeError): pass -class UserModelManager(object): +class UserModelManager: def __init__(self, - c: Config, + config: Config, user_id: str, save_model: bool, epochs: int, min_history: int, max_history: int, seed: int = None, - batch_files: typing.List = [], + batch_files: typing.List = None, 
model_class=AutoEncoder) -> None: super().__init__() + if (batch_files is None): + batch_files = [] + self._user_id = user_id self._history: pd.DataFrame = None self._min_history: int = min_history self._max_history: int = max_history self._seed: int = seed - self._feature_columns = c.ae.feature_columns + self._feature_columns = config.ae.feature_columns self._epochs = epochs self._save_model = save_model self._model_class = model_class @@ -178,7 +181,7 @@ def train_from_batch(self, filter_func=lambda df: df): return model, loader.get_sample_frame() except InsufficientDataError: - logger.debug(f"Training AE model for user: '{self._user_id}... Skipped") + logger.debug("Training AE model for user: '%s... Skipped", self._user_id) return None, None except Exception: logger.exception("Error during training for user: %s", self._user_id, exc_info=True) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py index 46ee152474..8384a0ebaf 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py @@ -19,15 +19,15 @@ import click # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import -import dfp.modules # noqa: F401 +import dfp.modules # noqa: F401 # pylint:disable=unused-import from dfp.utils.config_generator import ConfigGenerator from dfp.utils.config_generator import generate_ae_config from dfp.utils.dfp_arg_parser import DFPArgParser from dfp.utils.schema_utils import Schema from dfp.utils.schema_utils import SchemaBuilder -import morpheus.loaders # noqa: F401 -import morpheus.modules # noqa: F401 +import morpheus.loaders # noqa: F401 # pylint:disable=unused-import +import morpheus.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py index d115c09e44..6374a61f5d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py @@ -19,15 +19,15 @@ import click # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. 
-import dfp.modules # noqa: F401 +import dfp.modules # noqa: F401 # pylint:disable=unused-import from dfp.utils.config_generator import ConfigGenerator from dfp.utils.config_generator import generate_ae_config from dfp.utils.dfp_arg_parser import DFPArgParser from dfp.utils.schema_utils import Schema from dfp.utils.schema_utils import SchemaBuilder -import morpheus.loaders # noqa: F401 -import morpheus.modules # noqa: F401 +import morpheus.loaders # noqa: F401 # pylint:disable=unused-import +import morpheus.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config diff --git a/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py b/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py index 0c2cc139e9..90c3ebd5df 100644 --- a/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py +++ b/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py @@ -58,13 +58,16 @@ def __init__(self, c: Config, model_hinsage_file: str, batch_size: int = 5, - sample_size: typing.List[int] = [2, 32], + sample_size: typing.List[int] = None, record_id: str = "index", target_node: str = "transaction"): super().__init__(c) + if (sample_size is None): + sample_size = [2, 32] + # Must import stellargraph before loading the model - import stellargraph.mapper # noqa + import stellargraph.mapper # noqa: F401 # pylint:disable=unused-import import tensorflow as tf self._keras_model = tf.keras.models.load_model(model_hinsage_file) diff --git a/examples/log_parsing/inference.py b/examples/log_parsing/inference.py index d9a7c6011c..a04d39e0e0 100644 --- a/examples/log_parsing/inference.py +++ b/examples/log_parsing/inference.py @@ -23,10 +23,10 @@ from mrc.core import operators as ops from scipy.special import softmax -from messages import MultiPostprocLogParsingMessage -from messages import MultiResponseLogParsingMessage -from messages import PostprocMemoryLogParsing -from messages import ResponseMemoryLogParsing +from messages import MultiPostprocLogParsingMessage # pylint: disable=no-name-in-module +from messages import MultiResponseLogParsingMessage # pylint: disable=no-name-in-module +from messages import PostprocMemoryLogParsing # pylint: disable=no-name-in-module +from messages import ResponseMemoryLogParsing # pylint: disable=no-name-in-module from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.config import PipelineModes @@ -205,17 +205,17 @@ def on_next(x: MultiInferenceMessage): fut = mrc.Future() - def set_output_fut(resp: ResponseMemoryLogParsing, b, f: mrc.Future): + def set_output_fut(resp: ResponseMemoryLogParsing, inner_b, inner_f: mrc.Future): nonlocal outstanding_requests - m = self._convert_one_response(memory, b, resp) + inner_memory = self._convert_one_response(memory, inner_b, resp) - f.set_result(m) + inner_f.set_result(inner_memory) outstanding_requests -= 1 fut_list.append(fut) - worker.process(batch, partial(set_output_fut, b=batch, f=fut)) + worker.process(batch, partial(set_output_fut, inner_b=batch, inner_f=fut)) for f in fut_list: f.result() @@ -240,17 +240,17 @@ def set_output_fut(resp: ResponseMemoryLogParsing, b, f: mrc.Future): return stream, out_type @staticmethod - def _convert_one_response(memory: PostprocMemoryLogParsing, + def _convert_one_response(output: PostprocMemoryLogParsing, inf: MultiInferenceMessage, res: ResponseMemoryLogParsing): - 
memory.input_ids[inf.offset:inf.count + inf.offset, :] = inf.input_ids - memory.seq_ids[inf.offset:inf.count + inf.offset, :] = inf.seq_ids + output.input_ids[inf.offset:inf.count + inf.offset, :] = inf.input_ids + output.seq_ids[inf.offset:inf.count + inf.offset, :] = inf.seq_ids # Two scenarios: if (inf.mess_count == inf.count): - memory.confidences[inf.offset:inf.count + inf.offset, :] = res.confidences - memory.labels[inf.offset:inf.count + inf.offset, :] = res.labels + output.confidences[inf.offset:inf.count + inf.offset, :] = res.confidences + output.labels[inf.offset:inf.count + inf.offset, :] = res.labels else: assert inf.count == res.count @@ -258,10 +258,10 @@ def _convert_one_response(memory: PostprocMemoryLogParsing, # Out message has more reponses, so we have to do key based blending of probs for i, idx in enumerate(mess_ids): - memory.confidences[idx, :] = cp.maximum(memory.confidences[idx, :], res.confidences[i, :]) - memory.labels[idx, :] = cp.maximum(memory.labels[idx, :], res.labels[i, :]) + output.confidences[idx, :] = cp.maximum(output.confidences[idx, :], res.confidences[i, :]) + output.labels[idx, :] = cp.maximum(output.labels[idx, :], res.labels[i, :]) - return MultiPostprocLogParsingMessage.from_message(inf, memory=memory, offset=inf.offset, count=inf.mess_count) + return MultiPostprocLogParsingMessage.from_message(inf, memory=output, offset=inf.offset, count=inf.mess_count) def _get_inference_worker(self, inf_queue: ProducerConsumerQueue) -> InferenceWorker: diff --git a/examples/log_parsing/postprocessing.py b/examples/log_parsing/postprocessing.py index fe24eb370e..7965ceb384 100644 --- a/examples/log_parsing/postprocessing.py +++ b/examples/log_parsing/postprocessing.py @@ -22,8 +22,8 @@ import pandas as pd from mrc.core import operators as ops -from messages import MultiPostprocLogParsingMessage -from messages import MultiResponseLogParsingMessage +from messages import MultiPostprocLogParsingMessage # pylint: disable=no-name-in-module +from messages import MultiResponseLogParsingMessage # pylint: disable=no-name-in-module from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.config import PipelineModes diff --git a/examples/ransomware_detection/stages/create_features.py b/examples/ransomware_detection/stages/create_features.py index bd74bdb83b..3ad09d6968 100644 --- a/examples/ransomware_detection/stages/create_features.py +++ b/examples/ransomware_detection/stages/create_features.py @@ -19,8 +19,8 @@ from dask.distributed import Client -from common.data_models import FeatureConfig -from common.feature_extractor import FeatureExtractor +from common.data_models import FeatureConfig # pylint: disable=no-name-in-module +from common.feature_extractor import FeatureExtractor # pylint: disable=no-name-in-module from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.config import PipelineModes diff --git a/examples/ransomware_detection/stages/preprocessing.py b/examples/ransomware_detection/stages/preprocessing.py index 9f8abfe423..80f2df924e 100644 --- a/examples/ransomware_detection/stages/preprocessing.py +++ b/examples/ransomware_detection/stages/preprocessing.py @@ -18,7 +18,7 @@ import mrc import pandas as pd -from common.data_models import SnapshotData +from common.data_models import SnapshotData # pylint: disable=no-name-in-module from morpheus.cli.register_stage import register_stage from morpheus.common import TypeId from morpheus.config import Config diff 
--git a/models/training-tuning-scripts/fraud-detection-models/training.py b/models/training-tuning-scripts/fraud-detection-models/training.py index 80c7144a06..67873cc39e 100644 --- a/models/training-tuning-scripts/fraud-detection-models/training.py +++ b/models/training-tuning-scripts/fraud-detection-models/training.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""" +""" # EXample usage: python training.py --training-data ../../datasets/training-data/fraud-detection-training-data.csv \ --validation-data ../../datasets/validation-data/fraud-detection-validation-data.csv \ @@ -95,7 +95,7 @@ def train_model(train_graph, node_identifiers, label): # Global parameters: batch_size = 5 xgb_n_estimator = 100 - num_samples = [2, 32] + n_samples = [2, 32] # The mapper feeds data from sampled subgraph to GraphSAGE model train_node_identifiers = node_identifiers[:round(0.8 * len(node_identifiers))] @@ -103,12 +103,12 @@ def train_model(train_graph, node_identifiers, label): validation_node_identifiers = node_identifiers[round(0.8 * len(node_identifiers)):] validation_labels = label.loc[validation_node_identifiers] - generator = HinSAGENodeGenerator(train_graph, batch_size, num_samples, head_node_type=embedding_node_type) + generator = HinSAGENodeGenerator(train_graph, batch_size, n_samples, head_node_type=EMBEDDING_NODE_TYPE) train_gen = generator.flow(train_node_identifiers, train_labels, shuffle=True) test_gen = generator.flow(validation_node_identifiers, validation_labels) # HinSAGE model - model = HinSAGE(layer_sizes=[embedding_size] * len(num_samples), generator=generator, dropout=0) + model = HinSAGE(layer_sizes=[embedding_size] * len(n_samples), generator=generator, dropout=0) x_inp, x_out = model.build() # Final estimator layer @@ -144,7 +144,7 @@ def save_model(model, output_xgboost, output_hinsage): model['xgb'].save_model(output_xgboost) -def inductive_step_hinsage(S, trained_model, inductive_node_identifiers, batch_size): +def inductive_step_hinsage(graph: StellarGraph, trained_model, inductive_node_identifiers: list, batch_size: int): """ This function generates embeddings for unseen nodes using a trained hinsage model. @@ -152,9 +152,9 @@ def inductive_step_hinsage(S, trained_model, inductive_node_identifiers, batch_s Parameters ---------- - S : StellarGraph Object + graph : StellarGraph Object The graph on which HinSAGE is deployed. - trained_model : Neural Network + trained_model: Model The trained hinsage model, containing the trained and optimized aggregation functions per depth. 
inductive_node_identifiers : list Defines the nodes that HinSAGE needs to generate embeddings for @@ -164,7 +164,7 @@ def inductive_step_hinsage(S, trained_model, inductive_node_identifiers, batch_s """ # The mapper feeds data from sampled subgraph to HinSAGE model - generator = HinSAGENodeGenerator(S, batch_size, num_samples, head_node_type="transaction") + generator = HinSAGENodeGenerator(graph, batch_size, num_samples, head_node_type="transaction") test_gen_not_shuffled = generator.flow(inductive_node_identifiers, shuffle=False) inductive_emb = np.concatenate([trained_model.predict(row[0], verbose=1) for row in test_gen_not_shuffled]) @@ -173,14 +173,14 @@ def inductive_step_hinsage(S, trained_model, inductive_node_identifiers, batch_s return inductive_emb -def model_eval(trained_model, S, node_identifier, label): +def model_eval(trained_model, graph, node_identifier, label): - inductive_emb = inductive_step_hinsage(S, trained_model['hinsage'], node_identifier, batch_size=5) + inductive_emb = inductive_step_hinsage(graph, trained_model['hinsage'], node_identifier, batch_size=5) predictions = trained_model['xgb'].predict_proba(inductive_emb) # evaluate performance. - eval = Evaluation(predictions, label, "GraphSAGE+features") - eval.f1_ap_rec() - print(f"AUC -- {eval.roc_curve()}") + eval_obj = Evaluation(predictions, label, "GraphSAGE+features") + eval_obj.f1_ap_rec() + print(f"AUC -- {eval_obj.roc_curve()}") def main(): @@ -192,9 +192,9 @@ def main(): # train_data, val_data, train_data_index, val_data_index = split_train_test(df, 0.7, 1.0,0.7) print("Graph construction") - S_graph = build_graph_features(train_data) + s_graph = build_graph_features(train_data) print("Model Training...") - model = train_model(S_graph, node_identifiers=list(train_data.index), label=train_data['fraud_label']) + model = train_model(s_graph, node_identifiers=list(train_data.index), label=train_data['fraud_label']) # print(model) print("Save trained model") if args.save_model: @@ -202,8 +202,8 @@ def main(): # Save graph info print("Model Evaluation...") inductive_data = pd.concat((train_data, val_data)) - S_graph = build_graph_features(inductive_data) - model_eval(model, S_graph, node_identifier=list(val_data.index), label=val_data['fraud_label']) + s_graph = build_graph_features(inductive_data) + model_eval(model, s_graph, node_identifier=list(val_data.index), label=val_data['fraud_label']) if __name__ == "__main__": @@ -223,7 +223,7 @@ def main(): # Global parameters: embedding_size = int(args.embedding_size) epochs = int(args.epochs) - embedding_node_type = str(args.node_type) + EMBEDDING_NODE_TYPE = str(args.node_type) num_samples = [2, 32] main() diff --git a/models/training-tuning-scripts/phishing-models/phish_bert_training_script.py b/models/training-tuning-scripts/phishing-models/phish_bert_training_script.py index 5c5d4af18c..c8dfb822d8 100644 --- a/models/training-tuning-scripts/phishing-models/phish_bert_training_script.py +++ b/models/training-tuning-scripts/phishing-models/phish_bert_training_script.py @@ -19,13 +19,13 @@ import os.path import zipfile -import cudf - import requests from sklearn.metrics import f1_score from sklearn.model_selection import train_test_split -from common.sequence_classifier import SequenceClassifier +import cudf + +from common.sequence_classifier import SequenceClassifier # pylint: disable=no-name-in-module def preprocessing(): diff --git a/models/training-tuning-scripts/root-cause-models/root_cause_bert.py 
b/models/training-tuning-scripts/root-cause-models/root_cause_bert.py index af793aab77..61b32892d0 100644 --- a/models/training-tuning-scripts/root-cause-models/root_cause_bert.py +++ b/models/training-tuning-scripts/root-cause-models/root_cause_bert.py @@ -21,12 +21,14 @@ import argparse import time -import cudf + import pandas as pd from sklearn.metrics import f1_score from sklearn.model_selection import train_test_split -from common.sequence_classifier import SequenceClassifier +import cudf + +from common.sequence_classifier import SequenceClassifier # pylint: disable=no-name-in-module from morpheus.utils.seed import manual_seed diff --git a/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py b/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py index e283bd40f0..f39ad07c6d 100644 --- a/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py +++ b/models/training-tuning-scripts/sid-models/sid-minibert-20230424-script.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# pylint: disable=invalid-name """ Example Usage: python sid-minibert-20230424-script.py \ @@ -21,11 +23,11 @@ """ import argparse + +import torch from sklearn.metrics import accuracy_score from sklearn.metrics import f1_score from sklearn.metrics import multilabel_confusion_matrix - -import torch from torch.nn import BCEWithLogitsLoss from torch.utils.data import DataLoader from torch.utils.data import TensorDataset @@ -152,7 +154,7 @@ def train_model(model_dir, train_dataloader, idx2label): nb_tr_examples += b_input_ids.size(0) nb_tr_steps += 1 - print("Train loss: {}".format(tr_loss / nb_tr_steps)) + print(f"Train loss: {tr_loss / nb_tr_steps}") return model @@ -198,9 +200,9 @@ def model_eval(model, val_dataloader, idx2label): print('F1 Macro Validation Accuracy: ', val_f1_accuracy) print('Flat Validation Accuracy: ', val_flat_accuracy) - for label, cf in zip(list(idx2label.values()), multilabel_confusion_matrix(true_bools, pred_bools)): + for label, cfm in zip(list(idx2label.values()), multilabel_confusion_matrix(true_bools, pred_bools)): print(label) - print(cf) + print(cfm) def main(): diff --git a/morpheus.code-workspace b/morpheus.code-workspace index 08fff40f90..d4e71cc79c 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -594,6 +594,11 @@ "files.insertFinalNewline": true, "files.trimFinalNewlines": true, "files.trimTrailingWhitespace": true, + "pylint.args": [ + "--rcfile=${workspaceFolder}/pyproject.toml", + "--init-hook=import sys; sys.path.append(\"${workspaceFolder}\")" + ], + "pylint.importStrategy": "fromEnvironment", "python.analysis.extraPaths": [ "./examples/digital_fingerprinting/production/morpheus" ], @@ -602,11 +607,6 @@ "--style=${workspaceFolder}/setup.cfg" ], "python.linting.flake8Enabled": true, - "python.linting.pylintArgs": [ - "--rcfile=${workspaceFolder}/pyproject.toml", - "--init-hook=import sys; sys.path.append(\"${workspaceFolder}\")" - ], - "python.linting.pylintEnabled": true, "python.testing.pytestArgs": [ "-s", "tests" diff --git a/morpheus/cli/register_stage.py b/morpheus/cli/register_stage.py index 3e8302650a..f404c752c4 100644 --- a/morpheus/cli/register_stage.py +++ b/morpheus/cli/register_stage.py @@ -68,7 +68,7 @@ def get_param_type(numpydoc_obj: numpydoc.docscrape.NumpyDocString, name: str): return found_doc.type -def parse_type_value(value_str: str) 
-> typing.Any: +def parse_type_value(value_str: str) -> typing.Any: # pylint: disable=too-many-return-statements value_lower = value_str.lower() @@ -119,7 +119,7 @@ def parse_doc_type_str(doc_type_str: str) -> dict: # Single type out_dict[""] = equal_split[0].strip() else: - raise RuntimeError("Invalid docstring: {}".format(doc_type_str)) + raise RuntimeError(f"Invalid docstring: {doc_type_str}") return out_dict @@ -161,7 +161,7 @@ def has_matching_kwargs(function, input_dict: dict) -> bool: return len([True for input_name in list(input_dict.keys()) if input_name in fn_sig.parameters]) > 0 -def _convert_enum_default(options_kwargs: dict, annotation, use_value: bool = False): +def _convert_enum_default(options_kwargs: dict, annotation): """ Display the default value of an enum argument as a string not an enum instance """ @@ -190,7 +190,7 @@ def set_options_param_type(options_kwargs: dict, annotation, doc_type: str): case_sensitive = doc_type_kwargs.get('case_sensitive', True) options_kwargs["type"] = partial_pop_kwargs(click.Choice, doc_type_kwargs)(get_enum_keys(annotation)) - _convert_enum_default(options_kwargs, annotation, use_value=True) + _convert_enum_default(options_kwargs, annotation) options_kwargs["callback"] = functools.partial(parse_enum, enum_class=annotation, case_sensitive=case_sensitive) elif (issubtype(annotation, int) and not issubtype(annotation, bool)): @@ -214,7 +214,10 @@ def set_options_param_type(options_kwargs: dict, annotation, doc_type: str): options_kwargs.update(doc_type_kwargs) -def compute_option_name(stage_arg_name: str, rename_options: typing.Dict[str, str] = dict()) -> tuple: +def compute_option_name(stage_arg_name: str, rename_options: typing.Dict[str, str] = None) -> tuple: + + if (rename_options is None): + rename_options = {} rename_val = rename_options.get(stage_arg_name, f"--{stage_arg_name}") @@ -223,10 +226,10 @@ def compute_option_name(stage_arg_name: str, rename_options: typing.Dict[str, st elif (not issubtype(type(rename_val), tuple)): rename_val = tuple(rename_val) - for n in rename_val: - if (not n.startswith("-")): - raise RuntimeError("Rename value '{}' for option '{}', must start with '-'. i.e. '--my_new_option".format( - n, stage_arg_name)) + for name in rename_val: + if (not name.startswith("-")): + raise RuntimeError( + f"Rename value '{name}' for option '{stage_arg_name}', must start with '-'. i.e. 
'--my_new_option") # Create the click option name as a ("stage_arg_name", "--rename1", "--rename2", "-r") return (stage_arg_name, ) + rename_val @@ -234,13 +237,25 @@ def compute_option_name(stage_arg_name: str, rename_options: typing.Dict[str, st def register_stage(command_name: str = None, modes: typing.Sequence[PipelineModes] = None, - ignore_args: typing.List[str] = list(), - command_args: dict = dict(), - option_args: typing.Dict[str, dict] = dict(), - rename_options: typing.Dict[str, str] = dict()): + ignore_args: typing.List[str] = None, + command_args: dict = None, + option_args: typing.Dict[str, dict] = None, + rename_options: typing.Dict[str, str] = None): + + if (ignore_args is None): + ignore_args = [] + + if (command_args is None): + command_args = {} + + if (option_args is None): + option_args = {} + + if (rename_options is None): + rename_options = {} if (modes is None): - modes = [x for x in PipelineModes] + modes = list(PipelineModes) def register_stage_inner(stage_class: _DecoratorType) -> _DecoratorType: @@ -274,13 +289,16 @@ def build_command(): if (p_value.annotation == Config): config_param_name = p_name continue - elif (p_name in ignore_args): + + if (p_name in ignore_args): assert p_value.default != inspect.Parameter.empty, ( "Cannot ignore argument without default value") continue - elif (p_value.kind == inspect.Parameter.VAR_POSITIONAL): + + if (p_value.kind == inspect.Parameter.VAR_POSITIONAL): continue - elif (p_value.kind == inspect.Parameter.VAR_KEYWORD): + + if (p_value.kind == inspect.Parameter.VAR_KEYWORD): continue option_kwargs = {} @@ -322,7 +340,7 @@ def command_callback(ctx: click.Context, **kwargs): from morpheus.pipeline.source_stage import SourceStage config = get_config_from_ctx(ctx) - p = get_pipeline_from_ctx(ctx) + pipeline = get_pipeline_from_ctx(ctx) # Set the config to the correct parameter kwargs[config_param_name] = config @@ -330,9 +348,9 @@ def command_callback(ctx: click.Context, **kwargs): stage = stage_class(**kwargs) if (issubclass(stage_class, SourceStage)): - p.set_source(stage) + pipeline.set_source(stage) else: - p.add_stage(stage) + pipeline.add_stage(stage) return stage @@ -363,8 +381,8 @@ def command_callback(ctx: click.Context, **kwargs): existing_registrations: typing.Set[PipelineModes] = set() # Get any already registered nodes - for m in modes: - registered_stage = GlobalStageRegistry.get().get_stage_info(command_name, m) + for mode in modes: + registered_stage = GlobalStageRegistry.get().get_stage_info(command_name, mode) if (registered_stage is not None): @@ -372,14 +390,14 @@ def command_callback(ctx: click.Context, **kwargs): if (isinstance(registered_stage, LazyStageInfo)): # Only check the qualified name if (registered_stage.qualified_name != get_full_qualname(stage_class)): - raise RuntimeError("Qualified name {} != {}".format(registered_stage.qualified_name, - get_full_qualname(stage_class))) + raise RuntimeError( + f"Qualified name {registered_stage.qualified_name} != {get_full_qualname(stage_class)}") elif (registered_stage != stage_info): - raise RuntimeError( - ("Registering stage '{}' failed. Stage is already registered with different options. " - "Ensure `register_stage` is only executed once for each mode and name combination. " - "If registered multiple times (i.e. on module reload), the registration must be identical" - ).format(command_name)) + raise RuntimeError(( + f"Registering stage '{command_name}' failed. Stage is already registered with different " + "options. 
Ensure `register_stage` is only executed once for each mode and name combination." + " If registered multiple times (i.e. on module reload), the registration must be identical" + )) existing_registrations.update(registered_stage.modes) diff --git a/morpheus/io/deserializers.py b/morpheus/io/deserializers.py index 682c1c2153..8eb5c666ea 100644 --- a/morpheus/io/deserializers.py +++ b/morpheus/io/deserializers.py @@ -73,13 +73,13 @@ def read_file_to_df(file_name: typing.Union[str, io.IOBase], # The opener objects are subclasses of io.IOBase, which avoids introducing fsspec to this part of the API if (isinstance(file_name, io.IOBase)): if (hasattr(file_name, 'path')): # This attr is not in the base - fp = file_name.path + filepath = file_name.path else: raise ValueError("Unable to determine file type from instance of io.IOBase," " set `file_type` to a value other than Auto") else: - fp = file_name - mode = determine_file_type(fp) + filepath = file_name + mode = determine_file_type(filepath) # Special args for JSON kwargs = {} @@ -107,7 +107,7 @@ def read_file_to_df(file_name: typing.Union[str, io.IOBase], df = df_class.read_parquet(file_name, **kwargs) else: - assert False, "Unsupported file type mode: {}".format(mode) + assert False, f"Unsupported file type mode: {mode}" assert df is not None diff --git a/morpheus/messages/memory/tensor_memory.py b/morpheus/messages/memory/tensor_memory.py index 003fbc58fc..faec04c7fe 100644 --- a/morpheus/messages/memory/tensor_memory.py +++ b/morpheus/messages/memory/tensor_memory.py @@ -155,8 +155,8 @@ def _get_tensor_prop(self, name: str): """ try: return self._tensors[name] - except KeyError: - raise AttributeError + except KeyError as e: + raise AttributeError from e def set_tensor(self, name: str, tensor: cp.ndarray): """ diff --git a/morpheus/messages/multi_message.py b/morpheus/messages/multi_message.py index 55d40000a3..cf1c83dd8c 100644 --- a/morpheus/messages/multi_message.py +++ b/morpheus/messages/multi_message.py @@ -365,7 +365,7 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): return self.from_message(self, meta=MessageMeta(sliced_rows), mess_offset=0, mess_count=len(sliced_rows)) @classmethod - def from_message(cls: typing.Type[Self], + def from_message(cls: type[Self], message: "MultiMessage", *, meta: MessageMeta = None, diff --git a/morpheus/messages/multi_tensor_message.py b/morpheus/messages/multi_tensor_message.py index 731b20f86f..952ea45a2c 100644 --- a/morpheus/messages/multi_tensor_message.py +++ b/morpheus/messages/multi_tensor_message.py @@ -286,7 +286,7 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): offset=0, count=sliced_count) - def get_slice(self, start, stop): + def get_slice(self: Self, start, stop) -> Self: """ Perform a slice of the current message from `start`:`stop` (excluding `stop`) @@ -322,7 +322,7 @@ def get_slice(self, start, stop): return self.from_message(self, **kwargs) @classmethod - def from_message(cls: typing.Type[Self], + def from_message(cls: type[Self], message: "MultiTensorMessage", *, meta: MessageMeta = None, diff --git a/morpheus/models/dfencoder/__init__.py b/morpheus/models/dfencoder/__init__.py index f8dc4bd464..4b0768a71a 100644 --- a/morpheus/models/dfencoder/__init__.py +++ b/morpheus/models/dfencoder/__init__.py @@ -50,7 +50,7 @@ # This package uses torch. 
We need to guarantee that cudf is loaded first so do that here # isort: off -import cudf # noqa: F401 +import cudf # noqa: F401 # pylint:disable=unused-import # isort: on from .ae_module import AEModule diff --git a/morpheus/modules/filter_detections.py b/morpheus/modules/filter_detections.py index daf07760ab..e19e54e5d6 100644 --- a/morpheus/modules/filter_detections.py +++ b/morpheus/modules/filter_detections.py @@ -183,7 +183,7 @@ def filter_slice(multi_message: MultiMessage) -> typing.List[MultiMessage]: elif filter_source == "DATAFRAME": filter_source = FilterSource.DATAFRAME else: - raise Exception(f"Unknown filter source: {filter_source}") + raise RuntimeError(f"Unknown filter source: {filter_source}") if copy: node = builder.make_node(FILTER_DETECTIONS, ops.map(filter_copy)) diff --git a/morpheus/parsers/splunk_notable_parser.py b/morpheus/parsers/splunk_notable_parser.py index b69d26bcdf..eaac27e62a 100644 --- a/morpheus/parsers/splunk_notable_parser.py +++ b/morpheus/parsers/splunk_notable_parser.py @@ -62,7 +62,7 @@ def _process_ip_fields(self, parsed_dataframe: cudf.DataFrame) -> cudf.DataFrame This function replaces src_ip column with src_ip2, if scr_ip is empty and does the same way for dest_ip column. """ for ip in ["src_ip", "dest_ip"]: - log.debug("******* Processing %s *******" % (ip)) + log.debug("******* Processing %s *******", ip) ip2 = ip + "2" ip_len = ip + "_len" # Calculate ip column value length. @@ -73,11 +73,11 @@ def _process_ip_fields(self, parsed_dataframe: cudf.DataFrame) -> cudf.DataFrame parsed_dataframe = parsed_dataframe[parsed_dataframe[ip_len] != 0] if not tmp_dataframe.empty: - log.debug("tmp_dataframe size %s" % (str(tmp_dataframe.shape))) + log.debug("tmp_dataframe size %s", str(tmp_dataframe.shape)) # Assign ip2 column values to empty ip column values. tmp_dataframe[ip] = tmp_dataframe[ip2] if not parsed_dataframe.empty: - log.debug("parsed_dataframe is not empty %s" % (str(parsed_dataframe.shape))) + log.debug("parsed_dataframe is not empty %s", str(parsed_dataframe.shape)) # Concat, if both parsed_dataframe and tmp_df are not empty. parsed_dataframe = cudf.concat([parsed_dataframe, tmp_dataframe]) else: diff --git a/morpheus/parsers/url_parser.py b/morpheus/parsers/url_parser.py index 18a23f5d93..04eea30e47 100644 --- a/morpheus/parsers/url_parser.py +++ b/morpheus/parsers/url_parser.py @@ -129,7 +129,7 @@ def _verify_req_cols(req_cols, allowed_output_cols): """ if req_cols is not None: if not req_cols.issubset(allowed_output_cols): - raise ValueError("Given req_cols must be subset of %s" % (allowed_output_cols)) + raise ValueError(f"Given req_cols must be subset of {allowed_output_cols}") else: req_cols = allowed_output_cols return req_cols diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index f8da2043b5..27ac6eba35 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -16,6 +16,7 @@ import logging import os import signal +import sys import time import typing from collections import OrderedDict @@ -56,7 +57,7 @@ class Pipeline(): """ - def __init__(self, c: Config): + def __init__(self, config: Config): self._source_count: int = None # Maximum number of iterations for progress reporting. 
None = Unknown/Unlimited self._id_counter = 0 @@ -71,13 +72,13 @@ def __init__(self, c: Config): self._segments: typing.Dict = defaultdict(lambda: {"nodes": set(), "ingress_ports": [], "egress_ports": []}) self._exec_options = mrc.Options() - self._exec_options.topology.user_cpuset = "0-{}".format(c.num_threads - 1) + self._exec_options.topology.user_cpuset = f"0-{config.num_threads - 1}" self._exec_options.engine_factories.default_engine_type = mrc.core.options.EngineType.Thread # Set the default channel size - mrc.Config.default_channel_size = c.edge_buffer_size + mrc.Config.default_channel_size = config.edge_buffer_size - self.batch_size = c.pipeline_batch_size + self.batch_size = config.pipeline_batch_size self._segment_graphs = defaultdict(lambda: networkx.DiGraph()) @@ -126,7 +127,7 @@ def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT: segment_nodes.add(stage) self._sources.add(stage) else: - raise NotImplementedError("add_stage() failed. Unknown node type: {}".format(type(stage))) + raise NotImplementedError(f"add_stage() failed. Unknown node type: {type(stage)}") stage._pipeline = self @@ -237,7 +238,7 @@ def build(self): self._mrc_pipeline = mrc.Pipeline() def inner_build(builder: mrc.Builder, segment_id: str): - logger.info(f"====Building Segment: {segment_id}====") + logger.info("====Building Segment: %s====", segment_id) segment_graph = self._segment_graphs[segment_id] # Check if preallocated columns are requested, this needs to happen before the source stages are built @@ -256,7 +257,7 @@ def inner_build(builder: mrc.Builder, segment_id: str): if (stage.can_build()): stage.build(builder) - if (not all([x.is_built for x in segment_graph.nodes()])): + if (not all(x.is_built for x in segment_graph.nodes())): logger.warning("Cyclic pipeline graph detected! Building with reduced constraints") for stage in segment_graph.nodes(): @@ -275,9 +276,9 @@ def inner_build(builder: mrc.Builder, segment_id: str): logger.info("====Building Segment Complete!====") logger.info("====Building Pipeline====") - for segment_id in self._segments.keys(): - segment_ingress_ports = self._segments[segment_id]["ingress_ports"] - segment_egress_ports = self._segments[segment_id]["egress_ports"] + for segment_id, segment in self._segments.items(): + segment_ingress_ports = segment["ingress_ports"] + segment_egress_ports = segment["egress_ports"] segment_inner_build = partial(inner_build, segment_id=segment_id) self._mrc_pipeline.make_segment(segment_id, [port_info["port_pair"] for port_info in segment_ingress_ports], @@ -311,8 +312,8 @@ def stop(self): """ logger.info("====Stopping Pipeline====") - for s in list(self._sources) + list(self._stages): - s.stop() + for stage in list(self._sources) + list(self._stages): + stage.stop() self._mrc_executor.stop() @@ -330,21 +331,21 @@ async def join(self): raise finally: # Make sure these are always shut down even if there was an error - for s in list(self._sources): - s.stop() + for source in list(self._sources): + source.stop() # First wait for all sources to stop. 
This only occurs after all messages have been processed fully - for s in list(self._sources): - await s.join() + for source in list(self._sources): + await source.join() # Now that there is no more data, call stop on all stages to ensure shutdown (i.e., for stages that have # their own worker loop thread) - for s in list(self._stages): - s.stop() + for stage in list(self._stages): + stage.stop() # Now call join on all stages - for s in list(self._stages): - await s.join() + for stage in list(self._stages): + await stage.join() async def _build_and_start(self): @@ -362,8 +363,8 @@ async def _build_and_start(self): async def _async_start(self): # Loop over all stages and call on_start if it exists - for s in self._stages: - await s.start_async() + for stage in self._stages: + await stage.start_async() def _on_start(self): @@ -374,11 +375,11 @@ def _on_start(self): # Stop from running this twice self._is_started = True - logger.debug("Starting! Time: {}".format(time.time())) + logger.debug("Starting! Time: %s", time.time()) # Loop over all stages and call on_start if it exists - for s in self._stages: - s.on_start() + for stage in self._stages: + stage.on_start() def visualize(self, filename: str = None, **graph_kwargs): """ @@ -414,11 +415,11 @@ def visualize(self, filename: str = None, **graph_kwargs): start_def_port = ":e" if is_lr else ":s" end_def_port = ":w" if is_lr else ":n" - def has_ports(n: StreamWrapper, is_input): + def has_ports(node: StreamWrapper, is_input): if (is_input): - return len(n.input_ports) > 0 - else: - return len(n.output_ports) > 0 + return len(node.input_ports) > 0 + + return len(node.output_ports) > 0 if not self._is_build_complete: raise RuntimeError("Pipeline.visualize() requires that the Pipeline has been started before generating " @@ -427,31 +428,32 @@ def has_ports(n: StreamWrapper, is_input): "be fixed in a future release.") # Now build up the nodes - for idx, segment_id in enumerate(self._segments): + for segment_id in self._segments: gv_subgraphs[segment_id] = graphviz.Digraph(f"cluster_{segment_id}") gv_subgraph = gv_subgraphs[segment_id] gv_subgraph.attr(label=segment_id) - for n, attrs in typing.cast(typing.Mapping[StreamWrapper, dict], - self._segment_graphs[segment_id].nodes).items(): + for name, attrs in typing.cast(typing.Mapping[StreamWrapper, dict], + self._segment_graphs[segment_id].nodes).items(): node_attrs = attrs.copy() label = "" - show_in_ports = has_ports(n, is_input=True) - show_out_ports = has_ports(n, is_input=False) + show_in_ports = has_ports(name, is_input=True) + show_out_ports = has_ports(name, is_input=False) # Build the ports for the node. 
Only show ports if there are any # (Would like to have this not show for one port, but the lines get all messed up) if (show_in_ports): - in_port_label = " {{ {} }} | ".format(" | ".join( - [f" input_port: {x.port_number}" for x in n.input_ports])) + tmp_str = " | ".join([f" input_port: {x.port_number}" for x in name.input_ports]) + in_port_label = f" {{ {tmp_str} }} | " label += in_port_label - label += n.unique_name + label += name.unique_name if (show_out_ports): - out_port_label = " | {{ {} }}".format(" | ".join( - [f" output_port: {x.port_number}" for x in n.output_ports])) + tmp_str = " | ".join( + [f" output_port: {x.port_number}" for x in name.output_ports]) + out_port_label = f" | {{ {tmp_str} }}" label += out_port_label if (show_in_ports or show_out_ports): @@ -462,9 +464,9 @@ def has_ports(n: StreamWrapper, is_input): "shape": "record", "fillcolor": "white", }) - # TODO: Eventually allow nodes to have different attributes based on type + # TODO(MDD): Eventually allow nodes to have different attributes based on type # node_attrs.update(n.get_graphviz_attrs()) - gv_subgraph.node(n.unique_name, **node_attrs) + gv_subgraph.node(name.unique_name, **node_attrs) # Build up edges for segment_id in self._segments: @@ -522,7 +524,7 @@ def has_ports(n: StreamWrapper, is_input): style="dashed", label=f"Segment Port: {egress_port['port_pair'][0]}") - for key, gv_subgraph in gv_subgraphs.items(): + for gv_subgraph in gv_subgraphs.values(): gv_graph.subgraph(gv_subgraph) file_format = os.path.splitext(filename)[-1].replace(".", "") @@ -544,7 +546,7 @@ async def run_async(self): def error_handler(_, context: dict): - msg = "Unhandled exception in async loop! Exception: \n{}".format(context["message"]) + msg = f"Unhandled exception in async loop! Exception: \n{context['message']}" exception = context.get("exception", Exception()) logger.critical(msg, exc_info=exception) @@ -564,10 +566,10 @@ def term_signal(): self.stop() else: tqdm.write("Killing") - exit(1) + sys.exit(1) - for s in [signal.SIGINT, signal.SIGTERM]: - loop.add_signal_handler(s, term_signal) + for sig in [signal.SIGINT, signal.SIGTERM]: + loop.add_signal_handler(sig, term_signal) try: await self._build_and_start() diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index a8db6fb437..5a2577c829 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -58,7 +58,7 @@ def is_complete(self): """ A receiver is complete if all input senders are also complete. """ - return all([x.is_complete for x in self._input_senders]) + return all(x.is_complete for x in self._input_senders) @property def is_partial(self): @@ -67,7 +67,7 @@ def is_partial(self): there is a circular pipeline. """ # Its partially complete if any input sender is complete - return any([x.is_complete for x in self._input_senders]) + return any(x.is_complete for x in self._input_senders) @property def in_pair(self): @@ -114,7 +114,7 @@ def get_input_pair(self, builder: mrc.Builder) -> StreamPair: great_ancestor = greatest_ancestor(*[x.out_type for x in self._input_senders if x.is_complete]) if (great_ancestor is None): - # TODO: Add stage, port, and type info to message + # TODO(MDD): Add stage, port, and type info to message raise RuntimeError(("Cannot determine single type for senders of input port. 
" "Use a merge stage to handle different types of inputs.")) @@ -140,7 +140,7 @@ def link(self, builder: mrc.Builder): great_ancestor = greatest_ancestor(*[x.out_type for x in self._input_senders if x.is_complete]) if (not typing_utils.issubtype(great_ancestor, self._input_type)): - # TODO: Add stage, port, and type info to message + # TODO(MDD): Add stage, port, and type info to message raise RuntimeError( "Invalid linking phase. Input port type does not match predicted type determined during build phase") diff --git a/morpheus/pipeline/single_port_stage.py b/morpheus/pipeline/single_port_stage.py index 9b5845346f..e4b5611714 100644 --- a/morpheus/pipeline/single_port_stage.py +++ b/morpheus/pipeline/single_port_stage.py @@ -63,8 +63,8 @@ def _pre_build(self, builder: mrc.Builder) -> typing.List[StreamPair]: # Check the types of all inputs for x in in_ports_pairs: if (not typing_utils.issubtype(x[1], typing.Union[self.accepted_types()])): - raise RuntimeError("The {} stage cannot handle input of {}. Accepted input types: {}".format( - self.name, x[1], self.accepted_types())) + raise RuntimeError((f"The {self.name} stage cannot handle input of {x[1]}. " + f"Accepted input types: {self.accepted_types()}")) return in_ports_pairs @@ -83,7 +83,7 @@ def _build(self, builder: mrc.Builder, in_ports_streams: typing.List[StreamPair] return [self._build_single(builder, in_ports_streams[0])] - def _post_build_single(self, builder: mrc.Builder, out_pair: StreamPair) -> StreamPair: + def _post_build_single(self, _: mrc.Builder, out_pair: StreamPair) -> StreamPair: return out_pair @typing.final @@ -91,8 +91,10 @@ def _post_build(self, builder: mrc.Builder, out_ports_pair: typing.List[StreamPa ret_val = self._post_build_single(builder, out_ports_pair[0]) - logger.info("Added stage: {}\n └─ {} -> {}".format(str(self), - pretty_print_type_name(self.input_ports[0].in_type), - pretty_print_type_name(ret_val[1]))) + # pylint: disable=logging-format-interpolation + logger.info("Added stage: %s\n └─ %s -> %s", + str(self), + pretty_print_type_name(self.input_ports[0].in_type), + pretty_print_type_name(ret_val[1])) return [ret_val] diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index be02d72ec2..0367ec8c63 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -61,8 +61,6 @@ def inner(self: "StreamWrapper", *args, **kwargs): # Save values on self self._init_str = ", ".join(init_pairs) - return - return typing.cast(_DecoratorType, inner) @@ -80,9 +78,9 @@ class StreamWrapper(ABC, collections.abc.Hashable): __ID_COUNTER = AtomicInteger(0) - def __init__(self, c: Config): + def __init__(self, config: Config): # Save the config - self._config = c + self._config = config self._id = StreamWrapper.__ID_COUNTER.get_and_inc() self._pipeline: _pipeline.Pipeline = None @@ -299,13 +297,13 @@ def can_build(self, check_ports=False) -> bool: return False return True - else: - # Check if we can build based on the input ports. We can build - for r in self.input_ports: - if (not r.is_partial): - return False - return True + # Check if we can build based on the input ports. We can build + for receiver in self.input_ports: + if (not receiver.is_partial): + return False + + return True def build(self, builder: mrc.Builder, do_propagate=True): """Build this stage. 
@@ -380,7 +378,11 @@ def _build(self, builder: mrc.Builder, in_ports_streams: typing.List[StreamPair] """ pass - def _post_build(self, builder: mrc.Builder, out_ports_pair: typing.List[StreamPair]) -> typing.List[StreamPair]: + def _post_build( + self, + builder: mrc.Builder, # pylint: disable=unused-argument + out_ports_pair: typing.List[StreamPair], + ) -> typing.List[StreamPair]: return out_ports_pair def _start(self): diff --git a/morpheus/stages/general/trigger_stage.py b/morpheus/stages/general/trigger_stage.py index 752fe72238..6aaed55648 100644 --- a/morpheus/stages/general/trigger_stage.py +++ b/morpheus/stages/general/trigger_stage.py @@ -19,7 +19,6 @@ from mrc.core import operators as ops from morpheus.cli.register_stage import register_stage -from morpheus.config import Config from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair @@ -41,9 +40,6 @@ class TriggerStage(SinglePortStage): """ - def __init__(self, c: Config): - super().__init__(c) - @property def name(self) -> str: return "trigger" diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 5f92779877..b0bf74889a 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -105,7 +105,7 @@ def calc_output_dims(self, x: MultiInferenceMessage) -> typing.Tuple: pass @abstractmethod - def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemory], None]): + def process(self, batch: MultiInferenceMessage, callback: typing.Callable[[TensorMemory], None]): """ Main inference processing function. This function will be called once for each mini-batch. Once the inference is complete, the `cb` parameter should be used to set the response value. The callback can be called @@ -115,7 +115,7 @@ def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemor ---------- batch : `morpheus.pipeline.messages.MultiInferenceMessage` Mini-batch of inference messages. - cb : typing.Callable[[`morpheus.pipeline.messages.TensorMemory`], None] + callback : typing.Callable[[`morpheus.pipeline.messages.TensorMemory`], None] Callback to set the values for the inference response. """ @@ -236,17 +236,17 @@ def on_next(x: MultiInferenceMessage): completion_future = mrc.Future() - def set_output_fut(resp: TensorMemory, b, batch_future: mrc.Future): + def set_output_fut(resp: TensorMemory, inner_batch, batch_future: mrc.Future): nonlocal outstanding_requests - m = self._convert_one_response(output_message, b, resp) + mess = self._convert_one_response(output_message, inner_batch, resp) outstanding_requests -= 1 - batch_future.set_result(m) + batch_future.set_result(mess) fut_list.append(completion_future) - worker.process(batch, partial(set_output_fut, b=batch, batch_future=completion_future)) + worker.process(batch, partial(set_output_fut, inner_batch=batch, batch_future=completion_future)) for f in fut_list: f.result() @@ -270,17 +270,13 @@ def set_output_fut(resp: TensorMemory, b, batch_future: mrc.Future): return stream, out_type - def _start(self): - - return super()._start() - def stop(self): """ Stops the inference workers and closes the inference queue. 
""" - for w in self._workers: - w.stop() + for worker in self._workers: + worker.stop() # Now stop the _inf_queue to unblock workers self._inf_queue.close() @@ -294,8 +290,8 @@ async def join(self): self._inf_queue.join() # Join all workers - for w in self._workers: - await w.join() + for worker in self._workers: + await worker.join() return await super().join() diff --git a/morpheus/stages/input/autoencoder_source_stage.py b/morpheus/stages/input/autoencoder_source_stage.py index 72220c199f..d444169722 100644 --- a/morpheus/stages/input/autoencoder_source_stage.py +++ b/morpheus/stages/input/autoencoder_source_stage.py @@ -84,6 +84,7 @@ def __init__(self, SingleOutputSource.__init__(self, c) + self._input_glob = input_glob self._file_type = file_type self._feature_columns = c.ae.feature_columns @@ -110,7 +111,7 @@ def __init__(self, @property def input_count(self) -> int: """Return None for no max input count""" - return self._input_count + return self._input_count if self._input_count is not None else 0 def get_match_pattern(self, glob_split): """Return a file match pattern""" @@ -255,7 +256,7 @@ def files_to_dfs_per_user(x: typing.List[str], pass @staticmethod - def derive_features(df: pd.DataFrame, feature_columns: typing.List[str]): + def derive_features(df: pd.DataFrame, feature_columns: typing.List[str]): # pylint: disable=unused-argument """ If any features are available to be derived, can be implemented by overriding this function. @@ -305,24 +306,24 @@ def _build_user_metadata(self, x: typing.Dict[str, pd.DataFrame]): return user_metas - def _build_source(self, seg: mrc.Builder) -> StreamPair: + def _build_source(self, builder: mrc.Builder) -> StreamPair: # The first source just produces filenames - filename_source = self._watcher.build_node(self.unique_name, seg) + filename_source = self._watcher.build_node(self.unique_name, builder) out_type = typing.List[str] # Supposed to just return a source here return filename_source, out_type - def _post_build_single(self, seg: mrc.Builder, out_pair: StreamPair) -> StreamPair: + def _post_build_single(self, builder: mrc.Builder, out_pair: StreamPair) -> StreamPair: out_stream = out_pair[0] out_type = out_pair[1] # At this point, we have batches of filenames to process. 
Make a node for processing batches of # filenames into batches of dataframes - post_node = seg.make_node( + post_node = builder.make_node( self.unique_name + "-post", ops.map( partial( @@ -337,9 +338,9 @@ def _post_build_single(self, seg: mrc.Builder, out_pair: StreamPair) -> StreamPa ops.map(self._build_user_metadata), # Finally flatten to single meta ops.flatten()) - seg.make_edge(out_stream, post_node) + builder.make_edge(out_stream, post_node) out_stream = post_node out_type = UserMessageMeta - return super()._post_build_single(seg, (out_stream, out_type)) + return super()._post_build_single(builder, (out_stream, out_type)) diff --git a/morpheus/stages/input/control_message_file_source_stage.py b/morpheus/stages/input/control_message_file_source_stage.py index ee85ef0c21..ca798010f0 100644 --- a/morpheus/stages/input/control_message_file_source_stage.py +++ b/morpheus/stages/input/control_message_file_source_stage.py @@ -25,7 +25,7 @@ from morpheus.pipeline.single_output_source import SingleOutputSource from morpheus.pipeline.stream_pair import StreamPair -logger = logging.getLogger("morpheus.{}".format(__name__)) +logger = logging.getLogger(f"morpheus.{__name__}") class ControlMessageFileSourceStage(SingleOutputSource): diff --git a/morpheus/stages/input/kafka_source_stage.py b/morpheus/stages/input/kafka_source_stage.py index ea4c56dad9..d40b7f3493 100644 --- a/morpheus/stages/input/kafka_source_stage.py +++ b/morpheus/stages/input/kafka_source_stage.py @@ -80,9 +80,9 @@ class KafkaSourceStage(PreallocatorMixin, SingleOutputSource): """ def __init__(self, - c: Config, + config: Config, bootstrap_servers: str, - input_topic: typing.List[str] = ["test_pcap"], + input_topic: typing.List[str] = None, group_id: str = "morpheus", client_id: str = None, poll_interval: str = "10millis", @@ -91,7 +91,10 @@ def __init__(self, auto_offset_reset: AutoOffsetReset = AutoOffsetReset.LATEST, stop_after: int = 0, async_commits: bool = True): - super().__init__(c) + super().__init__(config) + + if (input_topic is None): + input_topic = ["test_pcap"] if isinstance(auto_offset_reset, AutoOffsetReset): auto_offset_reset = auto_offset_reset.value @@ -110,8 +113,8 @@ def __init__(self, # Remove duplicate topics if there are any. 
self._topics = list(set(input_topic)) - self._max_batch_size = c.pipeline_batch_size - self._max_concurrent = c.num_threads + self._max_batch_size = config.pipeline_batch_size + self._max_concurrent = config.num_threads self._disable_commit = disable_commit self._disable_pre_filtering = disable_pre_filtering self._stop_after = stop_after @@ -160,7 +163,7 @@ def _process_batch(self, consumer, batch): buffer.seek(0) df = cudf.io.read_json(buffer, engine='cudf', lines=True, orient='records') except Exception as e: - logger.error("Error parsing payload into a dataframe : {}".format(e)) + logger.error("Error parsing payload into a dataframe : %s", e) finally: if (not self._disable_commit): for msg in batch: diff --git a/morpheus/stages/output/compare_dataframe_stage.py b/morpheus/stages/output/compare_dataframe_stage.py index 0d1d4cbb0f..dbc3646d7e 100644 --- a/morpheus/stages/output/compare_dataframe_stage.py +++ b/morpheus/stages/output/compare_dataframe_stage.py @@ -26,7 +26,7 @@ from morpheus.config import Config from morpheus.io.deserializers import read_file_to_df from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage -from morpheus.utils import compare_df +from morpheus.utils import compare_df as compare_df_module from morpheus.utils import concat_df from morpheus.utils.type_aliases import DataFrameType @@ -121,13 +121,13 @@ def get_results(self, clear=True) -> dict: if self._reset_index: combined_df.reset_index(inplace=True) - results = compare_df.compare_df(self._compare_df, - combined_df, - include_columns=self._include_columns, - exclude_columns=self._exclude_columns, - replace_idx=self._index_col, - abs_tol=self._abs_tol, - rel_tol=self._rel_tol) + results = compare_df_module.compare_df(self._compare_df, + combined_df, + include_columns=self._include_columns, + exclude_columns=self._exclude_columns, + replace_idx=self._index_col, + abs_tol=self._abs_tol, + rel_tol=self._rel_tol) if clear: self.clear() diff --git a/morpheus/stages/output/write_to_kafka_stage.py b/morpheus/stages/output/write_to_kafka_stage.py index c7e5bb4472..e068a4588b 100644 --- a/morpheus/stages/output/write_to_kafka_stage.py +++ b/morpheus/stages/output/write_to_kafka_stage.py @@ -90,7 +90,7 @@ def node_fn(obs: mrc.Observable, sub: mrc.Subscriber): def on_next(x: MessageMeta): nonlocal outstanding_requests - def cb(_, msg): + def callback(_, msg): if msg is not None and msg.value() is not None: pass else: @@ -103,13 +103,13 @@ def cb(_, msg): sub.on_error(msg.error()) records = serializers.df_to_json(x.df, strip_newlines=True) - for m in records: + for mess in records: # Push all of the messages while True: try: # this runs asynchronously, in C-K's thread - producer.produce(self._output_topic, m, callback=cb) + producer.produce(self._output_topic, mess, callback=callback) break except BufferError: time.sleep(self._poll_time) @@ -117,7 +117,7 @@ def cb(_, msg): logger.exception(("Error occurred in `to-kafka` stage with broker '%s' " "while committing message:\n%s"), self._kafka_conf["bootstrap.servers"], - m) + mess) break finally: # Try and process some diff --git a/morpheus/stages/postprocess/filter_detections_stage.py b/morpheus/stages/postprocess/filter_detections_stage.py index 28660cc3f8..fb24c7f142 100644 --- a/morpheus/stages/postprocess/filter_detections_stage.py +++ b/morpheus/stages/postprocess/filter_detections_stage.py @@ -108,8 +108,8 @@ def accepted_types(self) -> typing.Tuple: """ if self._filter_source == FilterSource.TENSOR: return (MultiResponseMessage, ) - else: - return 
(MultiMessage, ) + + return (MultiMessage, ) def supports_cpp_node(self): # Enable support by default @@ -198,9 +198,10 @@ def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> Strea else: self._filter_source = FilterSource.DATAFRAME - logger.debug( - f"filter_source was set to Auto, inferring a filter source of {self._filter_source} based on an input " - f"message type of {message_type}") + logger.debug(("filter_source was set to Auto, inferring a filter source of %s based on an input " + "message type of %s"), + self._filter_source, + message_type) if self._build_cpp_node(): node = _stages.FilterDetectionsStage(builder, diff --git a/morpheus/stages/postprocess/ml_flow_drift_stage.py b/morpheus/stages/postprocess/ml_flow_drift_stage.py index 8ee0808b55..aa3bba666e 100644 --- a/morpheus/stages/postprocess/ml_flow_drift_stage.py +++ b/morpheus/stages/postprocess/ml_flow_drift_stage.py @@ -134,8 +134,8 @@ def _calc_drift(self, x: MultiResponseMessage): shifted = cp.abs(x.get_probs_tensor() - 0.5) + 0.5 # Make sure the labels list is long enough - for x in range(len(self._labels), shifted.shape[1]): - self._labels.append(str(x)) + for label in range(len(self._labels), shifted.shape[1]): + self._labels.append(str(label)) for i in list(range(0, x.count, self._batch_size)): start = i diff --git a/morpheus/stages/postprocess/serialize_stage.py b/morpheus/stages/postprocess/serialize_stage.py index 02487e2585..7c421c8bf0 100644 --- a/morpheus/stages/postprocess/serialize_stage.py +++ b/morpheus/stages/postprocess/serialize_stage.py @@ -42,7 +42,7 @@ class SerializeStage(SinglePortStage): Pipeline configuration instance. include : typing.List[str], default = [], show_default="All Columns", Attributes that are required send to downstream stage. - exclude : typing.List[str] + exclude : typing.List[str], default = [r'^ID$', r'^_ts_'] Attributes that are not required send to downstream stage. 
fixed_columns : bool When `True` `SerializeStage` will assume that the Dataframe in all messages contain the same columns as the @@ -50,11 +50,17 @@ class SerializeStage(SinglePortStage): """ def __init__(self, - c: Config, - include: typing.List[str] = [], - exclude: typing.List[str] = [r'^ID$', r'^_ts_'], + config: Config, + include: typing.List[str] = None, + exclude: typing.List[str] = None, fixed_columns: bool = True): - super().__init__(c) + super().__init__(config) + + if (include is None): + include = [] + + if (exclude is None): + exclude = [r'^ID$', r'^_ts_'] # Make copies of the arrays to prevent changes after the Regex is compiled self._include_columns = copy.copy(include) @@ -136,7 +142,7 @@ def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> Strea include_columns = None if (self._include_columns is not None and len(self._include_columns) > 0): - include_columns = re.compile("({})".format("|".join(self._include_columns))) + include_columns = re.compile(f"({'|'.join(self._include_columns)})") exclude_columns = [re.compile(x) for x in self._exclude_columns] diff --git a/morpheus/stages/postprocess/timeseries_stage.py b/morpheus/stages/postprocess/timeseries_stage.py index 5fad3eafca..d84a7730e2 100644 --- a/morpheus/stages/postprocess/timeseries_stage.py +++ b/morpheus/stages/postprocess/timeseries_stage.py @@ -54,21 +54,21 @@ def round_seconds(obj: pd.Timestamp) -> pd.Timestamp: return obj.round(freq="S") -def calc_bin(obj: pd.Timestamp, t0: pd.Timestamp, resolution_sec: float) -> int: +def calc_bin(obj: pd.Timestamp, time0: pd.Timestamp, resolution_sec: float) -> int: """ Calculates the bin spacing between the start and stop timestamp at a specified resolution. """ - return round((round_seconds(obj) - t0).total_seconds()) // resolution_sec + return round((round_seconds(obj) - time0).total_seconds()) // resolution_sec def zscore(data): """ Calculate z score of cupy.ndarray. """ - mu = cp.mean(data) + mean = cp.mean(data) std = cp.std(data) - return cp.abs(data - mu) / std + return cp.abs(data - mean) / std def to_periodogram(signal_cp: cp.ndarray): @@ -97,15 +97,15 @@ def to_periodogram(signal_cp: cp.ndarray): signal_cp_std = signal_cp - cp.mean(signal_cp) # take fourier transform of signal - FFT_data = cp.fft.fft(signal_cp_std) + fft_data = cp.fft.fft(signal_cp_std) # create periodogram - prdg = (1 / len(signal_cp)) * ((cp.absolute(FFT_data))**2) + prdg = (1 / len(signal_cp)) * ((cp.absolute(fft_data))**2) return prdg -def fftAD(signalvalues: cp.ndarray, p=90, zt=8, lowpass=None): +def fftAD(signalvalues: cp.ndarray, percentile=90, zthresh=8, lowpass=None): # pylint: disable=invalid-name """ Detect anomalies with fast fourier transform. @@ -113,9 +113,9 @@ def fftAD(signalvalues: cp.ndarray, p=90, zt=8, lowpass=None): ---------- signalvalues : cupy.ndarray Values of time signal (real valued). - p : int, optional + percentile : int, optional Filtering percentile for spectral density based filtering, by default 90. - zt : int, optional + zthresh : int, optional Z-score threshold, can be tuned for datasets and sensitivity, by default 8. lowpass : _type_, optional Filtering percentile for frequency based filtering, by default None. 
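The surrounding hunks only rename `fftAD`'s keyword arguments (`p` becomes `percentile`, `zt` becomes `zthresh`); the detection logic itself is unchanged. A minimal call-site sketch under that assumption — the toy signal values and the chosen thresholds below are illustrative only and are not part of this patch:

```
import cupy as cp

from morpheus.stages.postprocess.timeseries_stage import fftAD

# Synthetic per-bin event counts with one obvious spike.
signal = cp.asarray([3.0, 3.0, 4.0, 3.0, 50.0, 3.0, 4.0, 3.0])

# percentile=90 keeps only the top 10% of spectral density for the reconstruction;
# zthresh is the z-score cutoff on the reconstruction error. The return value is an
# array of anomalous bin indices (possibly empty for a series this short).
anomalous_bins = fftAD(signal, percentile=90, zthresh=2)
```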
@@ -134,11 +134,11 @@ def fftAD(signalvalues: cp.ndarray, p=90, zt=8, lowpass=None): # lowpass: percentile to keep if lowpass: freqs = cp.arange(len(periodogram)) - bar = int(cp.percentile(freqs, lowpass)) - indices_mask[bar:] = True + freq_perct = int(cp.percentile(freqs, lowpass)) + indices_mask[freq_perct:] = True # p: percentile to delete else: - threshold = cp.percentile(periodogram, p).item() + threshold = cp.percentile(periodogram, percentile).item() indices_mask = (periodogram < threshold) @@ -148,9 +148,9 @@ def fftAD(signalvalues: cp.ndarray, p=90, zt=8, lowpass=None): err = (abs(recon - signalvalues)) - z = zscore(err) + z_score = zscore(err) - return cp.arange(len(signalvalues))[z >= zt] + return cp.arange(len(signalvalues))[z_score >= zthresh] @dataclasses.dataclass @@ -167,7 +167,7 @@ class _TimeSeriesAction: message: MultiResponseMessage = None -class _UserTimeSeries(object): +class _UserTimeSeries: """ Used internally by `TimeSeriesStage` to group data on a per-user basis. """ @@ -210,9 +210,10 @@ def __init__(self, self._t0_epoch: float = None - def _calc_bin_series(self, t: pd.Series) -> pd.Series: + def _calc_bin_series(self, timeseries: pd.Series) -> pd.Series: - return round((t.dt.round(freq="S") - self._t0_epoch).dt.total_seconds()).astype(int) // self._resolution_sec + return round( + (timeseries.dt.round(freq="S") - self._t0_epoch).dt.total_seconds()).astype(int) // self._resolution_sec def _calc_outliers(self, action: _TimeSeriesAction): @@ -226,7 +227,7 @@ def _calc_outliers(self, action: _TimeSeriesAction): # TODO(MDD): Take this out after testing assert cp.sum(signal_cp) == len(action.window), "All points in window are not accounted for in histogram" - is_anomaly = fftAD(signal_cp, p=self._filter_percent, zt=self._zscore_threshold) + is_anomaly = fftAD(signal_cp, percentile=self._filter_percent, zthresh=self._zscore_threshold) if (len(is_anomaly) > 0): @@ -243,6 +244,7 @@ def _calc_outliers(self, action: _TimeSeriesAction): idx = action.message.get_meta().index # Find anomalies that are in the active message + # pylint: disable=singleton-comparison paired_anomalies = anomalies[anomalies == True].index.intersection(idx) # noqa: E712 # Return the anomalies for priting. But only if the current message has anomalies that will get flagged @@ -303,13 +305,13 @@ def _determine_action(self, is_complete: bool) -> typing.Optional[_TimeSeriesAct if (not is_complete): # Not shutting down, so hold message return None - elif (is_complete and self._cold_end): + + if (is_complete and self._cold_end): # Shutting down and we have a cold ending, just empty the message return _TimeSeriesAction(send_message=True, message=self._pending_messages.popleft()) - else: - # Shutting down and hot end - # logger.debug("Hot End. Processing. TS: %s", timeseries_start._repr_base) - pass + + # Shutting down and hot end + # logger.debug("Hot End. Processing. TS: %s", timeseries_start._repr_base) # By this point we have both a front and back buffer. So get ready for a calculation # logger.debug("Perform Calc. 
TS: %s, WS: %s, MS: %s, ME: %s, WE: %s, TE: %s.", @@ -446,7 +448,7 @@ def __init__(self, self._hot_start = hot_start self._cold_end = cold_end - assert filter_percent >= 0.0 and filter_percent <= 100.0 + assert 0.0 <= filter_percent <= 100.0 self._filter_percent = filter_percent assert zscore_threshold >= 0.0 diff --git a/morpheus/stages/preprocess/train_ae_stage.py b/morpheus/stages/preprocess/train_ae_stage.py index c1346c6347..ce8ed408d0 100644 --- a/morpheus/stages/preprocess/train_ae_stage.py +++ b/morpheus/stages/preprocess/train_ae_stage.py @@ -36,10 +36,10 @@ logger = logging.getLogger(__name__) -class _UserModelManager(object): +class _UserModelManager: def __init__(self, - c: Config, + config: Config, user_id: str, save_model: bool, epochs: int, @@ -51,8 +51,8 @@ def __init__(self, self._history: pd.DataFrame = None self._max_history: int = max_history self._seed: int = seed - self._feature_columns = c.ae.feature_columns - self._feature_scaler = c.ae.feature_scaler + self._feature_columns = config.ae.feature_columns + self._feature_scaler = config.ae.feature_scaler self._epochs = epochs self._save_model = save_model diff --git a/morpheus/utils/module_utils.py b/morpheus/utils/module_utils.py index c7b56703d8..419be997e0 100644 --- a/morpheus/utils/module_utils.py +++ b/morpheus/utils/module_utils.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) -registry = mrc.ModuleRegistry +Registry = mrc.ModuleRegistry mrc_version = [int(i) for i in mrc.__version__.split('.')] @@ -56,7 +56,7 @@ def inner_func(config, **kwargs): if module_id is None or namespace is None: raise TypeError("TypeError: a string-like object is required for module_id and namespace, not 'NoneType'") - if not registry.contains(module_id, namespace): + if not Registry.contains(module_id, namespace): raise ValueError(f"Module '{module_id}' doesn't exist in the namespace '{namespace}'") return func(config, **kwargs) @@ -83,8 +83,8 @@ def register_module(module_id, namespace): def inner_func(func): # Register a module if not exists in the registry. 
- if not registry.contains(module_id, namespace): - registry.register_module(module_id, namespace, mrc_version, func) + if not Registry.contains(module_id, namespace): + Registry.register_module(module_id, namespace, mrc_version, func) logger.debug("Module '%s' was successfully registered with '%s' namespace.", module_id, namespace) else: logger.debug("Module: '%s' already exists in the given namespace '%s'", module_id, namespace) diff --git a/morpheus/utils/monitor_utils.py b/morpheus/utils/monitor_utils.py index 7bc473828a..e37567d692 100644 --- a/morpheus/utils/monitor_utils.py +++ b/morpheus/utils/monitor_utils.py @@ -69,9 +69,10 @@ def run(self): # Remove accidental long-lived strong reference del instance if instances != self.get_instances(): # pragma: nocover - logging.warn("Set changed size during iteration" + " (see https://github.com/tqdm/tqdm/issues/481)", - TqdmSynchronisationWarning, - stacklevel=2) + logging.warning("Set changed size during iteration" + + " (see https://github.com/tqdm/tqdm/issues/481)", + TqdmSynchronisationWarning, + stacklevel=2) # Remove accidental long-lived strong references del instances @@ -194,7 +195,7 @@ def __init__(self, self._determine_count_fn = determine_count_fn self._tqdm_class = tqdm_class if tqdm_class else MorpheusTqdm - if isinstance(log_level, LogLevels): + if isinstance(log_level, LogLevels): # pylint: disable=isinstance-second-argument-not-valid-type log_level = log_level.value self._log_level = log_level @@ -268,9 +269,9 @@ def progress_sink(self, x: typing.Union[cudf.DataFrame, MultiMessage, MessageMet return x # Do our best to determine the count - n = self._determine_count_fn(x) + count = self._determine_count_fn(x) - self._progress.update(n=n) + self._progress.update(n=count) return x @@ -290,6 +291,8 @@ def auto_count_fn(self, x: typing.Union[cudf.DataFrame, MultiMessage, MessageMet """ + # pylint: disable=too-many-return-statements + if (x is None): return None @@ -299,29 +302,35 @@ def auto_count_fn(self, x: typing.Union[cudf.DataFrame, MultiMessage, MessageMet if (isinstance(x, cudf.DataFrame)): return lambda y: len(y.index) - elif (isinstance(x, MultiMessage)): + + if (isinstance(x, MultiMessage)): return lambda y: y.mess_count - elif (isinstance(x, MessageMeta)): + + if (isinstance(x, MessageMeta)): return lambda y: y.count - elif isinstance(x, ControlMessage): + + if isinstance(x, ControlMessage): def check_df(y): df = y.payload().df if df is not None: return len(df) - else: - return 0 + + return 0 return check_df - elif (isinstance(x, list)): + + if (isinstance(x, list)): item_count_fn = self.auto_count_fn(x[0]) return lambda y: reduce(lambda sum, z, item_count_fn=item_count_fn: sum + item_count_fn(z), y, 0) - elif (isinstance(x, (str, fsspec.core.OpenFile))): + + if (isinstance(x, (str, fsspec.core.OpenFile))): return lambda y: 1 - elif (hasattr(x, "__len__")): + + if (hasattr(x, "__len__")): return len # Return len directly (same as `lambda y: len(y)`) - else: - raise NotImplementedError(f"Unsupported type: {type(x)}") + + raise NotImplementedError(f"Unsupported type: {type(x)}") def sink_on_completed(self): """ diff --git a/pyproject.toml b/pyproject.toml index d32aff6787..e0dbbe46b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ verbose = 2 # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code. 
-extension-pkg-allow-list = ["mrc", "morpheus._lib", "morpheus._lib.stages"]
+extension-pkg-allow-list = ["mrc", "morpheus._lib", "morpheus._lib.messages", "morpheus._lib.stages"]

 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loading into the active Python interpreter and may
@@ -244,7 +244,26 @@ function-naming-style = "snake_case"
 # function-rgx =

 # Good variable names which should always be accepted, separated by a comma.
-good-names = ["ae", "df", "e", "f", "fh", "i", "ip", "j", "k", "ex", "Run", "_", "x", "y", "X", "Y"]
+good-names = [
+    "_",
+    "ae",
+    "df",
+    "e",
+    "ex",
+    "f",
+    "fh",
+    "fn",
+    "i",
+    "ip",
+    "j",
+    "k",
+    "Run",
+    "ts",
+    "x",
+    "X",
+    "y",
+    "Y",
+]

 # Good variable names regexes, separated by a comma. If names match any regex,
 # they will always be accepted
@@ -604,7 +623,9 @@ contextmanager-decorators = ["contextlib.contextmanager"]
 # List of members which are set dynamically and missed by pylint inference
 # system, and so shouldn't trigger E1101 when accessed. Python regular
 # expressions are accepted.
-# generated-members =
+
+# Add pytorch members to the list of dynamically set members. Only until pylint 3.0 supports pyi files
+generated-members = "torch.*"

 # Tells whether missing members accessed in mixin class should be ignored. A
 # class is considered mixin if its name matches the mixin-class-rgx option.
diff --git a/tests/conftest.py b/tests/conftest.py
index 93a9c159ff..12364b9c13 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,7 @@
 import subprocess
 import sys
 import time
+import types
 import typing
 import warnings
 from collections import namedtuple
@@ -378,27 +379,104 @@ def restore_sys_path():


 @pytest.fixture(scope="function")
-def import_mod(request: pytest.FixtureRequest, restore_sys_path):
+def import_mod(request: pytest.FixtureRequest,
+               restore_sys_path) -> typing.Generator[types.ModuleType | list[types.ModuleType], None, None]:
+    # pylint: disable=missing-param-doc
+    # pylint: disable=differing-param-doc
+    # pylint: disable=missing-type-doc
+    # pylint: disable=differing-type-doc
+    """
+    Allows direct import of a module by specifying its path. This is useful for testing examples that import modules in
+    examples or other non-installed directories.
+
+    Parameters
+    ----------
+    modules : str | list[str]
+        The modules to import. Modules can be supplied as a list or multiple arguments.
+    sys_path : str | int, optional
+        The path appended to `sys.path` before importing. When a string, it is used directly. When an integer, the
+        path is built by stripping that many trailing components from the module path (`-1` is the module's own
+        directory, `-2` its parent). Defaults to the module's directory.
+
+    Yields
+    ------
+    Iterator[typing.Generator[types.ModuleType | list[types.ModuleType], None, None]]
+        Imported modules. If more than one module is supplied, or the only argument is a list, the modules will be
+        returned as a list.
+
+    Example
+    -------
+    ```
+    @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'example/stage.py'))
+    def test_python_test(import_mod: types.ModuleType):
+        # Imported with sys.path.append(os.path.dirname(os.path.join(TEST_DIRS.examples_dir, 'example/stage.py')))
+        ...
+
+    @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'example/stage.py'), sys_path=-2)
+    def test_python_test(import_mod: types.ModuleType):
+        # Imported with sys.path.append(os.path.join(TEST_DIRS.examples_dir, 'example/stage.py', '../..'))
+        ...
+
+    @pytest.mark.import_mod([os.path.join(TEST_DIRS.examples_dir, 'example/stage.py')], sys_path=TEST_DIRS.examples_dir)
+    def test_python_test(import_mod: list[types.ModuleType]):
+        # Imported with sys.path.append(TEST_DIRS.examples_dir)
+        ...
+ ``` + """ + marker = request.node.get_closest_marker("import_mod") if marker is not None: - mod_paths = marker.args[0] - if not isinstance(mod_paths, list): - mod_paths = [mod_paths] + mod_paths = sum([x if isinstance(x, list) else [x] for x in marker.args], []) + + mod_kwargs = marker.kwargs + + is_list = len(marker.args) > 1 or isinstance(marker.args[0], list) modules = [] module_names = [] + for mod_path in mod_paths: - mod_dir, mod_fname = os.path.split(mod_path) - mod_name, _ = os.path.splitext(mod_fname) + # Ensure everything is absolute to avoid issues with relative paths + mod_path = os.path.abspath(mod_path) + + # See if its a file or directory + is_file = os.path.isfile(mod_path) + + # Get the base directory that we should import from. If not specified, use the directory of the module + sys_path = mod_kwargs.get("sys_path", os.path.dirname(mod_path)) + + # If sys_path is an integer, use it to get the path relative to the module by number of directories. i.e. if + # sys_path=-1, then sys_path=os.path.dirname(mod_path). If sys_path=-2, then + # sys_path=os.path.dirname(os.path.dirname(mod_path)) + if (isinstance(sys_path, int)): + sys_path = os.path.join("/", *mod_path.split(os.path.sep)[:sys_path]) + + # Get the path relative to the sys_path, ignore the extension if its a file + mod_name = os.path.relpath(mod_path if not is_file else os.path.splitext(mod_path)[0], start=sys_path) - sys.path.append(mod_dir) - module_names.append(mod_name) - mod = importlib.import_module(mod_name) - assert mod.__file__ == mod_path + # Convert all / to . + mod_name = mod_name.replace(os.path.sep, ".") - modules.append(mod) + # Add to the sys path so this can be imported + sys.path.append(sys_path) - yield modules + try: + + # Import the module + mod = importlib.import_module(mod_name) + + if (is_file): + assert mod.__file__ == mod_path + + modules.append(mod) + module_names.append(mod_name) + except ImportError as e: + + raise ImportError(f"Failed to import module {mod_path} as {mod_name} from path {sys_path}") from e + + # Only yield 1 if we only imported 1 + if (is_list): + yield modules + else: + yield modules[0] # Un-import modules we previously imported, this allows for multiple examples to contain a `messages.py` for mod in module_names: @@ -547,7 +625,7 @@ def _camouflage_is_running(): logger = logging.getLogger(f"morpheus.{__name__}") root_dir = TEST_DIRS.mock_triton_servers_dir - startup_timeout = 5 + startup_timeout = 10 shutdown_timeout = 5 launch_camouflage = os.environ.get('MORPHEUS_NO_LAUNCH_CAMOUFLAGE') is None diff --git a/tests/dfencoder/test_autoencoder.py b/tests/dfencoder/test_autoencoder.py index e316558f19..43ac175138 100755 --- a/tests/dfencoder/test_autoencoder.py +++ b/tests/dfencoder/test_autoencoder.py @@ -30,6 +30,8 @@ from utils import TEST_DIRS from utils.dataset_manager import DatasetManager +# pylint: disable=redefined-outer-name + # Only pandas and Python is supported pytestmark = [pytest.mark.use_pandas, pytest.mark.use_python] @@ -83,13 +85,13 @@ def train_df(dataset_pandas: DatasetManager) -> typing.Iterator[pd.DataFrame]: def compare_numeric_features(features, expected_features): assert sorted(features.keys()) == sorted(expected_features.keys()) - for (ft, expected_vals) in expected_features.items(): - ae_vals = features[ft] + for (feature, expected_vals) in expected_features.items(): + ae_vals = features[feature] assert round(ae_vals['mean'], 2) == expected_vals['mean'], \ - f"Mean value of feature:{ft} does not match {round(ae_vals['mean'], 2)}!= 
{expected_vals['mean']}"
+        f"Mean value of feature:{feature} does not match {round(ae_vals['mean'], 2)}!= {expected_vals['mean']}"

         assert round(ae_vals['std'], 2) == expected_vals['std'], \
-            f"Mean value of feature:{ft} does not match {round(ae_vals['std'], 2)}!= {expected_vals['std']}"
+            f"Std value of feature:{feature} does not match {round(ae_vals['std'], 2)}!= {expected_vals['std']}"

         assert isinstance(ae_vals['scaler'], expected_vals['scaler_cls'])

@@ -107,58 +109,58 @@ def test_ohe():


 def test_compute_embedding_size():
-    for (input, expected) in [(0, 0), (5, 4), (20, 9), (40000, 600)]:
-        assert ae_module._compute_embedding_size(input) == expected
+    for (inp, expected) in [(0, 0), (5, 4), (20, 9), (40000, 600)]:
+        assert ae_module._compute_embedding_size(inp) == expected


 def test_complete_layer_constructor():
-    cc = ae_module.CompleteLayer(4, 5)
-    assert len(cc.layers) == 1
-    assert isinstance(cc.layers[0], torch.nn.Linear)
-    assert cc.layers[0].in_features == 4
-    assert cc.layers[0].out_features == 5
-
-    cc = ae_module.CompleteLayer(4, 5, activation='tanh')
-    assert len(cc.layers) == 2
-    assert cc.layers[1] is torch.tanh
-
-    cc = ae_module.CompleteLayer(4, 5, dropout=0.2)
-    assert len(cc.layers) == 2
-    assert isinstance(cc.layers[1], torch.nn.Dropout)
-    assert cc.layers[1].p == 0.2
-
-    cc = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
-    assert len(cc.layers) == 3
-    assert isinstance(cc.layers[0], torch.nn.Linear)
-    assert cc.layers[0].in_features == 6
-    assert cc.layers[0].out_features == 11
-    assert cc.layers[1] is torch.sigmoid
-    assert isinstance(cc.layers[2], torch.nn.Dropout)
-    assert cc.layers[2].p == 0.3
+    layer = ae_module.CompleteLayer(4, 5)
+    assert len(layer.layers) == 1
+    assert isinstance(layer.layers[0], torch.nn.Linear)
+    assert layer.layers[0].in_features == 4
+    assert layer.layers[0].out_features == 5
+
+    layer = ae_module.CompleteLayer(4, 5, activation='tanh')
+    assert len(layer.layers) == 2
+    assert layer.layers[1] is torch.tanh
+
+    layer = ae_module.CompleteLayer(4, 5, dropout=0.2)
+    assert len(layer.layers) == 2
+    assert isinstance(layer.layers[1], torch.nn.Dropout)
+    assert layer.layers[1].p == 0.2
+
+    layer = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
+    assert len(layer.layers) == 3
+    assert isinstance(layer.layers[0], torch.nn.Linear)
+    assert layer.layers[0].in_features == 6
+    assert layer.layers[0].out_features == 11
+    assert layer.layers[1] is torch.sigmoid
+    assert isinstance(layer.layers[2], torch.nn.Dropout)
+    assert layer.layers[2].p == 0.3


 def test_complete_layer_interpret_activation():
-    cc = ae_module.CompleteLayer(4, 5)
-    assert cc.interpret_activation('elu') is torch.nn.functional.elu
+    layer = ae_module.CompleteLayer(4, 5)
+    assert layer.interpret_activation('elu') is torch.nn.functional.elu

     # Test for bad activation, this really does raise the base Exception class.
     with pytest.raises(Exception):
-        cc.interpret_activation()
+        layer.interpret_activation()

     with pytest.raises(Exception):
-        cc.interpret_activation("does_not_exist")
+        layer.interpret_activation("does_not_exist")

-    cc = ae_module.CompleteLayer(6, 11, activation='sigmoid')
-    cc.interpret_activation() is torch.sigmoid
+    layer = ae_module.CompleteLayer(6, 11, activation='sigmoid')
+    assert layer.interpret_activation() is torch.sigmoid


 @pytest.mark.usefixtures("manual_seed")
 def test_complete_layer_forward():
     # Setting dropout probability to 0. The results of dropout are deterministic, but are only
     # consistent when run on the same GPU. 
- cc = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0) - t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=torch.float32) - results = cc.forward(t) + layer = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0) + tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=torch.float32) + results = layer.forward(tensor) expected = torch.tensor([[0.7223, 0.7902, 0.9647, 0.5613, 0.9163], [0.9971, 0.9897, 0.9988, 0.8317, 0.9992], [1.0000, 0.9995, 1.0000, 0.9417, 1.0000], [1.0000, 1.0000, 1.0000, 0.9806, 1.0000]], dtype=torch.float32) @@ -274,13 +276,13 @@ def test_auto_encoder_fit(train_ae: autoencoder.AutoEncoder, train_df: pd.DataFr all_feature_names = sorted(NUMERIC_COLS + BIN_COLS + CAT_COLS) assert sorted(train_ae.feature_loss_stats.keys()) == all_feature_names - for ft in train_ae.feature_loss_stats.values(): - assert isinstance(ft['scaler'], scalers.StandardScaler) + for feature in train_ae.feature_loss_stats.values(): + assert isinstance(feature['scaler'], scalers.StandardScaler) assert isinstance(train_ae.optim, torch.optim.SGD) assert isinstance(train_ae.lr_decay, torch.optim.lr_scheduler.ExponentialLR) assert train_ae.lr_decay.gamma == 0.99 - train_ae.optim is train_ae.lr_decay.optimizer + assert train_ae.optim is train_ae.lr_decay.optimizer def test_auto_encoder_fit_early_stopping(train_df: pd.DataFrame): @@ -299,7 +301,8 @@ class MockHelper: """A helper class for mocking the `_validate_dataframe` method in the `AutoEncoder` class.""" def __init__(self, orig_losses, swapped_loss=1.0): - """ Initialization. + """ + Initialization. Parameters: ----------- @@ -314,7 +317,7 @@ def __init__(self, orig_losses, swapped_loss=1.0): # counter to keep track of the number of times the mocked `_validate_dataframe` method has been called self.count = 0 - def mocked_validate_dataframe(self, *args, **kwargs): + def mocked_validate_dataframe(self, *args, **kwargs): # pylint: disable=unused-argument """ A mocked version of the `_validate_dataframe` method in the `AutoEncoder` class for testing early stopping. 
@@ -361,7 +364,7 @@ def test_auto_encoder_get_anomaly_score_losses(train_ae: autoencoder.AutoEncoder row_cnt = 10 # create a dummy DataFrame with categorical features data = { - 'num_1': [i for i in range(row_cnt)], + 'num_1': list(range(row_cnt)), 'num_2': [i / 2 for i in range(row_cnt)], 'num_3': [i / 2 for i in range(row_cnt)], 'bool_1': [i % 2 == 0 for i in range(row_cnt)], @@ -385,7 +388,7 @@ def test_auto_encoder_get_anomaly_score_losses_no_cat_feats(train_ae: autoencode # create a dummy DataFrame with numerical and boolean features only row_cnt = 10 data = { - 'num_1': [i for i in range(row_cnt)], + 'num_1': list(range(row_cnt)), 'bool_1': [i % 2 == 0 for i in range(row_cnt)], 'bool_2': [i % 3 == 0 for i in range(row_cnt)] } @@ -410,13 +413,13 @@ def test_auto_encoder_prepare_df(train_ae: autoencoder.AutoEncoder, train_df: pd assert isinstance(prepared_df, EncoderDataFrame) - for (i, ft) in enumerate(NUMERIC_COLS): + for feature in NUMERIC_COLS: scaler = scalers.StandardScaler() - scaler.fit(train_df[ft].values) - expected_values = scaler.transform(train_df[ft].values.copy()) + scaler.fit(train_df[feature].values) + expected_values = scaler.transform(train_df[feature].values.copy()) - assert (prepared_df[ft].values == expected_values).all(), \ - f"Values for feature {ft} do not match {prepared_df[ft]} != {expected_values}" + assert (prepared_df[feature].values == expected_values).all(), \ + f"Values for feature {feature} do not match {prepared_df[feature]} != {expected_values}" # Bin features should remain the same when the input is already boolean, this DF only has one assert (prepared_df.ts_anomaly == train_df.ts_anomaly).all() @@ -447,11 +450,11 @@ def test_auto_encoder_get_results(train_ae: autoencoder.AutoEncoder, train_df: p train_ae.fit(train_df, epochs=1) results = train_ae.get_results(train_df) - for ft in sorted(NUMERIC_COLS + BIN_COLS + CAT_COLS): - assert ft in results.columns - assert f'{ft}_pred' in results.columns - assert f'{ft}_loss' in results.columns - assert f'{ft}_z_loss' in results.columns + for feature in sorted(NUMERIC_COLS + BIN_COLS + CAT_COLS): + assert feature in results.columns + assert f'{feature}_pred' in results.columns + assert f'{feature}_loss' in results.columns + assert f'{feature}_z_loss' in results.columns assert 'max_abs_z' in results.columns assert 'mean_abs_z' in results.columns diff --git a/tests/dfencoder/test_dfencoder_distributed_e2e.py b/tests/dfencoder/test_dfencoder_distributed_e2e.py index 351882dba2..2ecad2e7c7 100644 --- a/tests/dfencoder/test_dfencoder_distributed_e2e.py +++ b/tests/dfencoder/test_dfencoder_distributed_e2e.py @@ -200,7 +200,8 @@ def _run_test(rank, world_size): # make sure the user baseline is modeled well enough so the minimum and median z scores # from inference are in range assert min(inf_res.mean_abs_z) < 1 - assert (np.median(inf_res.mean_abs_z) < 100 - ) # expect median mean_abs_z to be < 50. Using 100 to leave some room for variability + + # expect median mean_abs_z to be < 50. 
Using 100 to leave some room for variability + assert (np.median(inf_res.mean_abs_z) < 100) cleanup_dist() diff --git a/tests/dfencoder/test_dfencoder_e2e.py b/tests/dfencoder/test_dfencoder_e2e.py index dd95cbe071..2fa9dcb3d6 100644 --- a/tests/dfencoder/test_dfencoder_e2e.py +++ b/tests/dfencoder/test_dfencoder_e2e.py @@ -120,9 +120,9 @@ def test_dfencoder_e2e(): # Make sure model converges (low loss) for loss_type in LOSS_TYPES: ft_losses = getattr(model.logger, f"{loss_type}_fts") - for ft, losses_l in ft_losses.items(): + for feature, losses_l in ft_losses.items(): losses = losses_l[1] - assert min(losses) < LOSS_TARGETS[loss_type][ft] * LOSS_TOLERANCE_RATIO + assert min(losses) < LOSS_TARGETS[loss_type][feature] * LOSS_TOLERANCE_RATIO # Inference inf_res = model.get_results(inference_df) @@ -135,5 +135,6 @@ def test_dfencoder_e2e(): # make sure the user baseline is modeled well enough so the minimum and median z scores # from inference are in range assert min(inf_res.mean_abs_z) < 1 - assert (np.median(inf_res.mean_abs_z) < 100 - ) # expect median mean_abs_z to be < 50. Using 100 to leave some room for variability + + # expect median mean_abs_z to be < 50. Using 100 to leave some room for variability + assert (np.median(inf_res.mean_abs_z) < 100) diff --git a/tests/examples/developer_guide/test_pass_thru.py b/tests/examples/developer_guide/test_pass_thru.py index c7fb5b6f5b..a9be04164e 100644 --- a/tests/examples/developer_guide/test_pass_thru.py +++ b/tests/examples/developer_guide/test_pass_thru.py @@ -31,26 +31,27 @@ def _check_pass_thru(config: Config, filter_probs_df: typing.Union[pd.DataFrame, cudf.DataFrame], - PassThruStageCls: SinglePortStage): - stage = PassThruStageCls(config) + pass_thru_stage_cls: SinglePortStage): + stage = pass_thru_stage_cls(config) meta = MessageMeta(filter_probs_df) - mm = MultiMessage(meta=meta) + multi = MultiMessage(meta=meta) - assert stage.on_data(mm) is mm + assert stage.on_data(multi) is multi -@pytest.mark.import_mod([os.path.join(TEST_DIRS.examples_dir, 'developer_guide/1_simple_python_stage/pass_thru.py')]) +@pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'developer_guide/1_simple_python_stage/pass_thru.py')) def test_pass_thru_ex1(config: Config, filter_probs_df: typing.Union[pd.DataFrame, cudf.DataFrame], - import_mod: typing.List[types.ModuleType]): - pass_thru = import_mod[0] + import_mod: types.ModuleType): + pass_thru = import_mod _check_pass_thru(config, filter_probs_df, pass_thru.PassThruStage) -@pytest.mark.import_mod([os.path.join(TEST_DIRS.examples_dir, 'developer_guide/3_simple_cpp_stage/pass_thru.py')]) +@pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'developer_guide/3_simple_cpp_stage/_lib/pass_thru.py'), + sys_path=-2) def test_pass_thru_ex3(config: Config, filter_probs_df: typing.Union[pd.DataFrame, cudf.DataFrame], - import_mod: typing.List[types.ModuleType]): - pass_thru = import_mod[0] + import_mod: types.ModuleType): + pass_thru = import_mod _check_pass_thru(config, filter_probs_df, pass_thru.PassThruStage) diff --git a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py index 6abe3230a6..792b19fd1b 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py @@ -31,7 +31,9 @@ def test_constructor(config: Config): from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage - date_conversion_func = lambda x: 
x # noqa E731 + def date_conversion_func(x): + return x + stage = DFPFileBatcherStage(config, date_conversion_func, 'M', @@ -77,7 +79,7 @@ def test_on_data(config: Config): stage = DFPFileBatcherStage(config, date_conversion_func) - assert stage.on_data([]) == [] + assert not stage.on_data([]) test_data_dir = os.path.join(TEST_DIRS.tests_data_dir, 'appshield', 'snapshot-1') file_specs = fsspec.open_files(os.path.join(test_data_dir, '*.json')) @@ -100,12 +102,13 @@ def test_on_data(config: Config): expected_10_26_files = sorted(f.path for f in fsspec.open_files(os.path.join(test_data_dir, '*_2022-01-30_10-26*.json'))) - (b1, b2) = batches - assert sorted(f.path for f in b1[0]) == expected_10_25_files - assert b1[1] == 2 + batch1 = batches[0] + batch2 = batches[1] + assert sorted(f.path for f in batch1[0]) == expected_10_25_files + assert batch1[1] == 2 - assert sorted(f.path for f in b2[0]) == expected_10_26_files - assert b2[1] == 2 + assert sorted(f.path for f in batch2[0]) == expected_10_26_files + assert batch2[1] == 2 # Test with a start time that excludes some files stage = DFPFileBatcherStage(config, diff --git a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py index 00d51b6f6d..578ecded6b 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py @@ -24,6 +24,8 @@ from morpheus.utils.logger import set_log_level from utils.dataset_manager import DatasetManager +# pylint: disable=redefined-outer-name + @pytest.fixture(autouse=True) def mock_mlflow_client(): @@ -47,7 +49,7 @@ def test_constructor(config: Config, mock_mlflow_client: mock.MagicMock, mock_mo assert isinstance(stage, SinglePortStage) assert stage._client is mock_mlflow_client assert stage._fallback_user == config.ae.fallback_username - assert stage._model_cache == {} + assert not stage._model_cache assert stage._model_manager is mock_model_manager mock_mlflow_client.assert_called_once() @@ -73,7 +75,7 @@ def test_get_model(config: Config, mock_mlflow_client: mock.MagicMock, mock_mode [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) def test_on_data( config: Config, - mock_mlflow_client: mock.MagicMock, + mock_mlflow_client: mock.MagicMock, # pylint: disable=unused-argument mock_model_manager: mock.MagicMock, dfp_multi_message: "MultiDFPMessage", # noqa: F821 morpheus_log_level: int, @@ -83,7 +85,7 @@ def test_on_data( set_log_level(morpheus_log_level) - expected_results = [i for i in range(1000, dfp_multi_message.mess_count + 1000)] + expected_results = list(range(1000, dfp_multi_message.mess_count + 1000)) expected_df = dfp_multi_message.get_meta_dataframe().copy(deep=True) expected_df["results"] = expected_results @@ -112,7 +114,7 @@ def test_on_data( @pytest.mark.parametrize("raise_error", [True, False]) def test_on_data_get_model_error( config: Config, - mock_mlflow_client: mock.MagicMock, + mock_mlflow_client: mock.MagicMock, # pylint: disable=unused-argument mock_model_manager: mock.MagicMock, dfp_multi_message: "MultiDFPMessage", # noqa: F821 raise_error: bool): diff --git a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py index ca97e07af2..c904f46720 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py +++ b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py @@ -27,6 
+27,8 @@ from utils import TEST_DIRS from utils.dataset_manager import DatasetManager +# pylint: disable=redefined-outer-name + MockedRequests = namedtuple("MockedRequests", ["get", "patch", "response"]) MockedMLFlow = namedtuple("MockedMLFlow", [ @@ -46,7 +48,7 @@ @pytest.fixture -def databricks_env(restore_environ): +def databricks_env(restore_environ): # pylint: disable=unused-argument env = {'DATABRICKS_HOST': 'https://test_host', 'DATABRICKS_TOKEN': 'test_token'} os.environ.update(env) yield env @@ -158,18 +160,20 @@ def verify_apply_model_permissions(mock_requests: MockedRequests, databricks_env: dict, databricks_permissions: OrderedDict, experiment_name: str): - expected_headers = {"Authorization": "Bearer {DATABRICKS_TOKEN}".format(**databricks_env)} + expected_headers = {"Authorization": f"Bearer {databricks_env['DATABRICKS_TOKEN']}"} mock_requests.get.assert_called_once_with( - url="{DATABRICKS_HOST}/api/2.0/mlflow/databricks/registered-models/get".format(**databricks_env), + url=f"{databricks_env['DATABRICKS_HOST']}/api/2.0/mlflow/databricks/registered-models/get", headers=expected_headers, - params={"name": experiment_name}) + params={"name": experiment_name}, + timeout=10) expected_acl = [{'group_name': group, 'permission_level': pl} for (group, pl) in databricks_permissions.items()] mock_requests.patch.assert_called_once_with( - url="{DATABRICKS_HOST}/api/2.0/preview/permissions/registered-models/test_id".format(**databricks_env), + url=f"{databricks_env['DATABRICKS_HOST']}/api/2.0/preview/permissions/registered-models/test_id", headers=expected_headers, - json={'access_control_list': expected_acl}) + json={'access_control_list': expected_acl}, + timeout=10) def test_apply_model_permissions(config: Config, databricks_env: dict, mock_requests: MockedRequests): @@ -285,10 +289,12 @@ def test_on_data(config: Config, "Batch size": 100, "Start Epoch": min_time, "End Epoch": max_time, - "Log Count": len(df)}) + "Log Count": len(df) + }) - mock_mlflow.log_metrics.assert_called_once_with({"embedding-test-num_embeddings": 101, - "embedding-test-embedding_dim": 102}) + mock_mlflow.log_metrics.assert_called_once_with({ + "embedding-test-num_embeddings": 101, "embedding-test-embedding_dim": 102 + }) mock_model.prepare_df.assert_called_once() mock_model.get_anomaly_score.assert_called_once() diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index 0473b4218a..0ae8bca2b5 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -59,7 +59,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, # on_data is a thin wrapper around process_events, tests should be the same for non-empty messages if use_on_data: - stage.on_data(dfp_multi_ae_message) is dfp_multi_ae_message + assert stage.on_data(dfp_multi_ae_message) is dfp_multi_ae_message else: stage._process_events(dfp_multi_ae_message) @@ -69,10 +69,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, assert result_df['v2'][10] == 'NaN' -def test_on_data_none( - config: Config, - dfp_message_meta: "DFPMessageMeta" # noqa: F821 -): +def test_on_data_none(config: Config): from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage stage = DFPPostprocessingStage(config) assert stage.on_data(None) is None diff --git a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py 
b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py index 1679a4518e..bfdde6cf0e 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py @@ -49,7 +49,7 @@ def test_constructor(config: Config): assert stage._min_increment == 7 assert stage._max_history == 100 assert stage._cache_dir.startswith('/test/path/cache') - assert stage._user_cache_map == {} + assert not stage._user_cache_map def test_get_user_cache_hit(config: Config): @@ -78,7 +78,7 @@ def test_get_user_cache_miss(config: Config): assert results.timestamp_column == 'test_timestamp_col' with stage._get_user_cache('test_user') as results2: - results2 is results + assert results2 is results def test_build_window_no_new( @@ -92,7 +92,7 @@ def test_build_window_no_new( mock_cache = build_mock_user_cache() mock_cache.append_dataframe.return_value = False stage._user_cache_map[dfp_message_meta.user_id] = mock_cache - stage._build_window(dfp_message_meta) is None + assert stage._build_window(dfp_message_meta) is None def test_build_window_not_enough_data( @@ -105,7 +105,7 @@ def test_build_window_not_enough_data( mock_cache = build_mock_user_cache(count=3) stage._user_cache_map[dfp_message_meta.user_id] = mock_cache - stage._build_window(dfp_message_meta) is None + assert stage._build_window(dfp_message_meta) is None def test_build_window_min_increment( @@ -118,7 +118,7 @@ def test_build_window_min_increment( mock_cache = build_mock_user_cache(count=5, total_count=30, last_train_count=25) stage._user_cache_map[dfp_message_meta.user_id] = mock_cache - stage._build_window(dfp_message_meta) is None + assert stage._build_window(dfp_message_meta) is None def test_build_window_invalid( diff --git a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py index 50376bd3a4..5d36034026 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py @@ -32,9 +32,9 @@ def test_constructor(config: Config): assert isinstance(stage, SinglePortStage) assert not stage._include_generic assert stage._include_individual - assert stage._skip_users == [] - assert stage._only_users == [] - assert stage._user_index_map == {} + assert not stage._skip_users + assert not stage._only_users + assert not stage._user_index_map stage = DFPSplitUsersStage(config, include_generic=True, @@ -46,7 +46,7 @@ def test_constructor(config: Config): assert not stage._include_individual assert stage._skip_users == ['a', 'b'] assert stage._only_users == ['c', 'd'] - assert stage._user_index_map == {} + assert not stage._user_index_map @pytest.mark.parametrize('include_generic', [True, False]) @@ -76,8 +76,8 @@ def test_extract_users(config: Config, expected_data = {} with open(input_file, encoding='UTF-8') as fh: for line in fh: - d = json.loads(line) - user_id = d['From'] + data = json.loads(line) + user_id = data['From'] if user_id in skip_users: continue @@ -85,10 +85,10 @@ def test_extract_users(config: Config, continue if include_generic: - all_data.append(d) + all_data.append(data) if include_individual: - expected_data[user_id] = [d] + expected_data[user_id] = [data] if include_generic: expected_data[config.ae.fallback_username] = all_data @@ -117,4 +117,4 @@ def test_extract_users_none_to_empty(config: Config): from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage stage = 
DFPSplitUsersStage(config, include_generic=True, include_individual=True) - assert stage.extract_users(None) == [] + assert not stage.extract_users(None) diff --git a/tests/examples/digital_fingerprinting/test_dfp_training.py b/tests/examples/digital_fingerprinting/test_dfp_training.py index 4a4ee7821d..b038fd4721 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_training.py +++ b/tests/examples/digital_fingerprinting/test_dfp_training.py @@ -84,7 +84,7 @@ def test_on_data(mock_train_test_split: mock.MagicMock, mock_train_test_split.assert_called_once() assert len(mock_train_test_split.call_args.args) == 1 dataset_pandas.assert_compare_df(mock_train_test_split.call_args.args[0], train_df) - mock_train_test_split.call_args.kwargs == {'test_size': validation_size, 'shuffle': False} + assert mock_train_test_split.call_args.kwargs == {'test_size': validation_size, 'shuffle': False} else: expected_run_validation = False expected_val_data = None @@ -94,7 +94,7 @@ def test_on_data(mock_train_test_split: mock.MagicMock, assert len(mock_ae.fit.call_args.args) == 1 dataset_pandas.assert_compare_df(mock_ae.fit.call_args.args[0], train_df) - mock_ae.fit.call_args.kwargs == { + assert mock_ae.fit.call_args.kwargs == { 'epochs': stage._epochs, 'val_data': expected_val_data, 'run_validation': expected_run_validation } diff --git a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py index 2571b4fb17..f6f77d3db3 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py +++ b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py @@ -22,16 +22,18 @@ from morpheus.pipeline.single_port_stage import SinglePortStage from utils.dataset_manager import DatasetManager +# pylint: disable=redefined-outer-name + @pytest.fixture def dfp_multi_message(config, dfp_multi_message): # Fill in some values for columns that the stage is looking for with dfp_multi_message.meta.mutable_dataframe() as df: step = (len(df) + 1) * 100 - df["mean_abs_z"] = [i for i in range(0, len(df) * step, step)] + df["mean_abs_z"] = list(range(0, len(df) * step, step)) for (i, col) in enumerate(sorted(config.ae.feature_columns)): step = i + 1 * 100 - df[f"{col}_z_loss"] = [k for k in range(0, len(df) * step, step)] + df[f"{col}_z_loss"] = list(range(0, len(df) * step, step)) yield dfp_multi_message @@ -61,7 +63,7 @@ def test_constructor(config: Config): assert stage._period == 'M' assert stage._output_dir == '/fake/test/dir' assert stage._output_prefix == 'test_prefix' - assert stage._output_filenames == [] + assert not stage._output_filenames def test_postprocess( @@ -88,7 +90,7 @@ def test_write_to_files( from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage stage = DFPVizPostprocStage(config, period='min', output_dir=tmp_path, output_prefix='test_prefix_') - stage._write_to_files(dfp_multi_message) is dfp_multi_message + assert stage._write_to_files(dfp_multi_message) is dfp_multi_message # The times in the DF have a 30 second step, so the number of unique minutes is half the length of the DF num_expected_periods = len(expected_df) // 2 diff --git a/tests/examples/gnn_fraud_detection_pipeline/conftest.py b/tests/examples/gnn_fraud_detection_pipeline/conftest.py index ed8e690878..630701ae69 100644 --- a/tests/examples/gnn_fraud_detection_pipeline/conftest.py +++ b/tests/examples/gnn_fraud_detection_pipeline/conftest.py @@ -21,6 +21,8 @@ from utils import TEST_DIRS from utils import import_or_skip +# pylint: 
disable=unused-argument, redefined-outer-name + SKIP_REASON = ("Tests for the gnn_fraud_detection_pipeline example require a number of packages not installed in the " "Morpheus development environment. See `examples/gnn_fraud_detection_pipeline/README.md` for details on " "installing these additional dependencies") @@ -132,9 +134,11 @@ def test_data(): assert len(expected_edges) == 20 # ensuring test data & assumptions are correct - yield dict(index=index, - client_data=client_data, - merchant_data=merchant_data, - df=df, - expected_nodes=expected_nodes, - expected_edges=expected_edges) + yield { + "index": index, + "client_data": client_data, + "merchant_data": merchant_data, + "df": df, + "expected_nodes": expected_nodes, + "expected_edges": expected_edges + } diff --git a/tests/examples/gnn_fraud_detection_pipeline/test_graph_construction_stage.py b/tests/examples/gnn_fraud_detection_pipeline/test_graph_construction_stage.py index d39c00d994..8cb53bed0f 100644 --- a/tests/examples/gnn_fraud_detection_pipeline/test_graph_construction_stage.py +++ b/tests/examples/gnn_fraud_detection_pipeline/test_graph_construction_stage.py @@ -46,17 +46,17 @@ def test_constructor(self, config: Config, training_file: str, import_mod: typin def _check_graph( self, stellargraph: types.ModuleType, - sg: "stellargraph.StellarGraph", # noqa: F821 + graph: "stellargraph.StellarGraph", # noqa: F821 expected_nodes, expected_edges): - assert isinstance(sg, stellargraph.StellarGraph) - sg.check_graph_for_ml(features=True, expensive_check=True) # this will raise if it doesn't pass - assert not sg.is_directed() + assert isinstance(graph, stellargraph.StellarGraph) + graph.check_graph_for_ml(features=True, expensive_check=True) # this will raise if it doesn't pass + assert not graph.is_directed() - nodes = sg.nodes() + nodes = graph.nodes() assert set(nodes) == expected_nodes - edges = sg.edges() + edges = graph.edges() assert set(edges) == expected_edges def test_graph_construction(self, @@ -70,7 +70,7 @@ def test_graph_construction(self, merchant_features = pd.DataFrame({0: 1}, index=test_data['merchant_data']) # Call _graph_construction - sg = graph_construction_stage.FraudGraphConstructionStage._graph_construction( + graph = graph_construction_stage.FraudGraphConstructionStage._graph_construction( nodes={ 'client': df.client_node, 'merchant': df.merchant_node, 'transaction': df.index }, @@ -84,15 +84,15 @@ def test_graph_construction(self, "merchant": merchant_features }) - self._check_graph(stellargraph, sg, test_data['expected_nodes'], test_data['expected_edges']) + self._check_graph(stellargraph, graph, test_data['expected_nodes'], test_data['expected_edges']) def test_build_graph_features(self, import_mod: typing.List[types.ModuleType], stellargraph: types.ModuleType, test_data: dict): graph_construction_stage = import_mod[0] - sg = graph_construction_stage.FraudGraphConstructionStage._build_graph_features(test_data['df']) - self._check_graph(stellargraph, sg, test_data['expected_nodes'], test_data['expected_edges']) + graph = graph_construction_stage.FraudGraphConstructionStage._build_graph_features(test_data['df']) + self._check_graph(stellargraph, graph, test_data['expected_nodes'], test_data['expected_edges']) def test_process_message(self, config: Config, @@ -108,8 +108,8 @@ def test_process_message(self, # Since we used the first 5 rows as the training data, send the second 5 as inference data meta = MessageMeta(cudf.DataFrame(df)) - mm = MultiMessage(meta=meta, mess_offset=5, mess_count=5) - fgmm = 
stage._process_message(mm) + multi = MultiMessage(meta=meta, mess_offset=5, mess_count=5) + fgmm = stage._process_message(multi) assert isinstance(fgmm, graph_construction_stage.FraudGraphMultiMessage) assert fgmm.meta is meta diff --git a/tests/examples/gnn_fraud_detection_pipeline/test_graph_sage_stage.py b/tests/examples/gnn_fraud_detection_pipeline/test_graph_sage_stage.py index b8a449de48..768622cdec 100644 --- a/tests/examples/gnn_fraud_detection_pipeline/test_graph_sage_stage.py +++ b/tests/examples/gnn_fraud_detection_pipeline/test_graph_sage_stage.py @@ -27,11 +27,12 @@ @pytest.mark.use_python class TestGraphSageStage: - def test_constructor(self, - config: Config, - hinsage_model: str, - gnn_fraud_detection_pipeline: types.ModuleType, - tensorflow): + def test_constructor( + self, + config: Config, + hinsage_model: str, + gnn_fraud_detection_pipeline: types.ModuleType, # pylint: disable=unused-argument + tensorflow): from gnn_fraud_detection_pipeline.stages.graph_sage_stage import GraphSAGEStage stage = GraphSAGEStage(config, model_hinsage_file=hinsage_model, @@ -46,12 +47,13 @@ def test_constructor(self, assert stage._record_id == "test_id" assert stage._target_node == "test_node" - def test_inductive_step_hinsage(self, - config: Config, - hinsage_model: str, - gnn_fraud_detection_pipeline: types.ModuleType, - test_data: dict, - dataset_pandas: DatasetManager): + def test_inductive_step_hinsage( + self, + config: Config, + hinsage_model: str, + gnn_fraud_detection_pipeline: types.ModuleType, # pylint: disable=unused-argument + test_data: dict, + dataset_pandas: DatasetManager): from gnn_fraud_detection_pipeline.stages.graph_construction_stage import FraudGraphConstructionStage from gnn_fraud_detection_pipeline.stages.graph_sage_stage import GraphSAGEStage @@ -70,19 +72,20 @@ def test_inductive_step_hinsage(self, assert results.index.to_arrow().to_pylist() == test_data['index'] dataset_pandas.assert_compare_df(results, expected_df) - def test_process_message(self, - config: Config, - hinsage_model: str, - gnn_fraud_detection_pipeline: types.ModuleType, - test_data: dict, - dataset_pandas: DatasetManager): + def test_process_message( + self, + config: Config, + hinsage_model: str, + gnn_fraud_detection_pipeline: types.ModuleType, # pylint: disable=unused-argument + test_data: dict, + dataset_pandas: DatasetManager): from gnn_fraud_detection_pipeline.stages.graph_construction_stage import FraudGraphConstructionStage from gnn_fraud_detection_pipeline.stages.graph_construction_stage import FraudGraphMultiMessage from gnn_fraud_detection_pipeline.stages.graph_sage_stage import GraphSAGEMultiMessage from gnn_fraud_detection_pipeline.stages.graph_sage_stage import GraphSAGEStage expected_df = dataset_pandas['examples/gnn_fraud_detection_pipeline/inductive_emb.csv'] - expected_df.rename(lambda x: "ind_emb_{}".format(x), axis=1, inplace=True) + expected_df.rename(lambda x: f"ind_emb_{x}", axis=1, inplace=True) df = test_data['df'] meta = MessageMeta(cudf.DataFrame(df)) diff --git a/tests/examples/log_parsing/conftest.py b/tests/examples/log_parsing/conftest.py index f95e71617d..a9d9c21f5e 100644 --- a/tests/examples/log_parsing/conftest.py +++ b/tests/examples/log_parsing/conftest.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ @pytest.fixture -def config(config): +def config(config): # pylint: disable=redefined-outer-name """ The log_parsing pipeline requires NLP mode. Set this here so all the tests don't need to set it themselves. """ diff --git a/tests/examples/log_parsing/test_inference.py b/tests/examples/log_parsing/test_inference.py index 875e343d22..31c629b2a3 100644 --- a/tests/examples/log_parsing/test_inference.py +++ b/tests/examples/log_parsing/test_inference.py @@ -192,14 +192,13 @@ def test_log_parsing_inference_stage_get_inference_worker(config: Config, import @pytest.mark.use_python -@pytest.mark.usefixtures("manual_seed") +@pytest.mark.usefixtures("manual_seed", "config") @pytest.mark.import_mod([ os.path.join(TEST_DIRS.examples_dir, 'log_parsing', 'inference.py'), os.path.join(TEST_DIRS.examples_dir, 'log_parsing', 'messages.py') ]) @pytest.mark.parametrize("mess_offset,mess_count,offset,count", [(0, 5, 0, 5), (5, 5, 0, 5)]) -def test_log_parsing_inference_stage_convert_one_response(config: Config, - import_mod: typing.List[types.ModuleType], +def test_log_parsing_inference_stage_convert_one_response(import_mod: typing.List[types.ModuleType], filter_probs_df: typing.Union[pd.DataFrame, cudf.DataFrame], mess_offset, mess_count, diff --git a/tests/examples/log_parsing/test_pipe.py b/tests/examples/log_parsing/test_pipe.py index 466804e67f..f8e2eda7d3 100755 --- a/tests/examples/log_parsing/test_pipe.py +++ b/tests/examples/log_parsing/test_pipe.py @@ -121,7 +121,7 @@ def _run_mocked_pipeline(config: Config, dataset_cudf: DatasetManager, import_mo mock_infer_result = mock.MagicMock() mock_infer_result.as_numpy.side_effect = inf_results - def async_infer(callback=None, **k): + def async_infer(callback=None, **_): callback(mock_infer_result, None) mock_triton_client.async_infer.side_effect = async_infer diff --git a/tests/examples/ransomware_detection/conftest.py b/tests/examples/ransomware_detection/conftest.py index 5753634e62..0d9a52efac 100644 --- a/tests/examples/ransomware_detection/conftest.py +++ b/tests/examples/ransomware_detection/conftest.py @@ -22,6 +22,8 @@ from utils import TEST_DIRS from utils import import_or_skip +# pylint: disable=redefined-outer-name + SKIP_REASON = ("Tests for the ransomware_detection example require a number of packages not installed in the Morpheus " "development environment. See `examples/ransomware_detection/README.md` " "for details on installing these additional dependencies") @@ -72,5 +74,6 @@ def interested_plugins(): # from common....
# For this reason we need to ensure that the examples/ransomware_detection dir is in the sys.path first @pytest.fixture(autouse=True) -def ransomware_detection_in_sys_path(request: pytest.FixtureRequest, restore_sys_path, reset_plugins, example_dir): +@pytest.mark.usefixtures("request", "restore_sys_path", "reset_plugins") +def ransomware_detection_in_sys_path(example_dir): sys.path.append(example_dir) diff --git a/tests/examples/ransomware_detection/test_preprocessing.py b/tests/examples/ransomware_detection/test_preprocessing.py index af86df90b1..7e638fde64 100644 --- a/tests/examples/ransomware_detection/test_preprocessing.py +++ b/tests/examples/ransomware_detection/test_preprocessing.py @@ -27,6 +27,7 @@ @pytest.mark.use_python class TestPreprocessingRWStage: + # pylint: disable=no-name-in-module def test_constructor(self, config: Config, rwd_conf: dict): from stages.preprocessing import PreprocessingRWStage @@ -35,7 +36,7 @@ def test_constructor(self, config: Config, rwd_conf: dict): assert isinstance(stage, PreprocessBaseStage) assert stage._feature_columns == rwd_conf['model_features'] assert stage._features_len == len(rwd_conf['model_features']) - assert stage._snapshot_dict == {} + assert not stage._snapshot_dict assert len(stage._padding_data) == len(rwd_conf['model_features']) * 6 for i in stage._padding_data: assert i == 0 @@ -50,8 +51,15 @@ def test_sliding_window_offsets(self, config: Config, rwd_conf: dict): results = stage._sliding_window_offsets(ids, len(ids), window=window) assert results == [(0, 3), (1, 4), (2, 5), (3, 6), (4, 7), (7, 10)] + def test_sliding_window_non_consecutive(self, config: Config, rwd_conf: dict): # Non-consecutive ids don't create sliding windows - stage._sliding_window_offsets(list(reversed(ids)), len(ids), window=window) == [] + from stages.preprocessing import PreprocessingRWStage + + stage = PreprocessingRWStage(config, feature_columns=rwd_conf['model_features'], sliding_window=6) + + window = 3 + ids = [17, 19, 21, 23, 31, 33] + assert len(stage._sliding_window_offsets(list(reversed(ids)), len(ids), window=window)) == 0 def test_sliding_window_offsets_errors(self, config: Config, rwd_conf: dict): from stages.preprocessing import PreprocessingRWStage @@ -141,16 +149,20 @@ def test_merge_curr_and_prev_snapshots(self, config: Config, rwd_conf: dict, dat dataset_pandas.assert_compare_df(df.fillna(''), expected_df) def test_pre_process_batch(self, config: Config, rwd_conf: dict, dataset_pandas: DatasetManager): + + # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 + # pylint: disable=no-member + from stages.preprocessing import PreprocessingRWStage df = dataset_pandas['examples/ransomware_detection/dask_results.csv'] df['source_pid_process'] = 'appshield_' + df.pid_process expected_df = df.copy(deep=True).fillna('') meta = AppShieldMessageMeta(df=df, source='tests') - mm = MultiMessage(meta=meta) + multi = MultiMessage(meta=meta) sliding_window = 4 stage = PreprocessingRWStage(config, feature_columns=rwd_conf['model_features'], sliding_window=sliding_window) - results = stage._pre_process_batch(mm) + results: MultiInferenceFILMessage = stage._pre_process_batch(multi) assert isinstance(results, MultiInferenceFILMessage) expected_df['sequence'] = ['dummy' for _ in range(len(expected_df))] diff --git a/tests/io/test_loader_registry.py b/tests/io/test_loader_registry.py index c964215252..6a7c92f8f6 100644 --- a/tests/io/test_loader_registry.py +++ b/tests/io/test_loader_registry.py @@ -17,8 +17,8 @@
import cudf # Morpheus.common is required to register pre-made loaders -import morpheus.common # noqa: F401 -import morpheus.messages as messages +import morpheus.common # noqa: F401 # pylint:disable=unused-import +from morpheus import messages from morpheus.messages import DataLoaderRegistry diff --git a/tests/modules/test_from_control_message.py b/tests/modules/test_from_control_message.py index b920fb43b8..ca2895fbb9 100644 --- a/tests/modules/test_from_control_message.py +++ b/tests/modules/test_from_control_message.py @@ -21,8 +21,8 @@ # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. -import morpheus.loaders # noqa: F401 -import morpheus.modules # noqa: F401 +import morpheus.loaders # noqa: F401 # pylint:disable=unused-import +import morpheus.modules # noqa: F401 # pylint:disable=unused-import from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.linear_modules_stage import LinearModulesStage from morpheus.stages.input.control_message_file_source_stage import ControlMessageFileSourceStage @@ -33,6 +33,8 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from utils import TEST_DIRS +# pylint: disable=redefined-outer-name + @pytest.fixture(scope="function") def filename(request): @@ -68,7 +70,7 @@ def test_get_module(): assert fn_constructor is not None config = {} - module_instance = fn_constructor("FromControlMessageTest", config) # noqa: F841 -- we don't need to use it + fn_constructor("FromControlMessageTest", config) @pytest.mark.use_cpp diff --git a/tests/test_add_classifications_stage.py b/tests/test_add_classifications_stage.py index 5b90235d1b..a016d67d0b 100755 --- a/tests/test_add_classifications_stage.py +++ b/tests/test_add_classifications_stage.py @@ -27,24 +27,28 @@ from utils.dataset_manager import DatasetManager -def test_constructor(config: Config): +@pytest.fixture(name="config") +def config_fixture(config: Config): config.class_labels = ['frogs', 'lizards', 'toads'] + yield config + - ac = AddClassificationsStage(config) - assert ac._class_labels == ['frogs', 'lizards', 'toads'] - assert ac._labels == ['frogs', 'lizards', 'toads'] - assert ac._idx2label == {0: 'frogs', 1: 'lizards', 2: 'toads'} - assert ac.name == "add-class" +def test_constructor(config: Config): + stage = AddClassificationsStage(config) + assert stage._class_labels == ['frogs', 'lizards', 'toads'] + assert stage._labels == ['frogs', 'lizards', 'toads'] + assert stage._idx2label == {0: 'frogs', 1: 'lizards', 2: 'toads'} + assert stage.name == "add-class" # Just ensure that we get a valid non-empty tuple - accepted_types = ac.accepted_types() + accepted_types = stage.accepted_types() assert isinstance(accepted_types, tuple) assert len(accepted_types) > 0 - ac = AddClassificationsStage(config, threshold=1.3, labels=['lizards'], prefix='test_') - assert ac._class_labels, ['frogs', 'lizards', 'toads'] - assert ac._labels, ['lizards'] - assert ac._idx2label, {1: 'test_lizards'} + stage = AddClassificationsStage(config, threshold=1.3, labels=['lizards'], prefix='test_') + assert stage._class_labels == ['frogs', 'lizards', 'toads'] + assert stage._labels == ['lizards'] + assert stage._idx2label == {1: 'test_lizards'} with pytest.raises(AssertionError): AddClassificationsStage(config, labels=['missing']) diff --git a/tests/test_add_classifications_stage_pipe.py b/tests/test_add_classifications_stage_pipe.py index 67872d2f6a..27e45949e8 100755 --- 
a/tests/test_add_classifications_stage_pipe.py +++ b/tests/test_add_classifications_stage_pipe.py @@ -52,7 +52,7 @@ def test_add_classifications_stage_pipe(config, filter_probs_df): pipe.add_stage(DeserializeStage(config)) pipe.add_stage(ConvMsg(config, filter_probs_df)) pipe.add_stage(AddClassificationsStage(config, threshold=threshold)) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) comp_stage = pipe.add_stage( CompareDataFrameStage(config, build_expected(filter_probs_df.to_pandas(), threshold, config.class_labels))) pipe.run() @@ -75,7 +75,7 @@ def test_add_classifications_stage_multi_segment_pipe(config, filter_probs_df): pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(AddClassificationsStage(config, threshold=threshold)) pipe.add_segment_boundary(MultiResponseMessage) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) comp_stage = pipe.add_stage( CompareDataFrameStage(config, build_expected(filter_probs_df.to_pandas(), threshold, config.class_labels))) diff --git a/tests/test_add_scores_stage.py b/tests/test_add_scores_stage.py index e08642d7bc..664b613efc 100755 --- a/tests/test_add_scores_stage.py +++ b/tests/test_add_scores_stage.py @@ -32,21 +32,21 @@ def test_constructor(config: Config): config.class_labels = ['frogs', 'lizards', 'toads'] config.feature_length = 12 - a = AddScoresStage(config) - assert a._class_labels == ['frogs', 'lizards', 'toads'] - assert a._labels == ['frogs', 'lizards', 'toads'] - assert a._idx2label == {0: 'frogs', 1: 'lizards', 2: 'toads'} - assert a.name == "add-scores" + stage = AddScoresStage(config) + assert stage._class_labels == ['frogs', 'lizards', 'toads'] + assert stage._labels == ['frogs', 'lizards', 'toads'] + assert stage._idx2label == {0: 'frogs', 1: 'lizards', 2: 'toads'} + assert stage.name == "add-scores" # Just ensure that we get a valid non-empty tuple - accepted_types = a.accepted_types() + accepted_types = stage.accepted_types() assert isinstance(accepted_types, tuple) assert len(accepted_types) > 0 - a = AddScoresStage(config, labels=['lizards'], prefix='test_') - assert a._class_labels == ['frogs', 'lizards', 'toads'] - assert a._labels == ['lizards'] - assert a._idx2label == {1: 'test_lizards'} + stage = AddScoresStage(config, labels=['lizards'], prefix='test_') + assert stage._class_labels == ['frogs', 'lizards', 'toads'] + assert stage._labels == ['lizards'] + assert stage._idx2label == {1: 'test_lizards'} with pytest.raises(AssertionError): AddScoresStage(config, labels=['missing']) diff --git a/tests/test_add_scores_stage_pipe.py b/tests/test_add_scores_stage_pipe.py index db073f27b7..bbe2000027 100755 --- a/tests/test_add_scores_stage_pipe.py +++ b/tests/test_add_scores_stage_pipe.py @@ -59,7 +59,7 @@ def test_add_scores_stage_pipe(config: Config, pipe.add_stage(DeserializeStage(config)) pipe.add_stage(ConvMsg(config, order=order, columns=list(input_df.columns))) pipe.add_stage(AddScoresStage(config)) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) pipe.run() @@ -84,7 +84,7 @@ def 
test_add_scores_stage_multi_segment_pipe(config: Config, dataset_cudf: Datas pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(AddScoresStage(config)) pipe.add_segment_boundary(MultiResponseMessage) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) pipe.run() diff --git a/tests/test_appshield_source_stage.py b/tests/test_appshield_source_stage.py index 6290eeae26..ebfcbeea76 100755 --- a/tests/test_appshield_source_stage.py +++ b/tests/test_appshield_source_stage.py @@ -137,8 +137,8 @@ def test_read_file_to_df(cols_exclude, expected_df): 'appshield', 'snapshot-1', 'envars_2022-01-30_10-26-01.017250.json') - file = open(input_file, 'r', encoding='latin1') - output_df = AppShieldSourceStage.read_file_to_df(file, cols_exclude) + with open(input_file, 'r', encoding='latin1') as file: + output_df = AppShieldSourceStage.read_file_to_df(file, cols_exclude) assert list(output_df.columns) == ['PID', 'Process'] assert_frame_equal(output_df, expected_df) @@ -175,7 +175,8 @@ def test_load_meta_cols(plugin, expected_new_columns): 'envars_2022-01-30_10-26-01.017250.json') filepath_split = input_file.split('/') - data = json.load(open(input_file, 'r', encoding='latin1')) + with open(input_file, 'r', encoding='latin1') as file: + data = json.load(file) input_df = pd.DataFrame(columns=data['titles'], data=data['data']) output_df = AppShieldSourceStage.load_meta_cols(filepath_split, plugin, input_df) diff --git a/tests/test_concat_df.py b/tests/test_concat_df.py index 1438dfacee..ae624a7761 100644 --- a/tests/test_concat_df.py +++ b/tests/test_concat_df.py @@ -14,15 +14,16 @@ # limitations under the License. 
import pandas as pd +import pytest -from morpheus.config import Config from morpheus.messages import MessageMeta from morpheus.messages import MultiMessage from morpheus.utils import concat_df from utils.dataset_manager import DatasetManager -def test_concat_df(config: Config, dataset: DatasetManager): +@pytest.mark.usefixtures("config") +def test_concat_df(dataset: DatasetManager): meta = MessageMeta(dataset["filter_probs.csv"]) messages = [ meta, diff --git a/tests/test_conftest.py b/tests/test_conftest.py index cd6b210267..7af8e23c5d 100644 --- a/tests/test_conftest.py +++ b/tests/test_conftest.py @@ -20,10 +20,11 @@ import cudf -from morpheus.config import Config from morpheus.config import CppConfig from utils.dataset_manager import DatasetManager +# pylint: disable=redefined-outer-name + @pytest.fixture(scope="function") def cpp_from_marker(request: pytest.FixtureRequest) -> bool: @@ -74,15 +75,15 @@ def test_dataset_both(dataset: DatasetManager): def test_dataset_manager_singleton(df_type: typing.Literal["cudf", "pandas"]): - dm = DatasetManager(df_type=df_type) - assert dm.default_df_type == df_type - assert getattr(dm, df_type) is dm - assert DatasetManager(df_type=df_type) is dm + dataset_manager = DatasetManager(df_type=df_type) + assert dataset_manager.default_df_type == df_type + assert getattr(dataset_manager, df_type) is dataset_manager + assert DatasetManager(df_type=df_type) is dataset_manager alt_type = DatasetManager.get_alt_df_type(df_type=df_type) assert df_type != alt_type - assert DatasetManager(alt_type) is not dm - assert getattr(dm, alt_type) is not dm + assert DatasetManager(alt_type) is not dataset_manager + assert getattr(dataset_manager, alt_type) is not dataset_manager def test_dataset_dftype(dataset: DatasetManager): @@ -136,22 +137,26 @@ def test_mark_both(cpp_from_marker: bool): # === Marks and Config === @pytest.mark.use_cpp -def test_mark_and_config_use_cpp(config: Config): +@pytest.mark.usefixtures("config") +def test_mark_and_config_use_cpp(): assert CppConfig.get_should_use_cpp() @pytest.mark.use_python -def test_mark_and_config_use_python(config: Config): +@pytest.mark.usefixtures("config") +def test_mark_and_config_use_python(): assert not CppConfig.get_should_use_cpp() @pytest.mark.use_cpp @pytest.mark.use_python -def test_mark_and_config_both(config: Config, cpp_from_marker: bool): +@pytest.mark.usefixtures("config") +def test_mark_and_config_both(cpp_from_marker: bool): assert CppConfig.get_should_use_cpp() == cpp_from_marker -def test_mark_and_config_neither(config: Config, cpp_from_marker: bool): +@pytest.mark.usefixtures("config") +def test_mark_and_config_neither(cpp_from_marker: bool): assert CppConfig.get_should_use_cpp() == cpp_from_marker @@ -179,11 +184,13 @@ def test_fixture_neither(use_cpp: bool): # === Config Fixture === -def test_config_fixture_no_cpp(config_no_cpp: Config): +@pytest.mark.usefixtures("config_no_cpp") +def test_config_fixture_no_cpp(): assert not CppConfig.get_should_use_cpp() -def test_config_fixture_only_cpp(config_only_cpp: Config): +@pytest.mark.usefixtures("config_only_cpp") +def test_config_fixture_only_cpp(): assert CppConfig.get_should_use_cpp() diff --git a/tests/test_deserialize_stage_pipe.py b/tests/test_deserialize_stage_pipe.py index 68e55e30cb..ed2cc9eb49 100755 --- a/tests/test_deserialize_stage_pipe.py +++ b/tests/test_deserialize_stage_pipe.py @@ -28,7 +28,8 @@ @pytest.mark.use_cudf -def test_fixing_non_unique_indexes(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def 
test_fixing_non_unique_indexes(dataset: DatasetManager): # Set 2 ids equal to others df = dataset.dup_index(dataset["filter_probs.csv"], count=2) diff --git a/tests/test_file_in_out.py b/tests/test_file_in_out.py index 61e7d10c6d..5940648039 100755 --- a/tests/test_file_in_out.py +++ b/tests/test_file_in_out.py @@ -182,6 +182,7 @@ def test_file_rw_index_pipe(tmp_path, config, input_file): "include_header": True }), (os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.jsonlines"), {})], ids=["CSV", "CSV_ID", "JSON"]) +@pytest.mark.usefixtures("use_cpp") def test_file_roundtrip(tmp_path, input_file, extra_kwargs): # Output file should be same type as input diff --git a/tests/test_inference_worker.py b/tests/test_inference_worker.py index 151688be41..65aacd9e4b 100755 --- a/tests/test_inference_worker.py +++ b/tests/test_inference_worker.py @@ -18,26 +18,30 @@ import pytest -from morpheus.config import Config from morpheus.stages.inference import inference_stage from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue from utils.inference_worker import IW def test_constructor(): - pq = ProducerConsumerQueue() - iw = inference_stage.InferenceWorker(pq) - assert iw._inf_queue is pq + queue = ProducerConsumerQueue() + worker = inference_stage.InferenceWorker(queue) + assert worker._inf_queue is queue # Call empty methods - iw.init() - iw.stop() + worker.init() + worker.stop() @pytest.mark.use_python -def test_build_output_message(config: Config): - pq = ProducerConsumerQueue() - iw = IW(pq) +@pytest.mark.usefixtures("config") +def test_build_output_message(): + + # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 + # pylint: disable=no-member + + queue = ProducerConsumerQueue() + worker = IW(queue) mock_message = mock.MagicMock() mock_message.meta = mock.MagicMock() @@ -49,7 +53,7 @@ def test_build_output_message(config: Config): mock_message.count = 10 mock_message.offset = 12 - response = iw.build_output_message(mock_message) + response = worker.build_output_message(mock_message) assert response.count == 2 assert response.mess_offset == 11 assert response.mess_count == 2 @@ -65,7 +69,7 @@ def test_build_output_message(config: Config): mock_message.count = 2 mock_message.offset = 12 - response = iw.build_output_message(mock_message) + response = worker.build_output_message(mock_message) assert response.count == 2 assert response.mess_offset == 11 assert response.mess_count == 2 diff --git a/tests/test_ip.py b/tests/test_ip.py index 4aa29836e2..810d14d26a 100644 --- a/tests/test_ip.py +++ b/tests/test_ip.py @@ -14,90 +14,90 @@ import cudf -import morpheus.parsers.ip as ip +from morpheus.parsers import ip def test_ip_to_int(): - input = cudf.Series(["5.79.97.178", "94.130.74.45"]) + input_df = cudf.Series(["5.79.97.178", "94.130.74.45"]) expected = cudf.Series([89088434, 1585596973]) - actual = ip.ip_to_int(input) + actual = ip.ip_to_int(input_df) assert actual.equals(expected) def test_int_to_ip(): - input = cudf.Series([89088434, 1585596973]) + input_df = cudf.Series([89088434, 1585596973]) expected = cudf.Series(["5.79.97.178", "94.130.74.45"]) - actual = ip.int_to_ip(input) + actual = ip.int_to_ip(input_df) assert actual.equals(expected) def test_is_ip(): - input = cudf.Series(["5.79.97.178", "1.2.3.4", "5", "5.79", "5.79.97", "5.79.97.178.100"]) + input_df = cudf.Series(["5.79.97.178", "1.2.3.4", "5", "5.79", "5.79.97", "5.79.97.178.100"]) expected = cudf.Series([True, True, False, False, False, False]) - actual = 
ip.is_ip(input) + actual = ip.is_ip(input_df) assert actual.equals(expected) def test_is_reserved(): - input = cudf.Series(["240.0.0.0", "255.255.255.255", "5.79.97.178"]) + input_df = cudf.Series(["240.0.0.0", "255.255.255.255", "5.79.97.178"]) expected = cudf.Series([True, True, False]) - actual = ip.is_reserved(input) + actual = ip.is_reserved(input_df) assert actual.equals(expected) def test_is_loopback(): - input = cudf.Series(["127.0.0.1", "5.79.97.178"]) + input_df = cudf.Series(["127.0.0.1", "5.79.97.178"]) expected = cudf.Series([True, False]) - actual = ip.is_loopback(input) + actual = ip.is_loopback(input_df) assert actual.equals(expected) def test_is_link_local(): - input = cudf.Series(["169.254.0.0", "5.79.97.178"]) + input_df = cudf.Series(["169.254.0.0", "5.79.97.178"]) expected = cudf.Series([True, False]) - actual = ip.is_link_local(input) + actual = ip.is_link_local(input_df) assert actual.equals(expected) def test_is_unspecified(): - input = cudf.Series(["0.0.0.0", "5.79.97.178"]) + input_df = cudf.Series(["0.0.0.0", "5.79.97.178"]) expected = cudf.Series([True, False]) - actual = ip.is_unspecified(input) + actual = ip.is_unspecified(input_df) assert actual.equals(expected) def test_is_multicast(): - input = cudf.Series(["224.0.0.0", "239.255.255.255", "5.79.97.178"]) + input_df = cudf.Series(["224.0.0.0", "239.255.255.255", "5.79.97.178"]) expected = cudf.Series([True, True, False]) - actual = ip.is_multicast(input) + actual = ip.is_multicast(input_df) assert actual.equals(expected) def test_is_private(): - input = cudf.Series(["0.0.0.0", "5.79.97.178"]) + input_df = cudf.Series(["0.0.0.0", "5.79.97.178"]) expected = cudf.Series([True, False]) - actual = ip.is_private(input) + actual = ip.is_private(input_df) assert actual.equals(expected) def test_is_global(): - input = cudf.Series(["0.0.0.0", "5.79.97.178"]) + input_df = cudf.Series(["0.0.0.0", "5.79.97.178"]) expected = cudf.Series([False, True]) - actual = ip.is_global(input) + actual = ip.is_global(input_df) assert actual.equals(expected) def test_netmask(): - input = cudf.Series(["5.79.97.178", "94.130.74.45"]) + input_df = cudf.Series(["5.79.97.178", "94.130.74.45"]) expected = cudf.Series(["255.255.128.0", "255.255.128.0"]) - actual = ip.netmask(input, 17) + actual = ip.netmask(input_df, 17) assert actual.equals(expected) def test_hostmask(): - input = cudf.Series(["5.79.97.178", "94.130.74.45"]) + input_df = cudf.Series(["5.79.97.178", "94.130.74.45"]) expected = cudf.Series(["0.0.127.255", "0.0.127.255"]) - actual = ip.hostmask(input, 17) + actual = ip.hostmask(input_df, 17) assert actual.equals(expected) diff --git a/tests/test_kafka_source_stage_pipe.py b/tests/test_kafka_source_stage_pipe.py index 27b40440e5..98713c6971 100644 --- a/tests/test_kafka_source_stage_pipe.py +++ b/tests/test_kafka_source_stage_pipe.py @@ -134,8 +134,8 @@ def _offset_checker(self, x): new_offsets = self._client.list_consumer_group_offsets(self._group_id) if self._offsets is not None: - for (tp, prev_offset) in self._offsets.items(): - new_offset = new_offsets[tp] + for (tpoint, prev_offset) in self._offsets.items(): + new_offset = new_offsets[tpoint] assert new_offset.offset >= prev_offset.offset diff --git a/tests/test_linear_modules_stage.py b/tests/test_linear_modules_stage.py index 5df477c807..b89fcdf9f1 100755 --- a/tests/test_linear_modules_stage.py +++ b/tests/test_linear_modules_stage.py @@ -64,7 +64,7 @@ def test_build_single_before_module_registration(config): def register_test_module(): registry = mrc.ModuleRegistry - 
def module_init_fn(builder: mrc.Builder): + def module_init_fn(_: mrc.Builder): pass registry.register_module("TestSimpleModule", "test_morpheus_modules", mrc_version, module_init_fn) diff --git a/tests/test_multi_message.py b/tests/test_multi_message.py index 5431f85f31..08984b2436 100644 --- a/tests/test_multi_message.py +++ b/tests/test_multi_message.py @@ -146,7 +146,8 @@ def test_get_meta(filter_probs_df: typing.Union[cudf.DataFrame, pd.DataFrame]): _test_get_meta(filter_probs_df) -def test_get_meta_dup_index(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_get_meta_dup_index(dataset: DatasetManager): # Duplicate some indices before creating the meta df = dataset.replace_index(dataset["filter_probs.csv"], replace_ids={3: 1, 5: 4}) @@ -155,7 +156,8 @@ def test_get_meta_dup_index(use_cpp: bool, dataset: DatasetManager): _test_get_meta(df) -def test_set_meta(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_set_meta(dataset: DatasetManager): df_saved = dataset.pandas["filter_probs.csv"] meta = MessageMeta(dataset["filter_probs.csv"]) @@ -229,11 +231,13 @@ def _test_set_meta_new_column(df: typing.Union[cudf.DataFrame, pd.DataFrame], df DatasetManager.assert_df_equal(multi.get_meta(["v2", "new_column2"]), val_to_set) -def test_set_meta_new_column(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_set_meta_new_column(dataset: DatasetManager): _test_set_meta_new_column(dataset["filter_probs.csv"], dataset.default_df_type) -def test_set_meta_new_column_dup_index(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_set_meta_new_column_dup_index(dataset: DatasetManager): # Duplicate some indices before creating the meta df = dataset.replace_index(dataset["filter_probs.csv"], replace_ids={3: 4, 5: 4}) @@ -262,9 +266,9 @@ def test_set_meta_issue_286(filter_probs_df: cudf.DataFrame, use_series: bool): def _test_copy_ranges(df: typing.Union[cudf.DataFrame, pd.DataFrame]): meta = MessageMeta(df) - mm = MultiMessage(meta=meta) + multi = MultiMessage(meta=meta) - mm2 = mm.copy_ranges([(2, 6)]) + mm2 = multi.copy_ranges([(2, 6)]) assert len(mm2.meta.df) == 4 assert mm2.meta.count == 4 assert len(mm2.get_meta()) == 4 @@ -275,7 +279,7 @@ def _test_copy_ranges(df: typing.Union[cudf.DataFrame, pd.DataFrame]): DatasetManager.assert_df_equal(mm2.get_meta(), df.iloc[2:6]) # slice two different ranges of rows - mm3 = mm.copy_ranges([(2, 6), (12, 15)]) + mm3 = multi.copy_ranges([(2, 6), (12, 15)]) assert len(mm3.meta.df) == 7 assert mm3.meta.count == 7 assert len(mm3.get_meta()) == 7 @@ -300,7 +304,8 @@ def test_copy_ranges(filter_probs_df: typing.Union[cudf.DataFrame, pd.DataFrame] _test_copy_ranges(filter_probs_df) -def test_copy_ranges_dup_index(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_copy_ranges_dup_index(dataset: DatasetManager): # Duplicate some indices before creating the meta df = dataset.dup_index(dataset["filter_probs.csv"], count=4) @@ -423,7 +428,8 @@ def test_get_slice_values(filter_probs_df: cudf.DataFrame): _test_get_slice_values(filter_probs_df) -def test_get_slice_values_dup_index(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_get_slice_values_dup_index(dataset: DatasetManager): # Duplicate some indices before creating the meta df = dataset.dup_index(dataset["filter_probs.csv"], count=4) @@ -480,6 +486,9 @@ def compare_slice(message_class, **kwargs): def 
test_from_message(filter_probs_df: cudf.DataFrame): + # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 + # pylint: disable=no-member + meta = MessageMeta(filter_probs_df) multi = MultiMessage(meta=meta, mess_offset=3, mess_count=10) @@ -700,7 +709,12 @@ def test_tensor_constructor(filter_probs_df: cudf.DataFrame): memory=TensorMemory(count=mess_len, tensors={"id_tensor": invalid_id_tensor})) -def test_tensor_slicing(use_cpp: bool, dataset: DatasetManager): +@pytest.mark.usefixtures("use_cpp") +def test_tensor_slicing(dataset: DatasetManager): + + # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 + # pylint: disable=no-member + filter_probs_df = dataset["filter_probs.csv"] mess_len = len(filter_probs_df) @@ -714,8 +728,8 @@ def test_tensor_slicing(use_cpp: bool, dataset: DatasetManager): probs = cp.random.rand(tensor_count, 2) seq_ids = cp.zeros((tensor_count, 3), dtype=cp.int32) - for i, r in enumerate(repeat_counts): - seq_ids[sum(repeat_counts[:i]):sum(repeat_counts[:i]) + r] = cp.ones((r, 3), int) * i + for i, repeat in enumerate(repeat_counts): + seq_ids[sum(repeat_counts[:i]):sum(repeat_counts[:i]) + repeat] = cp.ones((repeat, 3), int) * i # First with no offsets memory = InferenceMemory(count=tensor_count, tensors={"seq_ids": seq_ids, "probs": probs}) diff --git a/tests/test_multi_port_modules_stage.py b/tests/test_multi_port_modules_stage.py index 388cd781c4..dd2006a81e 100755 --- a/tests/test_multi_port_modules_stage.py +++ b/tests/test_multi_port_modules_stage.py @@ -21,10 +21,11 @@ # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. -import modules.multiplexer # noqa: F401 +import modules.multiplexer # noqa: F401 # pylint:disable=unused-import from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage +# pylint: disable=redefined-outer-name @pytest.fixture(scope="function") def unregistered_module_conf(): unregistered_module_conf = { diff --git a/tests/test_multi_port_pipeline.py b/tests/test_multi_port_pipeline.py index 3c8b06ce4c..ee94552f3b 100755 --- a/tests/test_multi_port_pipeline.py +++ b/tests/test_multi_port_pipeline.py @@ -18,7 +18,7 @@ # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. 
-import modules.multiplexer # noqa: F401 +import modules.multiplexer # noqa: F401 # pylint:disable=unused-import from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage diff --git a/tests/test_phishing.py b/tests/test_phishing.py index 97f0131ed9..2fd430bd5c 100755 --- a/tests/test_phishing.py +++ b/tests/test_phishing.py @@ -68,7 +68,7 @@ def test_email_no_cpp(mock_triton_client, config, tmp_path): mock_infer_result = mock.MagicMock() mock_infer_result.as_numpy.side_effect = inf_results - def async_infer(callback=None, **k): + def async_infer(callback=None, **_): callback(mock_infer_result, None) mock_triton_client.async_infer.side_effect = async_infer diff --git a/tests/test_phishing_kafka.py b/tests/test_phishing_kafka.py index 2dbf86e5fb..59729eb827 100755 --- a/tests/test_phishing_kafka.py +++ b/tests/test_phishing_kafka.py @@ -83,7 +83,7 @@ def test_email_no_cpp(mock_triton_client: mock.MagicMock, mock_infer_result = mock.MagicMock() mock_infer_result.as_numpy.side_effect = inf_results - def async_infer(callback=None, **k): + def async_infer(callback=None, **_): callback(mock_infer_result, None) mock_triton_client.async_infer.side_effect = async_infer @@ -133,7 +133,7 @@ def async_infer(callback=None, **k): output_buf = StringIO() for rec in kafka_consumer: - output_buf.write("{}\n".format(rec.value.decode("utf-8"))) + output_buf.write(rec.value.decode("utf-8") + "\n") output_buf.seek(0) output_df = pandas.read_json(output_buf, lines=True) @@ -200,7 +200,7 @@ def test_email_cpp(dataset_pandas: DatasetManager, output_buf = StringIO() for rec in kafka_consumer: - output_buf.write("{}\n".format(rec.value.decode("utf-8"))) + output_buf.write(f"{rec.value.decode('utf-8')}\n") output_buf.seek(0) output_df = pandas.read_json(output_buf, lines=True) diff --git a/tests/test_pipe_viz.py b/tests/test_pipe_viz.py index 3ac5ec9fcc..69d3a2f745 100755 --- a/tests/test_pipe_viz.py +++ b/tests/test_pipe_viz.py @@ -34,6 +34,7 @@ from utils.stages.conv_msg import ConvMsg +# pylint: disable=redefined-outer-name @pytest.mark.use_cudf @pytest.fixture(name="viz_pipeline", scope="function") def viz_pipeline_fixture(config, filter_probs_df): diff --git a/tests/test_preallocation_pipe.py b/tests/test_preallocation_pipe.py index 207fb3117a..e0f9493f06 100755 --- a/tests/test_preallocation_pipe.py +++ b/tests/test_preallocation_pipe.py @@ -61,13 +61,13 @@ def accepted_types(self): def supports_cpp_node(self): return False - def _check_prealloc(self, m): - df = m.get_meta() + def _check_prealloc(self, message): + df = message.get_meta() for label in self._class_labels: assert label in df.columns assert df[label].dtype == self._expected_type - return m + return message def _build_single(self, builder: mrc.Builder, input_stream): stream = builder.make_node(self.unique_name, ops.map(self._check_prealloc)) @@ -89,7 +89,7 @@ def test_preallocation(config, filter_probs_df, probs_type): pipe.add_stage(DeserializeStage(config)) pipe.add_stage(ConvMsg(config, columns=list(filter_probs_df.columns), probs_type=probs_np_type)) pipe.add_stage(CheckPreAlloc(config, probs_type=probs_type)) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) assert 
len(mem_src.get_needed_columns()) == 0 @@ -125,7 +125,7 @@ def test_preallocation_multi_segment_pipe(config, filter_probs_df, probs_type): (_, boundary_ingress) = pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(CheckPreAlloc(config, probs_type=probs_type)) pipe.add_segment_boundary(MultiResponseMessage) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) @@ -134,7 +134,7 @@ def test_preallocation_multi_segment_pipe(config, filter_probs_df, probs_type): pipe.run() assert len(mem_src.get_needed_columns()) == 0 - boundary_ingress.get_needed_columns() == { + assert boundary_ingress.get_needed_columns() == { 'frogs': probs_type, 'lizards': probs_type, 'toads': probs_type, 'turtles': probs_type } @@ -153,7 +153,7 @@ def test_preallocation_error(config, filter_probs_df): pipe.add_stage(DeserializeStage(config)) pipe.add_stage(ConvMsg(config, columns=list(filter_probs_df.columns), probs_type='f4')) add_scores = pipe.add_stage(AddScoresStage(config)) - pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) + pipe.add_stage(SerializeStage(config, include=[f"^{c}$" for c in config.class_labels])) mem_sink = pipe.add_stage(InMemorySinkStage(config)) assert len(mem_src.get_needed_columns()) == 0 diff --git a/tests/test_sid_kafka.py b/tests/test_sid_kafka.py index 48edc8870d..fea98eb793 100755 --- a/tests/test_sid_kafka.py +++ b/tests/test_sid_kafka.py @@ -82,7 +82,7 @@ def test_minibert_no_cpp(mock_triton_client: mock.MagicMock, mock_infer_result = mock.MagicMock() mock_infer_result.as_numpy.side_effect = inf_results - def async_infer(callback=None, **k): + def async_infer(callback=None, **_): callback(mock_infer_result, None) mock_triton_client.async_infer.side_effect = async_infer @@ -132,7 +132,7 @@ def async_infer(callback=None, **k): output_buf = StringIO() for rec in kafka_consumer: - output_buf.write("{}\n".format(rec.value.decode("utf-8"))) + output_buf.write(f"{rec.value.decode('utf-8')}\n") output_buf.seek(0) output_df = pandas.read_json(output_buf, lines=True) @@ -202,7 +202,7 @@ def test_minibert_cpp(dataset_pandas: DatasetManager, output_buf = StringIO() for rec in kafka_consumer: - output_buf.write("{}\n".format(rec.value.decode("utf-8"))) + output_buf.write(f"{rec.value.decode('utf-8')}\n") output_buf.seek(0) output_df = pandas.read_json(output_buf, lines=True) diff --git a/tests/test_url_parser.py b/tests/test_url_parser.py index 871dba6c5a..a98fdc4b0d 100644 --- a/tests/test_url_parser.py +++ b/tests/test_url_parser.py @@ -18,27 +18,31 @@ from morpheus.parsers import url_parser -input_df = DataFrame({ - "url": [ - "http://www.google.com", - "gmail.com", - "github.com", - "https://pandas.pydata.org", - "http://www.worldbank.org.kg/", - "waiterrant.blogspot.com", - "http://forums.news.cnn.com.ac/", - "http://forums.news.cnn.ac/", - "ftp://b.cnn.com/", - "a.news.uk", - "a.news.co.uk", - "https://a.news.co.uk", - "107-193-100-2.lightspeed.cicril.sbcglobal.net", - "a23-44-13-2.deploy.static.akamaitechnologies.com", - ] -}) +# pylint: disable=redefined-outer-name + + +@pytest.fixture +def input_df(): + return DataFrame({ + "url": [ + "http://www.google.com", + "gmail.com", + "github.com", + "https://pandas.pydata.org", + "http://www.worldbank.org.kg/", + "waiterrant.blogspot.com", + 
"http://forums.news.cnn.com.ac/", + "http://forums.news.cnn.ac/", + "ftp://b.cnn.com/", + "a.news.uk", + "a.news.co.uk", + "https://a.news.co.uk", + "107-193-100-2.lightspeed.cicril.sbcglobal.net", + "a23-44-13-2.deploy.static.akamaitechnologies.com", + ] + }) -@pytest.mark.parametrize("input_df", [input_df]) def test_parse_1(input_df): expected_output_df = DataFrame({ "domain": [ @@ -79,7 +83,6 @@ def test_parse_1(input_df): assert expected_output_df.equals(output_df) -@pytest.mark.parametrize("input_df", [input_df]) def test_parse_2(input_df): expected_output_df = DataFrame({ "hostname": [ @@ -152,10 +155,9 @@ def test_parse_2(input_df): assert expected_output_df.equals(output_df) -@pytest.mark.parametrize("input_df", [input_df]) def test_parse_invalid_req_cols(input_df): - expected_error = ValueError("Given req_cols must be subset of %s" % - ('["hostname", "subdomain", "domain", "suffix"]')) + expected_error = ValueError( + "Given req_cols must be subset of [\"hostname\", \"subdomain\", \"domain\", \"suffix\"]") with pytest.raises(ValueError) as actual_error: url_parser.parse(input_df["url"], req_cols={"test"}) assert actual_error == expected_error diff --git a/tests/utils/inference_worker.py b/tests/utils/inference_worker.py index 4ce4274ea3..737b13a581 100644 --- a/tests/utils/inference_worker.py +++ b/tests/utils/inference_worker.py @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + +from morpheus.messages import MultiInferenceMessage +from morpheus.messages import TensorMemory from morpheus.stages.inference import inference_stage @@ -25,3 +29,6 @@ def calc_output_dims(self, _): # Intentionally calling the abc empty method for coverage super().calc_output_dims(_) return (1, 2) + + def process(self, _: MultiInferenceMessage, __: typing.Callable[[TensorMemory], None]): + raise NotImplementedError diff --git a/tests/utils/stages/conv_msg.py b/tests/utils/stages/conv_msg.py index 24c3e5aac1..8ceac4f3a3 100755 --- a/tests/utils/stages/conv_msg.py +++ b/tests/utils/stages/conv_msg.py @@ -72,7 +72,7 @@ def accepted_types(self) -> typing.Tuple: def supports_cpp_node(self) -> bool: return False - def _conv_message(self, m: MultiMessage) -> MultiResponseMessage: + def _conv_message(self, message: MultiMessage) -> MultiResponseMessage: if self._expected_data is not None: if (isinstance(self._expected_data, cudf.DataFrame)): df = self._expected_data.copy(deep=True) @@ -81,9 +81,9 @@ def _conv_message(self, m: MultiMessage) -> MultiResponseMessage: else: if self._columns is not None: - df = m.get_meta(self._columns) + df = message.get_meta(self._columns) else: - df = m.get_meta() + df = message.get_meta() if self._empty_probs: probs = cp.zeros([len(df), 3], 'float') @@ -91,7 +91,7 @@ def _conv_message(self, m: MultiMessage) -> MultiResponseMessage: probs = cp.array(df.values, dtype=self._probs_type, copy=True, order=self._order) memory = ResponseMemory(count=len(probs), tensors={'probs': probs}) - return MultiResponseMessage.from_message(m, memory=memory) + return MultiResponseMessage.from_message(message, memory=memory) def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: stream = builder.make_node(self.unique_name, ops.map(self._conv_message)) diff --git a/tests/utils/test_directories.py b/tests/utils/test_directories.py index ad2397c897..d2d832b48c 100644 --- a/tests/utils/test_directories.py +++ b/tests/utils/test_directories.py @@ -18,7 +18,7 @@ import morpheus -class 
TestDirectories(object): +class TestDirectories: def __init__(self, cur_file=__file__) -> None: self.tests_dir = os.path.dirname(os.path.dirname(cur_file))
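
A minimal standalone sketch of the pattern the test_url_parser.py hunks apply: the module-level DataFrame that was fed to tests via @pytest.mark.parametrize becomes an ordinary pytest fixture, with a file-level "pylint: disable=redefined-outer-name" because the test parameters shadow the fixture function. This is an illustrative example only, assuming pytest and pandas are available; the names sample_urls, test_urls_are_strings, and test_urls_are_nonempty are hypothetical and do not come from the Morpheus code base.

import pandas as pd
import pytest

# The fixture function and the test parameters share a name, which is exactly what
# pylint's redefined-outer-name check flags; disabling it file-wide mirrors the patch.
# pylint: disable=redefined-outer-name


@pytest.fixture
def sample_urls() -> pd.DataFrame:
    # Building the data inside the fixture gives every test a fresh DataFrame and
    # removes the need for parametrize indirection on a module-level constant.
    return pd.DataFrame({
        "url": [
            "http://www.google.com",
            "gmail.com",
            "ftp://b.cnn.com/",
        ]
    })


def test_urls_are_strings(sample_urls: pd.DataFrame):
    # pytest injects the fixture by matching the parameter name to the fixture name,
    # so no @pytest.mark.parametrize decorator is needed.
    assert sample_urls["url"].map(lambda url: isinstance(url, str)).all()


def test_urls_are_nonempty(sample_urls: pd.DataFrame):
    # Using an f-string in the failure message follows the same modernization the
    # patch applies to the ".format()" calls elsewhere in the test suite.
    for url in sample_urls["url"]:
        assert len(url) > 0, f"unexpected empty url entry: {url!r}"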