From b68d7692329ebce66b77eaff212c996c03f65254 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 8 Jan 2024 09:44:10 -0800 Subject: [PATCH] Misc test improvements (#1447) * Update `morpheus/modules/payload_batcher.py` to avoid sending a single-element-list to `cudf.DataFrame.groupby`, this avoids a warning about an upcoming behavior change. * Ignore warnings emitted from merlin/nvt regarding tensorflow not being installed * Ignore warnings about `distutils` being deprecated while running tests. * Mark `tests/common/test_http_server.py` as a slow test (takes ~1m) * Update `tests/modules/test_payload_batcher.py` to expect/filter a warning ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1447 --- morpheus/modules/payload_batcher.py | 11 ++++++++-- morpheus/utils/column_info.py | 9 ++++++-- morpheus/utils/schema_transforms.py | 31 ++++++++++++++++----------- pyproject.toml | 7 +++++- tests/common/test_http_server.py | 3 ++- tests/modules/test_payload_batcher.py | 8 ++++++- tests/test_cli.py | 12 ++++++++++- 7 files changed, 60 insertions(+), 21 deletions(-) diff --git a/morpheus/modules/payload_batcher.py b/morpheus/modules/payload_batcher.py index 18e4a70506..ca62a252bd 100644 --- a/morpheus/modules/payload_batcher.py +++ b/morpheus/modules/payload_batcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -147,7 +147,14 @@ def _batch_dataframe_by_group(df: cudf.DataFrame) -> typing.List[cudf.DataFrame] # Period object conversion is not supported in cudf df[period_column] = df[period_column].to_pandas().dt.to_period(period).astype('str') - groups = df.groupby(group_by_columns) + if len(group_by_columns) == 1: + # Avoid warning from cudf regarding an upcoming change of behavior when applying a groupby to a single + # element list. + group_by_columns_ = group_by_columns[0] + else: + group_by_columns_ = group_by_columns + + groups = df.groupby(group_by_columns_) dfs = [] for _, group in groups: diff --git a/morpheus/utils/column_info.py b/morpheus/utils/column_info.py index 80f7e69694..783bbb88c6 100644 --- a/morpheus/utils/column_info.py +++ b/morpheus/utils/column_info.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,10 +17,15 @@ import logging import re import typing +import warnings from datetime import datetime from functools import partial -import nvtabular as nvt +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + import nvtabular as nvt + import pandas as pd import cudf diff --git a/morpheus/utils/schema_transforms.py b/morpheus/utils/schema_transforms.py index 8abbccf9c3..2fd93482cb 100644 --- a/morpheus/utils/schema_transforms.py +++ b/morpheus/utils/schema_transforms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ import logging import os import typing +import warnings -import nvtabular as nvt import pandas as pd import cudf @@ -27,17 +27,22 @@ from morpheus.utils.nvt.extensions import morpheus_ext from morpheus.utils.nvt.schema_converters import create_and_attach_nvt_workflow -if os.environ.get("MORPHEUS_IN_SPHINX_BUILD") is None: - # Apply patches to NVT - # TODO(Devin): Can be removed, once numpy mappings are updated in Merlin - # ======================================================================== - patches.patch_numpy_dtype_registry() - # ======================================================================== - - # Add morpheus conversion mappings - # ======================================================================== - morpheus_ext.register_morpheus_extensions() - # ========================================================================= +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + import nvtabular as nvt + + if os.environ.get("MORPHEUS_IN_SPHINX_BUILD") is None: + # Apply patches to NVT + # TODO(Devin): Can be removed, once numpy mappings are updated in Merlin + # ======================================================================== + patches.patch_numpy_dtype_registry() + # ======================================================================== + + # Add morpheus conversion mappings + # ======================================================================== + morpheus_ext.register_morpheus_extensions() + # ========================================================================= logger = logging.getLogger(__name__) diff --git a/pyproject.toml b/pyproject.toml index e52c590aba..c33d4e0e88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,12 @@ 
filterwarnings = [ 'ignore:`np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe', 'ignore:Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe context manager to modify the DataFrame in-place instead.', 'ignore:`np.MachAr` is deprecated \(NumPy 1.22\):DeprecationWarning', - 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', + 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', + + # Deprecation warning from any project using distutils, currently known sources of this are: + # GPUtils https://github.com/anderskm/gputil/issues/48 + # PySpark https://issues.apache.org/jira/browse/SPARK-45390 + 'ignore:The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives', ] testpaths = ["tests"] diff --git a/tests/common/test_http_server.py b/tests/common/test_http_server.py index 322d64d687..26eeb8adbb 100644 --- a/tests/common/test_http_server.py +++ b/tests/common/test_http_server.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -35,6 +35,7 @@ def make_parse_fn(status: HTTPStatus = HTTPStatus.OK, return mock_parse_fn +@pytest.mark.slow @pytest.mark.parametrize("endpoint", ["/test", "test/", "/a/b/c/d"]) @pytest.mark.parametrize("port", [8088, 9090]) @pytest.mark.parametrize("method", ["GET", "POST", "PUT"]) diff --git a/tests/modules/test_payload_batcher.py b/tests/modules/test_payload_batcher.py index 25a405923e..47f43849d7 100644 --- a/tests/modules/test_payload_batcher.py +++ b/tests/modules/test_payload_batcher.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -135,8 +135,10 @@ def test_custom_params(config, expected_count, expected_exception): + expected_warning = False if timestamp_column_name: filter_probs_df["timestamp"] = TIMESTAMPS + expected_warning = timestamp_pattern is None pipe = Pipeline(config) @@ -182,6 +184,10 @@ def test_custom_params(config, if expected_exception: with pytest.raises(type(expected_exception), match=str(expected_exception)): pipe.run() + elif expected_warning: + with pytest.warns(UserWarning): + pipe.run() + assert len(sink_stage.get_messages()) == expected_count else: pipe.run() assert len(sink_stage.get_messages()) == expected_count diff --git a/tests/test_cli.py b/tests/test_cli.py index aef467bf84..f3d5ff10f1 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +16,7 @@ import os import shutil +import warnings from unittest import mock import click @@ -129,6 +130,15 @@ def mlflow_uri(tmp_path): mlflow.end_run() +@pytest.fixture(scope="function", autouse=True) +def config_warning_fixture(): + # morpheus.cli.utils._apply_to_config method will warn about any keyword arguments that don't match a config option + # this isn't triggered in normal production code, but is triggered in the cli tests. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="No config option matches for.*", category=UserWarning) + yield + + @pytest.mark.reload_modules(commands) @pytest.mark.usefixtures("chdir_tmpdir", "reload_modules") @pytest.mark.use_python