From f6345dff19d594759e1f94b6fba4ef75e50efa05 Mon Sep 17 00:00:00 2001 From: wei-chenglai Date: Fri, 8 Nov 2024 22:15:28 -0500 Subject: [PATCH] KEP-2170: Add unit and E2E tests for model and dataset initializers Signed-off-by: wei-chenglai --- .github/workflows/integration-tests.yaml | 3 +- .github/workflows/test-python.yaml | 4 +- pkg/initializer_v2/test/__init__.py | 0 pkg/initializer_v2/test/conftest.py | 52 +++++++ pkg/initializer_v2/test/e2e/__init__.py | 0 pkg/initializer_v2/test/e2e/test_dataset.py | 107 ++++++++++++++ pkg/initializer_v2/test/e2e/test_model.py | 113 +++++++++++++++ pkg/initializer_v2/test/unit/__init__.py | 0 .../test/unit/dataset/__init__.py | 0 .../test/unit/dataset/test_dataset.py | 86 +++++++++++ .../test/unit/dataset/test_dataset_config.py | 16 +++ .../unit/dataset/test_dataset_hugginface.py | 136 ++++++++++++++++++ .../test/unit/model/__init__.py | 0 .../test/unit/model/test_model.py | 86 +++++++++++ .../test/unit/model/test_model_config.py | 16 +++ .../test/unit/model/test_model_huggingface.py | 135 +++++++++++++++++ pkg/initializer_v2/test/unit/test_utils.py | 25 ++++ 17 files changed, 777 insertions(+), 2 deletions(-) create mode 100644 pkg/initializer_v2/test/__init__.py create mode 100644 pkg/initializer_v2/test/conftest.py create mode 100644 pkg/initializer_v2/test/e2e/__init__.py create mode 100644 pkg/initializer_v2/test/e2e/test_dataset.py create mode 100644 pkg/initializer_v2/test/e2e/test_model.py create mode 100644 pkg/initializer_v2/test/unit/__init__.py create mode 100644 pkg/initializer_v2/test/unit/dataset/__init__.py create mode 100644 pkg/initializer_v2/test/unit/dataset/test_dataset.py create mode 100644 pkg/initializer_v2/test/unit/dataset/test_dataset_config.py create mode 100644 pkg/initializer_v2/test/unit/dataset/test_dataset_hugginface.py create mode 100644 pkg/initializer_v2/test/unit/model/__init__.py create mode 100644 pkg/initializer_v2/test/unit/model/test_model.py create mode 100644 pkg/initializer_v2/test/unit/model/test_model_config.py create mode 100644 pkg/initializer_v2/test/unit/model/test_model_huggingface.py create mode 100644 pkg/initializer_v2/test/unit/test_utils.py diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index ca2b543fc7..78a4a065dd 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -95,8 +95,9 @@ jobs: - name: Run tests run: | - pip install pytest + pip install pytest urllib3 huggingface_hub python3 -m pip install -e sdk/python; pytest -s sdk/python/test --log-cli-level=debug --namespace=default + pytest pkg/initializer_v2/test/e2e env: GANG_SCHEDULER_NAME: ${{ matrix.gang-scheduler-name }} diff --git a/.github/workflows/test-python.yaml b/.github/workflows/test-python.yaml index 9a706461b7..2999c76e01 100644 --- a/.github/workflows/test-python.yaml +++ b/.github/workflows/test-python.yaml @@ -32,4 +32,6 @@ jobs: pip install -U './sdk/python[huggingface]' - name: Run unit test for training sdk - run: pytest ./sdk/python/kubeflow/training/api/training_client_test.py + run: | + pytest ./sdk/python/kubeflow/training/api/training_client_test.py + pytest ./pkg/initializer_v2/test/unit diff --git a/pkg/initializer_v2/test/__init__.py b/pkg/initializer_v2/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkg/initializer_v2/test/conftest.py b/pkg/initializer_v2/test/conftest.py new file mode 100644 index 0000000000..c208a7f4a7 --- /dev/null +++ b/pkg/initializer_v2/test/conftest.py @@ -0,0 +1,52 @@ +import os +import sys + +import pytest + +# Add project root to path if needed +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) + + +@pytest.fixture +def mock_env_vars(): + """Fixture to set and clean up environment variables""" + original_env = dict(os.environ) + + def _set_env_vars(**kwargs): + for key, value in kwargs.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = str(value) + return os.environ + + yield _set_env_vars + + # Cleanup + os.environ.clear() + os.environ.update(original_env) + + +@pytest.fixture +def huggingface_model_instance(): + """Fixture for HuggingFace Model instance""" + from pkg.initializer_v2.model.huggingface import HuggingFace + + return HuggingFace() + + +@pytest.fixture +def huggingface_dataset_instance(): + """Fixture for HuggingFace Dataset instance""" + from pkg.initializer_v2.dataset.huggingface import HuggingFace + + return HuggingFace() + + +@pytest.fixture +def real_hf_token(): + """Fixture to provide real HuggingFace token for E2E tests""" + token = os.getenv("HUGGINGFACE_TOKEN") + # if not token: + # pytest.skip("HUGGINGFACE_TOKEN environment variable not set") + return token diff --git a/pkg/initializer_v2/test/e2e/__init__.py b/pkg/initializer_v2/test/e2e/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkg/initializer_v2/test/e2e/test_dataset.py b/pkg/initializer_v2/test/e2e/test_dataset.py new file mode 100644 index 0000000000..5bd03826cf --- /dev/null +++ b/pkg/initializer_v2/test/e2e/test_dataset.py @@ -0,0 +1,107 @@ +import os +import runpy +import shutil +import tempfile + +import pytest + +import pkg.initializer_v2.utils.utils as utils +from sdk.python.kubeflow.storage_initializer.constants import VOLUME_PATH_DATASET + + +class TestDatasetE2E: + """E2E tests for dataset initialization""" + + @pytest.fixture(autouse=True) + def setup_teardown(self, monkeypatch): + """Setup and teardown for each test""" + # Create temporary directory for dataset downloads + current_dir = os.path.dirname(os.path.abspath(__file__)) + self.temp_dir = tempfile.mkdtemp(dir=current_dir) + os.environ[VOLUME_PATH_DATASET] = self.temp_dir + + # Store original environment + self.original_env = dict(os.environ) + + # Monkeypatch the constant in the module + import sdk.python.kubeflow.storage_initializer.constants as constants + + monkeypatch.setattr(constants, "VOLUME_PATH_DATASET", self.temp_dir) + + yield + + # Cleanup + shutil.rmtree(self.temp_dir, ignore_errors=True) + os.environ.clear() + os.environ.update(self.original_env) + + def verify_dataset_files(self, expected_files): + """Verify downloaded dataset files""" + if expected_files: + actual_files = set(os.listdir(self.temp_dir)) + missing_files = set(expected_files) - actual_files + assert not missing_files, f"Missing expected files: {missing_files}" + + @pytest.mark.parametrize( + "test_name, provider, test_case", + [ + # Public HuggingFace dataset test + ( + "HuggingFace - Public dataset", + "huggingface", + { + "storage_uri": "hf://karpathy/tiny_shakespeare", + "access_token": None, + "expected_files": ["tiny_shakespeare.py"], + "expected_error": None, + }, + ), + # Private HuggingFace dataset test + # ( + # "HuggingFace - Private dataset", + # "huggingface", + # { + # "storage_uri": "hf://username/private-dataset", + # "use_real_token": True, + # "expected_files": ["config.json", "dataset.safetensors"], + # "expected_error": None + # } + # ), + # Invalid HuggingFace dataset test + ( + "HuggingFace - Invalid dataset", + "huggingface", + { + "storage_uri": "hf://invalid/nonexistent-dataset", + "access_token": None, + "expected_files": None, + "expected_error": Exception, + }, + ), + ], + ) + def test_dataset_download(self, test_name, provider, test_case, real_hf_token): + """Test end-to-end dataset download for different providers""" + print(f"\nRunning E2E test for {provider}: {test_name}") + + # Setup environment variables based on test case + os.environ[utils.STORAGE_URI_ENV] = test_case["storage_uri"] + expected_files = test_case.get("expected_files") + + # Handle token/credentials + if test_case.get("use_real_token"): + os.environ["ACCESS_TOKEN"] = real_hf_token + elif test_case.get("access_token"): + os.environ["ACCESS_TOKEN"] = test_case["access_token"] + + # Run the main script + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + runpy.run_module( + "pkg.initializer_v2.dataset.__main__", run_name="__main__" + ) + else: + runpy.run_module("pkg.initializer_v2.dataset.__main__", run_name="__main__") + self.verify_dataset_files(expected_files) + + print("Test execution completed") diff --git a/pkg/initializer_v2/test/e2e/test_model.py b/pkg/initializer_v2/test/e2e/test_model.py new file mode 100644 index 0000000000..bacae4ab49 --- /dev/null +++ b/pkg/initializer_v2/test/e2e/test_model.py @@ -0,0 +1,113 @@ +import os +import runpy +import shutil +import tempfile + +import pytest + +import pkg.initializer_v2.utils.utils as utils +from sdk.python.kubeflow.storage_initializer.constants import VOLUME_PATH_MODEL + + +class TestModelE2E: + """E2E tests for model initialization""" + + @pytest.fixture(autouse=True) + def setup_teardown(self, monkeypatch): + """Setup and teardown for each test""" + # Create temporary directory for model downloads + current_dir = os.path.dirname(os.path.abspath(__file__)) + self.temp_dir = tempfile.mkdtemp(dir=current_dir) + print(self.temp_dir) + os.environ[VOLUME_PATH_MODEL] = self.temp_dir + + # Store original environment + self.original_env = dict(os.environ) + + # Monkeypatch the constant in the module + import sdk.python.kubeflow.storage_initializer.constants as constants + + monkeypatch.setattr(constants, "VOLUME_PATH_MODEL", self.temp_dir) + + yield + + # Cleanup + shutil.rmtree(self.temp_dir, ignore_errors=True) + os.environ.clear() + os.environ.update(self.original_env) + + def verify_model_files(self, expected_files): + """Verify downloaded model files""" + if expected_files: + actual_files = set(os.listdir(self.temp_dir)) + missing_files = set(expected_files) - actual_files + assert not missing_files, f"Missing expected files: {missing_files}" + + @pytest.mark.parametrize( + "test_name, provider, test_case", + [ + # Public HuggingFace model test + ( + "HuggingFace - Public model", + "huggingface", + { + "storage_uri": "hf://hf-internal-testing/tiny-random-bert", + "access_token": None, + "expected_files": [ + "config.json", + "model.safetensors", + "tokenizer.json", + "tokenizer_config.json", + ], + "expected_error": None, + }, + ), + # Private HuggingFace model test + # ( + # "HuggingFace - Private model", + # "huggingface", + # { + # "storage_uri": "hf://username/private-model", + # "use_real_token": True, + # "expected_files": ["config.json", "model.safetensors"], + # "expected_error": None + # } + # ), + # Invalid HuggingFace model test + ( + "HuggingFace - Invalid model", + "huggingface", + { + "storage_uri": "hf://invalid/nonexistent-model", + "access_token": None, + "expected_files": None, + "expected_error": Exception, + }, + ), + ], + ) + def test_model_download(self, test_name, provider, test_case, real_hf_token): + """Test end-to-end model download for different providers""" + print(f"\nRunning E2E test for {provider}: {test_name}") + + # Setup environment variables based on test case + os.environ[utils.STORAGE_URI_ENV] = test_case["storage_uri"] + expected_files = test_case.get("expected_files") + + # Handle token/credentials + if test_case.get("use_real_token"): + os.environ["ACCESS_TOKEN"] = real_hf_token + elif test_case.get("access_token"): + os.environ["ACCESS_TOKEN"] = test_case["access_token"] + + # Run the main script + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + runpy.run_module( + "pkg.initializer_v2.model.__main__", run_name="__main__" + ) + else: + runpy.run_module("pkg.initializer_v2.model.__main__", run_name="__main__") + self.verify_model_files(expected_files) + + print("Test execution completed") diff --git a/pkg/initializer_v2/test/unit/__init__.py b/pkg/initializer_v2/test/unit/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkg/initializer_v2/test/unit/dataset/__init__.py b/pkg/initializer_v2/test/unit/dataset/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkg/initializer_v2/test/unit/dataset/test_dataset.py b/pkg/initializer_v2/test/unit/dataset/test_dataset.py new file mode 100644 index 0000000000..9b06625239 --- /dev/null +++ b/pkg/initializer_v2/test/unit/dataset/test_dataset.py @@ -0,0 +1,86 @@ +import runpy +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.mark.parametrize( + "test_name, test_case", + [ + ( + "Successful download with HuggingFace provider", + { + "storage_uri": "hf://dataset/path", + "access_token": "test_token", + "mock_config_error": False, + "expected_error": None, + }, + ), + ( + "Missing storage URI environment variable", + { + "storage_uri": None, + "access_token": None, + "mock_config_error": False, + "expected_error": Exception, + }, + ), + ( + "Invalid storage URI scheme", + { + "storage_uri": "invalid://dataset/path", + "access_token": None, + "mock_config_error": False, + "expected_error": Exception, + }, + ), + ( + "Config loading failure", + { + "storage_uri": "hf://dataset/path", + "access_token": None, + "mock_config_error": True, + "expected_error": Exception, + }, + ), + ], +) +def test_dataset_main(test_name, test_case, mock_env_vars): + """Test main script with different scenarios""" + print(f"Running test: {test_name}") + + # Setup mock environment variables + env_vars = { + "STORAGE_URI": test_case["storage_uri"], + "ACCESS_TOKEN": test_case["access_token"], + } + mock_env_vars(**env_vars) + + # Setup mock HuggingFace instance + mock_hf_instance = MagicMock() + if test_case["mock_config_error"]: + mock_hf_instance.load_config.side_effect = Exception + + with patch( + "pkg.initializer_v2.dataset.huggingface.HuggingFace", + return_value=mock_hf_instance, + ) as mock_hf: + + # Execute test + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + runpy.run_module( + "pkg.initializer_v2.dataset.__main__", run_name="__main__" + ) + else: + runpy.run_module("pkg.initializer_v2.dataset.__main__", run_name="__main__") + + # Verify HuggingFace instance methods were called + mock_hf_instance.load_config.assert_called_once() + mock_hf_instance.download_dataset.assert_called_once() + + # Verify HuggingFace class instantiation + if test_case["storage_uri"] and test_case["storage_uri"].startswith("hf://"): + mock_hf.assert_called_once() + + print("Test execution completed") diff --git a/pkg/initializer_v2/test/unit/dataset/test_dataset_config.py b/pkg/initializer_v2/test/unit/dataset/test_dataset_config.py new file mode 100644 index 0000000000..2e134ed816 --- /dev/null +++ b/pkg/initializer_v2/test/unit/dataset/test_dataset_config.py @@ -0,0 +1,16 @@ +from pkg.initializer_v2.dataset.config import HuggingFaceDatasetConfig + + +def test_huggingface_dataset_config_creation(): + """Test HuggingFaceModelInputConfig creation with different parameters""" + # Test with required parameters only + config = HuggingFaceDatasetConfig(storage_uri="hf://dataset/path") + assert config.storage_uri == "hf://dataset/path" + assert config.access_token is None + + # Test with all parameters + config = HuggingFaceDatasetConfig( + storage_uri="hf://dataset/path", access_token="dummy_token" + ) + assert config.storage_uri == "hf://dataset/path" + assert config.access_token == "dummy_token" diff --git a/pkg/initializer_v2/test/unit/dataset/test_dataset_hugginface.py b/pkg/initializer_v2/test/unit/dataset/test_dataset_hugginface.py new file mode 100644 index 0000000000..c9ecc0978f --- /dev/null +++ b/pkg/initializer_v2/test/unit/dataset/test_dataset_hugginface.py @@ -0,0 +1,136 @@ +from unittest.mock import MagicMock, patch + +import pytest + +import pkg.initializer_v2.utils.utils as utils +from sdk.python.kubeflow.storage_initializer.constants import VOLUME_PATH_DATASET + + +# Test cases for config loading +@pytest.mark.parametrize( + "test_name, test_config, expected", + [ + ( + "Full config with token", + {"storage_uri": "hf://dataset/path", "access_token": "test_token"}, + {"storage_uri": "hf://dataset/path", "access_token": "test_token"}, + ), + ( + "Minimal config without token", + {"storage_uri": "hf://dataset/path"}, + {"storage_uri": "hf://dataset/path", "access_token": None}, + ), + ], +) +def test_load_config(test_name, test_config, expected, huggingface_dataset_instance): + """Test config loading with different configurations""" + print(f"Running test: {test_name}") + + with patch.object(utils, "get_config_from_env", return_value=test_config): + huggingface_dataset_instance.load_config() + assert ( + huggingface_dataset_instance.config.storage_uri == expected["storage_uri"] + ) + assert ( + huggingface_dataset_instance.config.access_token == expected["access_token"] + ) + + print("Test execution completed") + + +@pytest.mark.parametrize( + "test_name, test_case", + [ + ( + "Successful download with token", + { + "config": { + "storage_uri": "hf://username/dataset-name", + "access_token": "test_token", + }, + "should_login": True, + "expected_repo_id": "username/dataset-name", + "mock_login_side_effect": None, + "mock_download_side_effect": None, + "expected_error": None, + }, + ), + ( + "Successful download without token", + { + "config": {"storage_uri": "hf://org/dataset-v1", "access_token": None}, + "should_login": False, + "expected_repo_id": "org/dataset-v1", + "mock_login_side_effect": None, + "mock_download_side_effect": None, + "expected_error": None, + }, + ), + ( + "Login failure", + { + "config": { + "storage_uri": "hf://username/dataset-name", + "access_token": "test_token", + }, + "should_login": True, + "expected_repo_id": "username/dataset-name", + "mock_login_side_effect": Exception, + "mock_download_side_effect": None, + "expected_error": Exception, + }, + ), + ( + "Download failure", + { + "config": { + "storage_uri": "hf://invalid/repo/name", + "access_token": None, + }, + "should_login": False, + "expected_repo_id": "invalid/repo/name", + "mock_login_side_effect": None, + "mock_download_side_effect": Exception, + "expected_error": Exception, + }, + ), + ], +) +def test_download_dataset(test_name, test_case, huggingface_dataset_instance): + """Test dataset download with different configurations""" + + print(f"Running test: {test_name}") + + huggingface_dataset_instance.config = MagicMock(**test_case["config"]) + + with patch("huggingface_hub.login") as mock_login, patch( + "huggingface_hub.snapshot_download" + ) as mock_download: + + # Configure mock behavior + if test_case["mock_login_side_effect"]: + mock_login.side_effect = test_case["mock_login_side_effect"] + if test_case["mock_download_side_effect"]: + mock_download.side_effect = test_case["mock_download_side_effect"] + + # Execute test + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + huggingface_dataset_instance.download_dataset() + else: + huggingface_dataset_instance.download_dataset() + + # Verify login behavior + if test_case["should_login"]: + mock_login.assert_called_once_with(test_case["config"]["access_token"]) + else: + mock_login.assert_not_called() + + # Verify download parameters + if test_case["expected_repo_id"]: + mock_download.assert_called_once_with( + repo_id=test_case["expected_repo_id"], + local_dir=VOLUME_PATH_DATASET, + repo_type="dataset", + ) + print("Test execution completed") diff --git a/pkg/initializer_v2/test/unit/model/__init__.py b/pkg/initializer_v2/test/unit/model/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkg/initializer_v2/test/unit/model/test_model.py b/pkg/initializer_v2/test/unit/model/test_model.py new file mode 100644 index 0000000000..4d0d17dd98 --- /dev/null +++ b/pkg/initializer_v2/test/unit/model/test_model.py @@ -0,0 +1,86 @@ +import runpy +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.mark.parametrize( + "test_name, test_case", + [ + ( + "Successful download with HuggingFace provider", + { + "storage_uri": "hf://model/path", + "access_token": "test_token", + "mock_config_error": False, + "expected_error": None, + }, + ), + ( + "Missing storage URI environment variable", + { + "storage_uri": None, + "access_token": None, + "mock_config_error": False, + "expected_error": Exception, + }, + ), + ( + "Invalid storage URI scheme", + { + "storage_uri": "invalid://model/path", + "access_token": None, + "mock_config_error": False, + "expected_error": Exception, + }, + ), + ( + "Config loading failure", + { + "storage_uri": "hf://model/path", + "access_token": None, + "mock_config_error": True, + "expected_error": Exception, + }, + ), + ], +) +def test_model_main(test_name, test_case, mock_env_vars): + """Test main script with different scenarios""" + print(f"Running test: {test_name}") + + # Setup mock environment variables + env_vars = { + "STORAGE_URI": test_case["storage_uri"], + "ACCESS_TOKEN": test_case["access_token"], + } + mock_env_vars(**env_vars) + + # Setup mock HuggingFace instance + mock_hf_instance = MagicMock() + if test_case["mock_config_error"]: + mock_hf_instance.load_config.side_effect = Exception + + with patch( + "pkg.initializer_v2.model.huggingface.HuggingFace", + return_value=mock_hf_instance, + ) as mock_hf: + + # Execute test + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + runpy.run_module( + "pkg.initializer_v2.model.__main__", run_name="__main__" + ) + else: + runpy.run_module("pkg.initializer_v2.model.__main__", run_name="__main__") + + # Verify HuggingFace instance methods were called + mock_hf_instance.load_config.assert_called_once() + mock_hf_instance.download_model.assert_called_once() + + # Verify HuggingFace class instantiation + if test_case["storage_uri"] and test_case["storage_uri"].startswith("hf://"): + mock_hf.assert_called_once() + + print("Test execution completed") diff --git a/pkg/initializer_v2/test/unit/model/test_model_config.py b/pkg/initializer_v2/test/unit/model/test_model_config.py new file mode 100644 index 0000000000..55de5fa2dc --- /dev/null +++ b/pkg/initializer_v2/test/unit/model/test_model_config.py @@ -0,0 +1,16 @@ +from pkg.initializer_v2.model.config import HuggingFaceModelInputConfig + + +def test_huggingface_model_config_creation(): + """Test HuggingFaceModelInputConfig creation with different parameters""" + # Test with required parameters only + config = HuggingFaceModelInputConfig(storage_uri="hf://model/path") + assert config.storage_uri == "hf://model/path" + assert config.access_token is None + + # Test with all parameters + config = HuggingFaceModelInputConfig( + storage_uri="hf://model/path", access_token="dummy_token" + ) + assert config.storage_uri == "hf://model/path" + assert config.access_token == "dummy_token" diff --git a/pkg/initializer_v2/test/unit/model/test_model_huggingface.py b/pkg/initializer_v2/test/unit/model/test_model_huggingface.py new file mode 100644 index 0000000000..70666718e2 --- /dev/null +++ b/pkg/initializer_v2/test/unit/model/test_model_huggingface.py @@ -0,0 +1,135 @@ +from unittest.mock import MagicMock, patch + +import pytest + +import pkg.initializer_v2.utils.utils as utils +from sdk.python.kubeflow.storage_initializer.constants import VOLUME_PATH_MODEL + + +# Test cases for config loading +@pytest.mark.parametrize( + "test_name, test_config, expected", + [ + ( + "Full config with token", + {"storage_uri": "hf://model/path", "access_token": "test_token"}, + {"storage_uri": "hf://model/path", "access_token": "test_token"}, + ), + ( + "Minimal config without token", + {"storage_uri": "hf://model/path"}, + {"storage_uri": "hf://model/path", "access_token": None}, + ), + ], +) +def test_load_config(test_name, test_config, expected, huggingface_model_instance): + """Test config loading with different configurations""" + print(f"Running test: {test_name}") + + with patch.object(utils, "get_config_from_env", return_value=test_config): + huggingface_model_instance.load_config() + assert huggingface_model_instance.config.storage_uri == expected["storage_uri"] + assert ( + huggingface_model_instance.config.access_token == expected["access_token"] + ) + + print("Test execution completed") + + +@pytest.mark.parametrize( + "test_name, test_case", + [ + ( + "Successful download with token", + { + "config": { + "storage_uri": "hf://username/model-name", + "access_token": "test_token", + }, + "should_login": True, + "expected_repo_id": "username/model-name", + "mock_login_side_effect": None, + "mock_download_side_effect": None, + "expected_error": None, + }, + ), + ( + "Successful download without token", + { + "config": {"storage_uri": "hf://org/model-v1", "access_token": None}, + "should_login": False, + "expected_repo_id": "org/model-v1", + "mock_login_side_effect": None, + "mock_download_side_effect": None, + "expected_error": None, + }, + ), + ( + "Login failure", + { + "config": { + "storage_uri": "hf://username/model-name", + "access_token": "test_token", + }, + "should_login": True, + "expected_repo_id": "username/model-name", + "mock_login_side_effect": Exception, + "mock_download_side_effect": None, + "expected_error": Exception, + }, + ), + ( + "Download failure", + { + "config": { + "storage_uri": "hf://invalid/repo/name", + "access_token": None, + }, + "should_login": False, + "expected_repo_id": "invalid/repo/name", + "mock_login_side_effect": None, + "mock_download_side_effect": Exception, + "expected_error": Exception, + }, + ), + ], +) +def test_download_model(test_name, test_case, huggingface_model_instance): + """Test model download with different configurations""" + + print(f"Running test: {test_name}") + + huggingface_model_instance.config = MagicMock(**test_case["config"]) + + with patch("huggingface_hub.login") as mock_login, patch( + "huggingface_hub.snapshot_download" + ) as mock_download: + + # Configure mock behavior + if test_case["mock_login_side_effect"]: + mock_login.side_effect = test_case["mock_login_side_effect"] + if test_case["mock_download_side_effect"]: + mock_download.side_effect = test_case["mock_download_side_effect"] + + # Execute test + if test_case["expected_error"]: + with pytest.raises(test_case["expected_error"]): + huggingface_model_instance.download_model() + else: + huggingface_model_instance.download_model() + + # Verify login behavior + if test_case["should_login"]: + mock_login.assert_called_once_with(test_case["config"]["access_token"]) + else: + mock_login.assert_not_called() + + # Verify download parameters + if test_case["expected_repo_id"]: + mock_download.assert_called_once_with( + repo_id=test_case["expected_repo_id"], + local_dir=VOLUME_PATH_MODEL, + allow_patterns=["*.json", "*.safetensors", "*.model"], + ignore_patterns=["*.msgpack", "*.h5", "*.bin", ".pt", ".pth"], + ) + print("Test execution completed") diff --git a/pkg/initializer_v2/test/unit/test_utils.py b/pkg/initializer_v2/test/unit/test_utils.py new file mode 100644 index 0000000000..866ae9a446 --- /dev/null +++ b/pkg/initializer_v2/test/unit/test_utils.py @@ -0,0 +1,25 @@ +import pytest + +from pkg.initializer_v2.model.config import HuggingFaceModelInputConfig +from pkg.initializer_v2.utils.utils import get_config_from_env + + +@pytest.mark.parametrize( + "config_class,env_vars,expected", + [ + ( + HuggingFaceModelInputConfig, + {"STORAGE_URI": "hf://test", "ACCESS_TOKEN": "token"}, + {"storage_uri": "hf://test", "access_token": "token"}, + ), + ( + HuggingFaceModelInputConfig, + {"STORAGE_URI": "hf://test"}, + {"storage_uri": "hf://test", "access_token": None}, + ), + ], +) +def test_get_config_from_env(mock_env_vars, config_class, env_vars, expected): + mock_env_vars(**env_vars) + result = get_config_from_env(config_class) + assert result == expected