* test: remove staging_test fixture
* test(decoder): regroup decoder generation tests
* fix(decoder): save checkpoint after export from local
* test: regroup decoder tests
* fix(tgi): temporarily pin ravif version
* ci: run decoder tests
Showing 13 changed files with 357 additions and 266 deletions.
@@ -0,0 +1,122 @@
import copy
import logging
import subprocess
import sys
from tempfile import TemporaryDirectory

import huggingface_hub
import pytest
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils import synchronize_hub_cache
from optimum.neuron.version import __sdk_version__ as sdk_version
from optimum.neuron.version import __version__ as version


logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__file__)

OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"

# All model configurations below will be added to the neuron_decoder_config fixture
DECODER_MODEL_CONFIGURATIONS = {
    "gpt2": {
        "model_id": "gpt2",
        "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "llama": {
        "model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "mistral": {
        "model_id": "optimum/mistral-1.1b-testing",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
    },
}


def _get_hub_neuron_model_id(config_name: str):
    return f"optimum-internal-testing/neuron-testing-{version}-{sdk_version}-{config_name}"


def _export_model(model_id, export_kwargs, neuron_model_path):
    export_command = ["optimum-cli", "export", "neuron", "-m", model_id, "--task", "text-generation"]
    for kwarg, value in export_kwargs.items():
        export_command.append(f"--{kwarg}")
        export_command.append(str(value))
    export_command.append(neuron_model_path)
    logger.info(f"Exporting {model_id} with {export_kwargs}")
    try:
        subprocess.run(export_command, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to export model: {e}")
        return


@pytest.fixture(scope="session", params=DECODER_MODEL_CONFIGURATIONS.keys())
def neuron_decoder_config(request):
    """Expose a pre-trained neuron decoder model.

    The fixture first makes sure the following model artifacts are present on the hub:
    - exported neuron model under optimum-internal-testing/neuron-testing-<version>-<name>,
    - cached artifacts under optimum-internal-testing/neuron-testing-cache.
    If not, it will export the model and push it to the hub.

    It then fetches the model locally and returns a dictionary containing:
    - a configuration name,
    - the original model id,
    - the export parameters,
    - the neuron model id,
    - the neuron model local path.

    For each exposed model, the local directory is maintained for the duration of the
    test session and cleaned up afterwards.
    The hub model artifacts are never cleaned up and persist across sessions.
    They must be cleaned up manually when the optimum-neuron version changes.
    """
    config_name = request.param
    model_config = copy.deepcopy(DECODER_MODEL_CONFIGURATIONS[request.param])
    model_id = model_config["model_id"]
    export_kwargs = model_config["export_kwargs"]
    neuron_model_id = _get_hub_neuron_model_id(config_name)
    with TemporaryDirectory() as neuron_model_path:
        hub = huggingface_hub.HfApi()
        if hub.repo_exists(neuron_model_id):
            logger.info(f"Fetching {neuron_model_id} from the HuggingFace hub")
            hub.snapshot_download(neuron_model_id, local_dir=neuron_model_path)
        else:
            _export_model(model_id, export_kwargs, neuron_model_path)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            tokenizer.save_pretrained(neuron_model_path)
            del tokenizer
            # Create the test model on the hub
            hub.create_repo(neuron_model_id, private=True)
            hub.upload_folder(
                folder_path=neuron_model_path,
                repo_id=neuron_model_id,
                ignore_patterns=[NeuronModelForCausalLM.CHECKPOINT_DIR + "/*"],
            )
            # Make sure it is cached
            synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
        # Add dynamic parameters to the model configuration
        model_config["neuron_model_path"] = neuron_model_path
        model_config["neuron_model_id"] = neuron_model_id
        # Also add model configuration name to allow tests to adapt their expectations
        model_config["name"] = config_name
        # Yield instead of returning to keep a reference to the temporary directory.
        # It will go out of scope and be released only once all tests needing the fixture
        # have been completed.
        logger.info(f"{config_name} ready for testing ...")
        yield model_config
        logger.info(f"Done with {config_name}")


@pytest.fixture(scope="module")
def neuron_decoder_path(neuron_decoder_config):
    yield neuron_decoder_config["neuron_model_path"]
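
For illustration, a test consuming the neuron_decoder_config fixture above could look like the minimal sketch below. The test name, prompt, and generation parameters are hypothetical and are not part of this commit; the sketch only assumes what the fixture documents, namely that the exported model and its tokenizer are available under neuron_model_path.

import pytest
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM


def test_decoder_generation_sketch(neuron_decoder_config):
    # The fixture guarantees the exported model and its tokenizer are present locally.
    model_path = neuron_decoder_config["neuron_model_path"]
    model = NeuronModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    inputs = tokenizer("Hello, my name is", return_tensors="pt")
    # Keep the generation short; the exported sequence_length (see export_kwargs)
    # is the hard upper bound on total length.
    outputs = model.generate(**inputs, do_sample=False, max_new_tokens=16)
    assert outputs.shape[-1] <= neuron_decoder_config["export_kwargs"]["sequence_length"]
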
@@ -0,0 +1,87 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tempfile import TemporaryDirectory

import pytest
from transformers import AutoModelForCausalLM

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx


DECODER_MODEL_ARCHITECTURES = ["bloom", "gpt2", "llama", "mistral", "mixtral", "opt"]
DECODER_MODEL_NAMES = {
    "bloom": "hf-internal-testing/tiny-random-BloomForCausalLM",
    "gpt2": "hf-internal-testing/tiny-random-gpt2",
    "llama": "dacorvo/tiny-random-llama",
    "mistral": "dacorvo/tiny-random-MistralForCausalLM",
    "mixtral": "dacorvo/Mixtral-tiny",
    "opt": "hf-internal-testing/tiny-random-OPTForCausalLM",
}


@pytest.fixture(
    scope="session", params=[DECODER_MODEL_NAMES[model_arch] for model_arch in DECODER_MODEL_ARCHITECTURES]
)
def export_decoder_id(request):
    return request.param


def check_neuron_model(neuron_model, batch_size=None, sequence_length=None, num_cores=None, auto_cast_type=None):
    neuron_config = getattr(neuron_model.config, "neuron", None)
    assert neuron_config
    if batch_size:
        assert neuron_config["batch_size"] == batch_size
    if sequence_length:
        assert neuron_config["sequence_length"] == sequence_length
    if num_cores:
        assert neuron_config["num_cores"] == num_cores
    if auto_cast_type:
        assert neuron_config["auto_cast_type"] == auto_cast_type


@pytest.mark.parametrize(
    "batch_size, sequence_length, num_cores, auto_cast_type",
    [
        [1, 100, 2, "fp32"],
        [1, 100, 2, "fp16"],
        [2, 100, 2, "fp16"],
    ],
)
@is_inferentia_test
@requires_neuronx
@pytest.mark.parametrize("local", [True, False], ids=["local", "from_hub"])
def test_decoder_export_save_reload(local, export_decoder_id, batch_size, sequence_length, num_cores, auto_cast_type):
    export_kwargs = {
        "batch_size": batch_size,
        "sequence_length": sequence_length,
        "num_cores": num_cores,
        "auto_cast_type": auto_cast_type,
    }
    with TemporaryDirectory() as model_path:
        if local:
            with TemporaryDirectory() as tmpdir:
                model = AutoModelForCausalLM.from_pretrained(export_decoder_id)
                model.save_pretrained(tmpdir)
                model = NeuronModelForCausalLM.from_pretrained(tmpdir, export=True, **export_kwargs)
                model.save_pretrained(model_path)
        else:
            model = NeuronModelForCausalLM.from_pretrained(export_decoder_id, export=True, **export_kwargs)
            model.save_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
        del model
        model = NeuronModelForCausalLM.from_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
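
As a usage note, a model reloaded from a saved export directory, as at the end of the test above, can also be exercised with a short greedy generation. The helper below is only a sketch and is not part of this commit; the function name and the assumption that a matching tokenizer id is passed in are hypothetical.

from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM


def sanity_check_generation(model_path, tokenizer_id, prompt="Hello"):
    # Reload the exported artifacts and run a short greedy generation as a smoke test.
    model = NeuronModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, do_sample=False, max_new_tokens=10)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
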