Refactor decoder tests (#641)
* test: remove staging_test fixture

* test(decoder): regroup decoder generation tests

* fix(decoder): save checkpoint after export from local

* test: regroup decoder tests

* fix(tgi): temporarily pin ravif version

* ci: run decoder tests
dacorvo authored Jun 26, 2024
1 parent 0916a11 commit 09cc51b
Showing 13 changed files with 357 additions and 266 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/test_inf2.yml
@@ -52,7 +52,11 @@ jobs:
         run: |
           source aws_neuron_venv_pytorch/bin/activate
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/cli
-      - name: Run generation tests
+      - name: Run decoder tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/decoder
+      - name: Run other generation tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/generation
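For a local dry run of the new step, the same marker-filtered selection can be launched through pytest's Python entry point. This is a minimal sketch, not part of the commit; it assumes the aws_neuron_venv_pytorch environment is active and that a valid HF_TOKEN is already exported (the CI secret is not available locally).

# Minimal sketch: mirror the "Run decoder tests" workflow step outside CI.
# Assumes optimum-neuron and pytest are installed in the active virtualenv.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["-m", "is_inferentia_test", "tests/decoder"]))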
16 changes: 7 additions & 9 deletions optimum/neuron/modeling_decoder.py
@@ -445,21 +445,19 @@ def forward(self, *args, **kwargs):
     def _save_pretrained(self, save_directory: Union[str, Path]):
         dst_checkpoint_path, dst_compiled_path = self._get_neuron_dirs(save_directory)
 
-        def copy_dir_to_path(src_dir: Union[str, Path, TemporaryDirectory], dst_path: Union[str, Path]):
-            if isinstance(src_dir, TemporaryDirectory):
-                shutil.copytree(src_dir.name, dst_path, dirs_exist_ok=True)
-            elif not os.path.samefile(src_dir, dst_path):
-                os.symlink(dst_path, src_dir)
-
-        # Copy checkpoint directory (it always exists)
-        copy_dir_to_path(self.checkpoint_dir, dst_checkpoint_path)
+        neuron_config = getattr(self.config, "neuron")
+        checkpoint_id = neuron_config.get("checkpoint_id", None)
+        if checkpoint_id is None:
+            # Model was exported from a local path, so we need to save the checkpoint
+            shutil.copytree(self.checkpoint_dir, dst_checkpoint_path, dirs_exist_ok=True)
+            self.checkpoint_dir = dst_checkpoint_path
 
         # Save or create compiled directory
         if self.compiled_dir is None:
             # The compilation artifacts have never been saved, do it now
             self.model.save(dst_compiled_path)
         else:
-            copy_dir_to_path(self.compiled_dir, dst_compiled_path)
+            shutil.copytree(self.compiled_dir, dst_compiled_path)
         self.compiled_dir = dst_compiled_path
         self.generation_config.save_pretrained(save_directory)

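The behaviour this fix targets can be pictured with the round trip below. It is a hedged sketch and not part of the commit: the model name, export parameters, and output paths are illustrative, and it assumes an Inferentia2 host with optimum-neuron installed. When a model is exported from a local directory, config.neuron carries no checkpoint_id, so save_pretrained must copy the checkpoint next to the compiled artifacts for the saved model to be reloadable.

# Hedged sketch of the save/reload round trip exercised by the decoder export tests below.
from transformers import AutoModelForCausalLM

from optimum.neuron import NeuronModelForCausalLM

# Materialize a plain checkpoint on disk, then export it to Neuron from that local path.
AutoModelForCausalLM.from_pretrained("gpt2").save_pretrained("./local-gpt2")
neuron_model = NeuronModelForCausalLM.from_pretrained(
    "./local-gpt2", export=True, batch_size=1, sequence_length=128, num_cores=2, auto_cast_type="fp16"
)
# The export came from a local path, so config.neuron has no checkpoint_id:
# save_pretrained now copies the checkpoint alongside the compiled artifacts.
neuron_model.save_pretrained("./gpt2-neuron")
# Reloading only works because the checkpoint was saved above.
reloaded = NeuronModelForCausalLM.from_pretrained("./gpt2-neuron")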
35 changes: 0 additions & 35 deletions tests/conftest.py
@@ -25,7 +25,6 @@
     set_custom_cache_repo_name_in_hf_home,
     set_neuron_cache_path,
 )
-from optimum.utils.testing_utils import TOKEN, USER
 
 from .utils import OPTIMUM_INTERNAL_TESTING_CACHE_REPO, get_random_string
 
@@ -88,40 +87,6 @@ def inf_diffuser_model(request):
     return request.param
 
 
-@pytest.fixture(scope="module")
-def staging_test():
-    custom_cache_repo_name = "optimum-neuron-cache-testing"
-    custom_cache_repo = f"{USER}/{custom_cache_repo_name}"
-    custom_private_cache_repo = f"{custom_cache_repo}-private"
-
-    orig_token = get_token()
-    orig_custom_cache_repo = load_custom_cache_repo_name_from_hf_home()
-
-    seed = get_random_string(5)
-    custom_cache_repo_with_seed = f"{custom_cache_repo}-{seed}"
-    custom_private_cache_repo_with_seed = f"{custom_private_cache_repo}-{seed}"
-
-    login(token=TOKEN)
-    # We do not set which cache repo to use because there are two, it is up to the test to define that.
-
-    create_repo(custom_cache_repo_with_seed, repo_type="model", exist_ok=True)
-    create_repo(custom_private_cache_repo_with_seed, repo_type="model", exist_ok=True, private=True)
-
-    yield
-
-    delete_repo(custom_cache_repo_with_seed, repo_type="model")
-    delete_repo(custom_private_cache_repo_with_seed, repo_type="model")
-
-    if orig_token is not None:
-        login(token=orig_token)
-    else:
-        logout()
-    if orig_custom_cache_repo is not None:
-        set_custom_cache_repo_name_in_hf_home(orig_custom_cache_repo, check_repo=False)
-    else:
-        delete_custom_cache_repo_name_from_hf_home()
-
-
 def _hub_test(create_local_cache: bool = False):
     orig_token = get_token()
     orig_custom_cache_repo = load_custom_cache_repo_name_from_hf_home()
122 changes: 122 additions & 0 deletions tests/decoder/conftest.py
@@ -0,0 +1,122 @@
import copy
import logging
import subprocess
import sys
from tempfile import TemporaryDirectory

import huggingface_hub
import pytest
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils import synchronize_hub_cache
from optimum.neuron.version import __sdk_version__ as sdk_version
from optimum.neuron.version import __version__ as version


logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__file__)

OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"

# All model configurations below will be added to the neuron_decoder_config fixture
DECODER_MODEL_CONFIGURATIONS = {
    "gpt2": {
        "model_id": "gpt2",
        "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "llama": {
        "model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "mistral": {
        "model_id": "optimum/mistral-1.1b-testing",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
    },
}


def _get_hub_neuron_model_id(config_name: str):
    return f"optimum-internal-testing/neuron-testing-{version}-{sdk_version}-{config_name}"


def _export_model(model_id, export_kwargs, neuron_model_path):
    export_command = ["optimum-cli", "export", "neuron", "-m", model_id, "--task", "text-generation"]
    for kwarg, value in export_kwargs.items():
        export_command.append(f"--{kwarg}")
        export_command.append(str(value))
    export_command.append(neuron_model_path)
    logger.info(f"Exporting {model_id} with {export_kwargs}")
    try:
        subprocess.run(export_command, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to export model: {e}")
        return


@pytest.fixture(scope="session", params=DECODER_MODEL_CONFIGURATIONS.keys())
def neuron_decoder_config(request):
"""Expose a pre-trained neuron decoder model
The fixture first makes sure the following model artifacts are present on the hub:
- exported neuron model under optimum-internal-testing/neuron-testing-<version>-<name>,
- cached artifacts under optimum-internal-testing/neuron-testing-cache.
If not, it will export the model and push it to the hub.
It then fetches the model locally and return a dictionary containing:
- a configuration name,
- the original model id,
- the export parameters,
- the neuron model id,
- the neuron model local path.
For each exposed model, the local directory is maintained for the duration of the
test session and cleaned up afterwards.
The hub model artifacts are never cleaned up and persist accross sessions.
They must be cleaned up manually when the optimum-neuron version changes.
"""
config_name = request.param
model_config = copy.deepcopy(DECODER_MODEL_CONFIGURATIONS[request.param])
model_id = model_config["model_id"]
export_kwargs = model_config["export_kwargs"]
neuron_model_id = _get_hub_neuron_model_id(config_name)
with TemporaryDirectory() as neuron_model_path:
hub = huggingface_hub.HfApi()
if hub.repo_exists(neuron_model_id):
logger.info(f"Fetching {neuron_model_id} from the HuggingFace hub")
hub.snapshot_download(neuron_model_id, local_dir=neuron_model_path)
else:
_export_model(model_id, export_kwargs, neuron_model_path)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.save_pretrained(neuron_model_path)
del tokenizer
# Create the test model on the hub
hub.create_repo(neuron_model_id, private=True)
hub.upload_folder(
folder_path=neuron_model_path,
repo_id=neuron_model_id,
ignore_patterns=[NeuronModelForCausalLM.CHECKPOINT_DIR + "/*"],
)
# Make sure it is cached
synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
# Add dynamic parameters to the model configuration
model_config["neuron_model_path"] = neuron_model_path
model_config["neuron_model_id"] = neuron_model_id
# Also add model configuration name to allow tests to adapt their expectations
model_config["name"] = config_name
# Yield instead of returning to keep a reference to the temporary directory.
# It will go out of scope and be released only once all tests needing the fixture
# have been completed.
logger.info(f"{config_name} ready for testing ...")
yield model_config
logger.info(f"Done with {config_name}")


@pytest.fixture(scope="module")
def neuron_decoder_path(neuron_decoder_config):
yield neuron_decoder_config["neuron_model_path"]
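A test consuming these fixtures could look like the sketch below. It is hypothetical and not part of this commit; it assumes the fixture directory contains both the exported model and its tokenizer (which the fixture above ensures), and that generate returns the prompt followed by the newly generated tokens, as is standard for decoder-only models.

# Hypothetical consumer of the neuron_decoder_path fixture defined above.
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx


@is_inferentia_test
@requires_neuronx
def test_decoder_generation_smoke(neuron_decoder_path):
    model = NeuronModelForCausalLM.from_pretrained(neuron_decoder_path)
    tokenizer = AutoTokenizer.from_pretrained(neuron_decoder_path)
    inputs = tokenizer("One of my fondest memories is", return_tensors="pt")
    generated = model.generate(**inputs, do_sample=False, max_new_tokens=10)
    # Greedy decoding should append at least one token after the prompt.
    assert generated.shape[-1] > inputs["input_ids"].shape[-1]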
87 changes: 87 additions & 0 deletions tests/decoder/test_decoder_export.py
@@ -0,0 +1,87 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tempfile import TemporaryDirectory

import pytest
from transformers import AutoModelForCausalLM

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx


DECODER_MODEL_ARCHITECTURES = ["bloom", "gpt2", "llama", "mistral", "mixtral", "opt"]
DECODER_MODEL_NAMES = {
    "bloom": "hf-internal-testing/tiny-random-BloomForCausalLM",
    "gpt2": "hf-internal-testing/tiny-random-gpt2",
    "llama": "dacorvo/tiny-random-llama",
    "mistral": "dacorvo/tiny-random-MistralForCausalLM",
    "mixtral": "dacorvo/Mixtral-tiny",
    "opt": "hf-internal-testing/tiny-random-OPTForCausalLM",
}


@pytest.fixture(
    scope="session", params=[DECODER_MODEL_NAMES[model_arch] for model_arch in DECODER_MODEL_ARCHITECTURES]
)
def export_decoder_id(request):
    return request.param


def check_neuron_model(neuron_model, batch_size=None, sequence_length=None, num_cores=None, auto_cast_type=None):
    neuron_config = getattr(neuron_model.config, "neuron", None)
    assert neuron_config
    if batch_size:
        assert neuron_config["batch_size"] == batch_size
    if sequence_length:
        assert neuron_config["sequence_length"] == sequence_length
    if num_cores:
        assert neuron_config["num_cores"] == num_cores
    if auto_cast_type:
        assert neuron_config["auto_cast_type"] == auto_cast_type


@pytest.mark.parametrize(
    "batch_size, sequence_length, num_cores, auto_cast_type",
    [
        [1, 100, 2, "fp32"],
        [1, 100, 2, "fp16"],
        [2, 100, 2, "fp16"],
    ],
)
@is_inferentia_test
@requires_neuronx
@pytest.mark.parametrize("local", [True, False], ids=["local", "from_hub"])
def test_decoder_export_save_reload(local, export_decoder_id, batch_size, sequence_length, num_cores, auto_cast_type):
    export_kwargs = {
        "batch_size": batch_size,
        "sequence_length": sequence_length,
        "num_cores": num_cores,
        "auto_cast_type": auto_cast_type,
    }
    with TemporaryDirectory() as model_path:
        if local:
            with TemporaryDirectory() as tmpdir:
                model = AutoModelForCausalLM.from_pretrained(export_decoder_id)
                model.save_pretrained(tmpdir)
                model = NeuronModelForCausalLM.from_pretrained(tmpdir, export=True, **export_kwargs)
                model.save_pretrained(model_path)
        else:
            model = NeuronModelForCausalLM.from_pretrained(export_decoder_id, export=True, **export_kwargs)
            model.save_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
        del model
        model = NeuronModelForCausalLM.from_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
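For reference, the hub-based variant of the test above corresponds to a plain CLI export. The snippet below is a minimal sketch mirroring _export_model from tests/decoder/conftest.py; the output directory is illustrative and not taken from the commit.

# Hedged sketch: CLI equivalent of one parametrization of test_decoder_export_save_reload.
import subprocess

export_command = [
    "optimum-cli", "export", "neuron",
    "-m", "hf-internal-testing/tiny-random-gpt2",
    "--task", "text-generation",
    "--batch_size", "1",
    "--sequence_length", "100",
    "--num_cores", "2",
    "--auto_cast_type", "fp16",
    "./tiny-gpt2-neuron",  # illustrative output path
]
subprocess.run(export_command, check=True)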