* test: remove staging_test fixture
* test(decoder): regroup decoder generation tests
* fix(decoder): save checkpoint after export from local
* test: regroup decoder tests
* fix(tgi): temporarily pin ravif version
* ci: run decoder tests
Showing 13 changed files with 357 additions and 266 deletions.
@@ -0,0 +1,122 @@
import copy
import logging
import subprocess
import sys
from tempfile import TemporaryDirectory

import huggingface_hub
import pytest
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils import synchronize_hub_cache
from optimum.neuron.version import __sdk_version__ as sdk_version
from optimum.neuron.version import __version__ as version


logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__file__)

OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"

# All model configurations below will be added to the neuron_decoder_config fixture
DECODER_MODEL_CONFIGURATIONS = {
    "gpt2": {
        "model_id": "gpt2",
        "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "llama": {
        "model_id": "princeton-nlp/Sheared-LLaMA-1.3B",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
    },
    "mistral": {
        "model_id": "optimum/mistral-1.1b-testing",
        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
    },
}


def _get_hub_neuron_model_id(config_name: str):
    return f"optimum-internal-testing/neuron-testing-{version}-{sdk_version}-{config_name}"


def _export_model(model_id, export_kwargs, neuron_model_path):
    export_command = ["optimum-cli", "export", "neuron", "-m", model_id, "--task", "text-generation"]
    for kwarg, value in export_kwargs.items():
        export_command.append(f"--{kwarg}")
        export_command.append(str(value))
    export_command.append(neuron_model_path)
    logger.info(f"Exporting {model_id} with {export_kwargs}")
    try:
        subprocess.run(export_command, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to export model: {e}")
        return


@pytest.fixture(scope="session", params=DECODER_MODEL_CONFIGURATIONS.keys())
def neuron_decoder_config(request):
    """Expose a pre-trained neuron decoder model.

    The fixture first makes sure the following model artifacts are present on the hub:
    - exported neuron model under optimum-internal-testing/neuron-testing-<version>-<name>,
    - cached artifacts under optimum-internal-testing/neuron-testing-cache.
    If not, it will export the model and push it to the hub.

    It then fetches the model locally and returns a dictionary containing:
    - a configuration name,
    - the original model id,
    - the export parameters,
    - the neuron model id,
    - the neuron model local path.

    For each exposed model, the local directory is maintained for the duration of the
    test session and cleaned up afterwards.
    The hub model artifacts are never cleaned up and persist across sessions.
    They must be cleaned up manually when the optimum-neuron version changes.
    """
    config_name = request.param
    model_config = copy.deepcopy(DECODER_MODEL_CONFIGURATIONS[request.param])
    model_id = model_config["model_id"]
    export_kwargs = model_config["export_kwargs"]
    neuron_model_id = _get_hub_neuron_model_id(config_name)
    with TemporaryDirectory() as neuron_model_path:
        hub = huggingface_hub.HfApi()
        if hub.repo_exists(neuron_model_id):
            logger.info(f"Fetching {neuron_model_id} from the HuggingFace hub")
            hub.snapshot_download(neuron_model_id, local_dir=neuron_model_path)
        else:
            _export_model(model_id, export_kwargs, neuron_model_path)
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            tokenizer.save_pretrained(neuron_model_path)
            del tokenizer
            # Create the test model on the hub
            hub.create_repo(neuron_model_id, private=True)
            hub.upload_folder(
                folder_path=neuron_model_path,
                repo_id=neuron_model_id,
                ignore_patterns=[NeuronModelForCausalLM.CHECKPOINT_DIR + "/*"],
            )
            # Make sure it is cached
            synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
        # Add dynamic parameters to the model configuration
        model_config["neuron_model_path"] = neuron_model_path
        model_config["neuron_model_id"] = neuron_model_id
        # Also add model configuration name to allow tests to adapt their expectations
        model_config["name"] = config_name
        # Yield instead of returning to keep a reference to the temporary directory.
        # It will go out of scope and be released only once all tests needing the fixture
        # have been completed.
        logger.info(f"{config_name} ready for testing ...")
        yield model_config
        logger.info(f"Done with {config_name}")


@pytest.fixture(scope="module")
def neuron_decoder_path(neuron_decoder_config):
    yield neuron_decoder_config["neuron_model_path"]
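
For illustration, a test consuming the neuron_decoder_config fixture above could look like the minimal sketch below. The test name, prompt, and generation parameters are hypothetical and are not part of this commit; the sketch only assumes what the fixture documents, namely that the exported model and its tokenizer are available under neuron_model_path.

import pytest
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM


def test_decoder_generation_sketch(neuron_decoder_config):
    # The fixture guarantees the exported model and its tokenizer are present locally.
    model_path = neuron_decoder_config["neuron_model_path"]
    model = NeuronModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    inputs = tokenizer("Hello, my name is", return_tensors="pt")
    # Keep the generation short; the exported sequence_length (see export_kwargs)
    # is the hard upper bound on total length.
    outputs = model.generate(**inputs, do_sample=False, max_new_tokens=16)
    assert outputs.shape[-1] <= neuron_decoder_config["export_kwargs"]["sequence_length"]
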
@@ -0,0 +1,87 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tempfile import TemporaryDirectory

import pytest
from transformers import AutoModelForCausalLM

from optimum.neuron import NeuronModelForCausalLM
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx


DECODER_MODEL_ARCHITECTURES = ["bloom", "gpt2", "llama", "mistral", "mixtral", "opt"]
DECODER_MODEL_NAMES = {
    "bloom": "hf-internal-testing/tiny-random-BloomForCausalLM",
    "gpt2": "hf-internal-testing/tiny-random-gpt2",
    "llama": "dacorvo/tiny-random-llama",
    "mistral": "dacorvo/tiny-random-MistralForCausalLM",
    "mixtral": "dacorvo/Mixtral-tiny",
    "opt": "hf-internal-testing/tiny-random-OPTForCausalLM",
}


@pytest.fixture(
    scope="session", params=[DECODER_MODEL_NAMES[model_arch] for model_arch in DECODER_MODEL_ARCHITECTURES]
)
def export_decoder_id(request):
    return request.param


def check_neuron_model(neuron_model, batch_size=None, sequence_length=None, num_cores=None, auto_cast_type=None):
    neuron_config = getattr(neuron_model.config, "neuron", None)
    assert neuron_config
    if batch_size:
        assert neuron_config["batch_size"] == batch_size
    if sequence_length:
        assert neuron_config["sequence_length"] == sequence_length
    if num_cores:
        assert neuron_config["num_cores"] == num_cores
    if auto_cast_type:
        assert neuron_config["auto_cast_type"] == auto_cast_type


@pytest.mark.parametrize(
    "batch_size, sequence_length, num_cores, auto_cast_type",
    [
        [1, 100, 2, "fp32"],
        [1, 100, 2, "fp16"],
        [2, 100, 2, "fp16"],
    ],
)
@is_inferentia_test
@requires_neuronx
@pytest.mark.parametrize("local", [True, False], ids=["local", "from_hub"])
def test_decoder_export_save_reload(local, export_decoder_id, batch_size, sequence_length, num_cores, auto_cast_type):
    export_kwargs = {
        "batch_size": batch_size,
        "sequence_length": sequence_length,
        "num_cores": num_cores,
        "auto_cast_type": auto_cast_type,
    }
    with TemporaryDirectory() as model_path:
        if local:
            with TemporaryDirectory() as tmpdir:
                model = AutoModelForCausalLM.from_pretrained(export_decoder_id)
                model.save_pretrained(tmpdir)
                model = NeuronModelForCausalLM.from_pretrained(tmpdir, export=True, **export_kwargs)
                model.save_pretrained(model_path)
        else:
            model = NeuronModelForCausalLM.from_pretrained(export_decoder_id, export=True, **export_kwargs)
            model.save_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
        del model
        model = NeuronModelForCausalLM.from_pretrained(model_path)
        check_neuron_model(model, **export_kwargs)
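
As a usage note, a model reloaded from a saved export directory, as at the end of the test above, can also be exercised with a short greedy generation. The helper below is only a sketch and is not part of this commit; the function name and the assumption that a matching tokenizer id is passed in are hypothetical.

from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForCausalLM


def sanity_check_generation(model_path, tokenizer_id, prompt="Hello"):
    # Reload the exported artifacts and run a short greedy generation as a smoke test.
    model = NeuronModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, do_sample=False, max_new_tokens=10)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
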