
Commit 0110d5d

fix + test
1 parent ab582ce commit 0110d5d

4 files changed: +79 −100

optimum/commands/export/neuron.py (+25)

@@ -46,6 +46,21 @@ def parse_args_neuron(parser: "ArgumentParser"):
             f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
         ),
     )
+    optional_group.add_argument(
+        "--library-name",
+        type=str,
+        choices=["transformers", "sentence_transformers"],
+        default=None,
+        help="The library of the model. If not provided, will attempt to infer the local checkpoint's library.",
+    )
+    optional_group.add_argument(
+        "--subfolder",
+        type=str,
+        default="",
+        help=(
+            "In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, specify the folder name here."
+        ),
+    )
     optional_group.add_argument(
         "--atol",
         type=float,
@@ -58,6 +73,16 @@ def parse_args_neuron(parser: "ArgumentParser"):
         action="store_true",
         help="Allow to use custom code for the modeling hosted in the model repository. This option should only be set for repositories you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the model repository.",
     )
+    optional_group.add_argument(
+        "--compiler_workdir",
+        type=Path,
+        help="Path indicating the directory where to store intermediary files generated by the Neuronx compiler.",
+    )
+    optional_group.add_argument(
+        "--disable-weights-neff-inline",
+        action="store_true",
+        help="Whether to disable the inlining of the weights into the neff graph. You can only replace the weights of a neuron-compiled model when weights-neff inlining has been disabled during compilation.",
+    )
     optional_group.add_argument(
         "--disable-validation",
         action="store_true",

optimum/exporters/neuron/__main__.py (+11 −8)

@@ -150,16 +150,19 @@ def customize_optional_outputs(args: argparse.Namespace) -> Dict[str, bool]:

 def parse_optlevel(args: argparse.Namespace) -> Dict[str, bool]:
     """
-    Parse the level of optimization the compiler should perform. If not specified apply `O2`(the best balance between model performance and compile time).
+    (NEURONX ONLY) Parse the level of optimization the compiler should perform. If not specified, apply `O2` (the best balance between model performance and compile time).
     """
-    if args.O1:
-        optlevel = "1"
-    elif args.O2:
-        optlevel = "2"
-    elif args.O3:
-        optlevel = "3"
+    if is_neuronx_available():
+        if args.O1:
+            optlevel = "1"
+        elif args.O2:
+            optlevel = "2"
+        elif args.O3:
+            optlevel = "3"
+        else:
+            optlevel = "2"
     else:
-        optlevel = "2"
+        optlevel = None
     return optlevel
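As a quick sanity check of the new control flow, a minimal standalone sketch (assuming an argparse namespace with boolean O1/O2/O3 attributes, as in the real parser):

from argparse import Namespace

def parse_optlevel_sketch(args: Namespace, neuronx_available: bool):
    # Mirrors the fixed logic: optlevel is a neuronx-cc concept, so it is
    # only resolved when neuronx is available; otherwise it stays None.
    if neuronx_available:
        if args.O1:
            return "1"
        elif args.O2:
            return "2"
        elif args.O3:
            return "3"
        return "2"  # default: best balance of performance and compile time
    return None

assert parse_optlevel_sketch(Namespace(O1=False, O2=False, O3=True), True) == "3"
assert parse_optlevel_sketch(Namespace(O1=False, O2=False, O3=False), False) is None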

optimum/exporters/neuron/convert.py (+21 −1)

@@ -405,7 +405,17 @@ def export(
     disable_fallback: bool = False,
 ) -> Tuple[List[str], List[str]]:
     if is_neuron_available():
-        return export_neuron(model, config, output, auto_cast, auto_cast_type, disable_fast_relayout, disable_fallback)
+        return export_neuron(
+            model=model,
+            config=config,
+            output=output,
+            compiler_workdir=compiler_workdir,
+            inline_weights_to_neff=inline_weights_to_neff,
+            auto_cast=auto_cast,
+            auto_cast_type=auto_cast_type,
+            disable_fast_relayout=disable_fast_relayout,
+            disable_fallback=disable_fallback,
+        )
     elif is_neuronx_available():
         return export_neuronx(
             model=model,
@@ -570,6 +580,8 @@ def export_neuron(
     model: "PreTrainedModel",
     config: "NeuronDefaultConfig",
     output: Path,
+    compiler_workdir: Optional[Path] = None,
+    inline_weights_to_neff: bool = True,
     auto_cast: Optional[str] = None,
     auto_cast_type: str = "bf16",
     disable_fast_relayout: bool = False,
@@ -585,6 +597,10 @@ def export_neuron(
             The Neuron configuration associated with the exported model.
         output (`Path`):
             Directory to store the exported Neuron model.
+        compiler_workdir (`Optional[Path]`, defaults to `None`):
+            The directory used by the Neuron compiler, where you can find intermediary outputs (neff, weight, hlo...).
+        inline_weights_to_neff (`bool`, defaults to `True`):
+            Whether to inline the weights into the neff graph. If set to False, weights will be separated from the neff.
         auto_cast (`Optional[str]`, defaults to `None`):
             Whether to cast operations from FP32 to lower precision to speed up the inference. Can be `None`, `"matmul"` or `"all"`, you should use `None` to disable any auto-casting, use `"matmul"` to cast FP32 matrix multiplication operations, and use `"all"` to cast all FP32 operations.
         auto_cast_type (`str`, defaults to `"bf16"`):
@@ -599,6 +615,8 @@ def export_neuron(
         the Neuron configuration.
     """
     output.parent.mkdir(parents=True, exist_ok=True)
+    if isinstance(compiler_workdir, Path):
+        compiler_workdir = compiler_workdir.as_posix()

     if hasattr(model, "config"):
         model.config.return_dict = True
@@ -626,6 +644,8 @@ def export_neuron(
         dummy_inputs_tuple,
         dynamic_batch_size=config.dynamic_batch_size,
         compiler_args=compiler_args,
+        compiler_workdir=compiler_workdir,
+        separate_weights=not inline_weights_to_neff,
         fallback=not disable_fallback,
     )
     torch.jit.save(neuron_model, output)
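To make the new keyword arguments concrete, a hedged usage sketch of the extended export_neuron signature; the `model` and `neuron_config` objects are assumed to have been prepared elsewhere and are placeholders here.

from pathlib import Path

from optimum.exporters.neuron.convert import export_neuron

# Sketch only: `model` is a loaded PreTrainedModel and `neuron_config` the
# matching NeuronDefaultConfig; both are assumed to exist already.
inputs, outputs = export_neuron(
    model=model,
    config=neuron_config,
    output=Path("compiled/model.neuron"),
    compiler_workdir=Path("compiler_cache"),  # where intermediary outputs land
    inline_weights_to_neff=False,  # keep weights separate so they can be replaced later
)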

tests/cli/test_export_cli.py (+22 −91)

@@ -13,102 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
-import random
 import subprocess
 import tempfile
 import unittest
-from itertools import product
-from typing import Dict, Optional

 from optimum.exporters.neuron.model_configs import *  # noqa: F403
-from optimum.exporters.tasks import TasksManager
-from optimum.neuron.utils import is_neuron_available, is_neuronx_available
 from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
-from optimum.utils import DEFAULT_DUMMY_SHAPES, logging
+from optimum.utils import logging


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


-_COMMOM_COMMANDS = {
-    "--auto_cast": ["none", "matmul", "all"],
-    "--auto_cast_type": ["bf16", "fp16"],  # "tf32", "mixed"
-}
-_NEURON_COMMANDS = {}
-_NEURONX_COMMANDS = {}
-_DYNAMIC_COMMANDS = {"neuron": ["--disable-fast-relayout"], "neuronx": []}
-
-
-def _get_models_to_test(export_models_dict: Dict, random_pick: Optional[int] = 1):
-    models_to_test = []
-    for model_type, model_names_tasks in export_models_dict.items():
-        model_type = model_type.replace("_", "-")
-        task_config_mapping = TasksManager.get_supported_tasks_for_model_type(model_type, "neuron")
-
-        if isinstance(model_names_tasks, str):  # test export of all tasks on the same model
-            tasks = list(task_config_mapping.keys())
-            model_tasks = {model_names_tasks: tasks}
-        else:
-            n_tested_tasks = sum(len(tasks) for tasks in model_names_tasks.values())
-            if n_tested_tasks != len(task_config_mapping):
-                logger.warning(f"Not all tasks are tested for {model_type}.")
-            model_tasks = model_names_tasks  # possibly, test different tasks on different models
-
-        for model_name, tasks in model_tasks.items():
-            for task in tasks:
-                default_shapes = dict(DEFAULT_DUMMY_SHAPES)
-                TasksManager.get_exporter_config_constructor(
-                    model_type=model_type,
-                    exporter="neuron",
-                    task=task,
-                    model_name=model_name,
-                    exporter_config_kwargs={**default_shapes},
-                )
-
-                models_to_test.append((f"{model_type}_{task}", model_name, task))
-
-    if random_pick is not None:
-        return sorted(random.choices(models_to_test, k=random_pick))
-    else:
-        return sorted(models_to_test)
-
-
-def _get_commands_to_test(models_to_test):
-    commands_to_test = []
-    for test_name, model_name, task in models_to_test:
-        if is_neuron_available():
-            command_items = dict(_COMMOM_COMMANDS, **_NEURON_COMMANDS)
-            dynamic_args = _DYNAMIC_COMMANDS["neuron"]
-        elif is_neuronx_available():
-            command_items = dict(_COMMOM_COMMANDS, **_NEURONX_COMMANDS)
-            dynamic_args = _DYNAMIC_COMMANDS["neuronx"]
-        else:
-            continue
-
-        base_command = f"optimum-cli export neuron --model {model_name} --task {task}"
-
-        # mandatory shape arguments
-        model = TasksManager.get_model_from_task(task, model_name, framework="pt")
-        neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=model, exporter="neuron", task=task
-        )
-        for axis in neuron_config_constructor.func.get_mandatory_axes_for_task(task):
-            default_size = DEFAULT_DUMMY_SHAPES[axis]
-            base_command += f" --{axis} {default_size}"
-
-        # compilation arguments
-        for extra_arg_options in product(*command_items.values()):
-            extra_command = " ".join(
-                [" ".join([arg, option]) for arg, option in zip(command_items, extra_arg_options)]
-            )
-            extra_command += " " + " ".join(random.choices(dynamic_args, k=random.randint(0, len(dynamic_args))))
-            command = base_command + " " + extra_command
-
-            commands_to_test.append((test_name + extra_command.strip(), command))
-
-    return sorted(commands_to_test)
-
-
 @is_inferentia_test
 class TestExportCLI(unittest.TestCase):
     def test_helps_no_raise(self):
@@ -121,12 +37,27 @@ def test_helps_no_raise(self):
         for command in commands:
             subprocess.run(command, shell=True, check=True)

-    # @parameterized.expand(_get_commands_to_test(_get_models_to_test(EXPORT_MODELS_TINY)), skip_on_empty=True)
-    # def test_export_commands(self, test_name, command_content):
-    #     with tempfile.TemporaryDirectory() as tempdir:
-    #         command = command_content + f" {tempdir}"
-
-    #         subprocess.run(command, shell=True, check=True)
+    def test_export_commands(self):
+        model_id = "hf-internal-testing/tiny-random-BertModel"
+        with tempfile.TemporaryDirectory() as tempdir:
+            subprocess.run(
+                [
+                    "optimum-cli",
+                    "export",
+                    "neuron",
+                    "--model",
+                    model_id,
+                    "--sequence_length",
+                    "16",
+                    "--batch_size",
+                    "1",
+                    "--task",
+                    "text-classification",
+                    tempdir,
+                ],
+                shell=False,
+                check=True,
+            )

     @requires_neuronx
     def test_dynamic_batching(self):
