1 change: 1 addition & 0 deletions src/nncf/common/strip.py
@@ -28,6 +28,7 @@
 @tracked_function(category=MODEL_BASED_CATEGORY, extractors=[FunctionCallTelemetryExtractor("nncf.strip")])
 def strip(
     model: TModel,
+    *,
     do_copy: bool = True,
     strip_format: StripFormat = StripFormat.NATIVE,
     example_input: Optional[Any] = None,
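
The only functional change in this hunk is the bare `*` marker: every parameter declared after it becomes keyword-only. Below is a minimal, self-contained sketch of that behavior using a toy stand-in (not the real nncf.strip); the parameter names mirror the signature above, and the body is invented purely for illustration.

def strip(model, *, do_copy=True, strip_format="native", example_input=None):
    # Toy body: return a copy-like object when do_copy is requested.
    return dict(model) if do_copy else model

strip({"weights": 1}, do_copy=False)   # accepted: keyword form
try:
    strip({"weights": 1}, False)       # rejected: do_copy passed positionally
except TypeError as err:
    print(err)                         # strip() takes 1 positional argument but 2 were given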
2 changes: 1 addition & 1 deletion src/nncf/pruning/prune_model.py
@@ -23,8 +23,8 @@
 
 def prune(
     model: TModel,
+    *,
     mode: PruneMode,
-    *,
     ratio: Optional[float] = None,
     ignored_scope: Optional[IgnoredScope] = None,
     examples_inputs: Optional[Any] = None,
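
Note the difference from the strip() hunk: here the marker moves ahead of `mode`, so `mode` itself is no longer accepted positionally. A small sketch with toy stand-ins follows; only the parameter names come from the diff, and the mode string and ratio value are hypothetical.

def prune_old(model, mode, *, ratio=None, ignored_scope=None, examples_inputs=None):
    return model  # toy stand-in for the previous signature

def prune_new(model, *, mode, ratio=None, ignored_scope=None, examples_inputs=None):
    return model  # toy stand-in for the signature after this change

prune_old("net", "magnitude", ratio=0.5)       # previously valid: mode passed positionally
prune_new("net", mode="magnitude", ratio=0.5)  # still valid: keyword form
try:
    prune_new("net", "magnitude", ratio=0.5)   # now rejected
except TypeError as err:
    print(err)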
4 changes: 3 additions & 1 deletion src/nncf/quantization/quantize_model.py
@@ -129,6 +129,7 @@ def _update_advanced_quantization_parameters(
 def quantize(
     model: TModel,
     calibration_dataset: Dataset,
+    *,
     mode: Optional[QuantizationMode] = None,
     preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
@@ -298,6 +299,7 @@ def quantize_with_accuracy_control(
     calibration_dataset: Dataset,
     validation_dataset: Dataset,
     validation_fn: Callable[[Any, Iterable[Any]], tuple[float, Union[None, list[float], list[list[TTensor]]]]],
+    *,
     max_drop: float = 0.01,
     drop_type: DropType = DropType.ABSOLUTE,
     preset: Optional[QuantizationPreset] = None,
@@ -423,14 +425,14 @@ def quantize_with_accuracy_control(
 )
 def compress_weights(
     model: TModel,
+    *,
     mode: CompressWeightsMode = CompressWeightsMode.INT8_ASYM,
     ratio: Optional[float] = None,
     group_size: Optional[int] = None,
     ignored_scope: Optional[IgnoredScope] = None,
     all_layers: Optional[bool] = None,
     dataset: Optional[Dataset] = None,
     sensitivity_metric: Optional[SensitivityMetric] = None,
-    *,
     subset_size: int = 128,
     awq: Optional[bool] = None,
     scale_estimation: Optional[bool] = None,
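
To confirm which parameters end up keyword-only after a change like this, `inspect.signature` from the standard library reports it directly. The sketch below runs against a toy mirror of the new compress_weights shape; pointing the same check at the real nncf functions should behave the same way, assuming nncf is importable in your environment.

import inspect

def compress_weights(model, *, mode="int8_asym", ratio=None, group_size=None, subset_size=128):
    return model  # toy mirror of the new signature shape

keyword_only = [
    name
    for name, param in inspect.signature(compress_weights).parameters.items()
    if param.kind is inspect.Parameter.KEYWORD_ONLY
]
print(keyword_only)  # ['mode', 'ratio', 'group_size', 'subset_size']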
12 changes: 6 additions & 6 deletions tests/onnx/quantization/test_weights_compression.py
@@ -121,7 +121,7 @@ def calculate_numbers_of_quantized_weights(model: onnx.ModelProto) -> WeightType
 )
 def test_numbers_of_quantized_weights(mode, reference_counter):
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
     counter = calculate_numbers_of_quantized_weights(model)
     assert counter == reference_counter
 
@@ -133,7 +133,7 @@ def test_numbers_of_quantized_weights(mode, reference_counter):
 def test_correct_dequantizelinear_int8(mode_weight_type):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -164,7 +164,7 @@ def test_correct_dequantizelinear_int8(mode_weight_type):
 def test_correct_dequantizelinear_uint8(mode_weight_type):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -204,7 +204,7 @@ def test_correct_dequantizelinear_uint8(mode_weight_type):
 def test_correct_dequantizelinear_int4(mode_weight_type, group_size):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode, group_size=group_size, all_layers=True)
+    model = compress_weights(model, mode=mode, group_size=group_size, all_layers=True)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -240,7 +240,7 @@ def test_correct_dequantizelinear_int4(mode_weight_type, group_size):
 def test_correct_dequantizelinear_uint4(mode_weight_type, group_size):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode, group_size=group_size, all_layers=True)
+    model = compress_weights(model, mode=mode, group_size=group_size, all_layers=True)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -281,7 +281,7 @@ def test_correct_dequantizelinear_uint4(mode_weight_type, group_size):
 )
 def test_compression_with_inference(mode):
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
     onnx.checker.check_model(model)
     input_data = np.random.rand(100, 1280).astype(np.float32)
     session = InferenceSession(model.SerializeToString())
(additional changed file; path not captured in this view)
@@ -611,7 +611,7 @@ def test_shared_gather(mode):
         "matmul_1_data": ov.Type.i4 if mode == CompressWeightsMode.INT4_SYM else ov.Type.u4,
     }
     model = GatherAndMatmulShareData().ov_model
-    compressed_model = compress_weights(model, mode, group_size=3)
+    compressed_model = compress_weights(model, mode=mode, group_size=3)
     for op in compressed_model.get_ordered_ops():
         op_name = op.get_friendly_name()
         if op.get_type_name() == "Constant" and op_name in weight_name_vs_type:
@@ -626,7 +626,7 @@ def test_shared_gather_all_layers(all_layers):
         "matmul_1_data": ov.Type.u4,
     }
     model = GatherAndMatmulShareData().ov_model
-    compressed_model = compress_weights(model, CompressWeightsMode.INT4_ASYM, group_size=-1, all_layers=all_layers)
+    compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_ASYM, group_size=-1, all_layers=all_layers)
     for op in compressed_model.get_ordered_ops():
         op_name = op.get_friendly_name()
         if op.get_type_name() == "Constant" and op_name in weight_name_vs_type:
(additional changed file; path not captured in this view)
@@ -389,7 +389,7 @@ def get_input_node_data(node: ov.Node, input_id: int) -> Tensor:
     if is_data_aware:
         compression_kwargs["dataset"] = create_dataset(model)
 
-    nncf.compress_weights(model, config.mode, group_size=config.group_size, **compression_kwargs)
+    nncf.compress_weights(model, mode=config.mode, group_size=config.group_size, **compression_kwargs)
 
     if cb == ComputationBackend.NumPy:
         mock.assert_not_called()
4 changes: 2 additions & 2 deletions tests/torch/quantization/test_strip.py
@@ -316,9 +316,9 @@ def test_nncf_strip_api(strip_type, do_copy):
     quantized_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
 
     if strip_type == "nncf":
-        strip_model = nncf.strip(quantized_model, do_copy)
+        strip_model = nncf.strip(quantized_model, do_copy=do_copy)
     elif strip_type == "torch":
-        strip_model = nncf.torch.strip(quantized_model, do_copy)
+        strip_model = nncf.torch.strip(quantized_model, do_copy=do_copy)
     elif strip_type == "nncf_interfere":
         strip_model = quantized_model.nncf.strip(do_copy)
 
(additional changed file; path not captured in this view)
@@ -32,9 +32,19 @@ def test_nncf_strip_api(strip_type: str, do_copy: bool):
     quantized_model = nncf.quantize(model, nncf.Dataset([torch.ones(model.INPUT_SIZE)]), subset_size=1)
 
     if strip_type == "nncf":
-        strip_model = nncf.strip(quantized_model, do_copy, nncf.StripFormat.NATIVE, example_input)
+        strip_model = nncf.strip(
+            quantized_model,
+            do_copy=do_copy,
+            strip_format=nncf.StripFormat.NATIVE,
+            example_input=example_input,
+        )
     elif strip_type == "torch":
-        strip_model = nncf.torch.strip(quantized_model, do_copy, nncf.StripFormat.NATIVE, example_input)
+        strip_model = nncf.torch.strip(
+            quantized_model,
+            do_copy=do_copy,
+            strip_format=nncf.StripFormat.NATIVE,
+            example_input=example_input,
+        )
 
     if do_copy:
         assert id(strip_model) != id(quantized_model)
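
The test updates above simply switch call sites to the keyword form. A natural companion, sketched here on a toy stand-in rather than the real API, is a guard test asserting that the legacy positional form is rejected (pytest assumed; the stand-in only mimics the keyword-only layout shown in the diffs).

import pytest

def strip(model, *, do_copy=True, strip_format="native", example_input=None):
    return model  # toy stand-in with the keyword-only layout

def test_positional_do_copy_rejected():
    with pytest.raises(TypeError):
        strip(object(), True)  # old call style: do_copy passed positionally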