1 change: 1 addition & 0 deletions src/nncf/common/strip.py
@@ -28,6 +28,7 @@
 @tracked_function(category=MODEL_BASED_CATEGORY, extractors=[FunctionCallTelemetryExtractor("nncf.strip")])
 def strip(
     model: TModel,
+    *,
     do_copy: bool = True,
     strip_format: StripFormat = StripFormat.NATIVE,
     example_input: Optional[Any] = None,
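
The only functional change in this hunk is the bare `*` marker: every parameter declared after it becomes keyword-only. Below is a minimal, self-contained sketch of that behavior using a toy stand-in (not the real nncf.strip); the parameter names mirror the signature above, and the body is invented purely for illustration.

def strip(model, *, do_copy=True, strip_format="native", example_input=None):
    # Toy body: return a copy-like object when do_copy is requested.
    return dict(model) if do_copy else model

strip({"weights": 1}, do_copy=False)   # accepted: keyword form
try:
    strip({"weights": 1}, False)       # rejected: do_copy passed positionally
except TypeError as err:
    print(err)                         # strip() takes 1 positional argument but 2 were given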
2 changes: 1 addition & 1 deletion src/nncf/pruning/prune_model.py
@@ -23,8 +23,8 @@
 
 def prune(
     model: TModel,
+    *,
     mode: PruneMode,
-    *,
     ratio: Optional[float] = None,
     ignored_scope: Optional[IgnoredScope] = None,
     examples_inputs: Optional[Any] = None,
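
Note the difference from the strip() hunk: here the marker moves ahead of `mode`, so `mode` itself is no longer accepted positionally. A small sketch with toy stand-ins follows; only the parameter names come from the diff, and the mode string and ratio value are hypothetical.

def prune_old(model, mode, *, ratio=None, ignored_scope=None, examples_inputs=None):
    return model  # toy stand-in for the previous signature

def prune_new(model, *, mode, ratio=None, ignored_scope=None, examples_inputs=None):
    return model  # toy stand-in for the signature after this change

prune_old("net", "magnitude", ratio=0.5)       # previously valid: mode passed positionally
prune_new("net", mode="magnitude", ratio=0.5)  # still valid: keyword form
try:
    prune_new("net", "magnitude", ratio=0.5)   # now rejected
except TypeError as err:
    print(err)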
4 changes: 3 additions & 1 deletion src/nncf/quantization/quantize_model.py
@@ -129,6 +129,7 @@ def _update_advanced_quantization_parameters(
 def quantize(
     model: TModel,
     calibration_dataset: Dataset,
+    *,
     mode: Optional[QuantizationMode] = None,
     preset: Optional[QuantizationPreset] = None,
     target_device: TargetDevice = TargetDevice.ANY,
@@ -298,6 +299,7 @@ def quantize_with_accuracy_control(
     calibration_dataset: Dataset,
     validation_dataset: Dataset,
     validation_fn: Callable[[Any, Iterable[Any]], tuple[float, Union[None, list[float], list[list[TTensor]]]]],
+    *,
     max_drop: float = 0.01,
     drop_type: DropType = DropType.ABSOLUTE,
     preset: Optional[QuantizationPreset] = None,
@@ -423,14 +425,14 @@ def quantize_with_accuracy_control(
 )
 def compress_weights(
     model: TModel,
+    *,
     mode: CompressWeightsMode = CompressWeightsMode.INT8_ASYM,
     ratio: Optional[float] = None,
     group_size: Optional[int] = None,
     ignored_scope: Optional[IgnoredScope] = None,
     all_layers: Optional[bool] = None,
     dataset: Optional[Dataset] = None,
     sensitivity_metric: Optional[SensitivityMetric] = None,
-    *,
     subset_size: int = 128,
     awq: Optional[bool] = None,
     scale_estimation: Optional[bool] = None,
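
To confirm which parameters end up keyword-only after a change like this, `inspect.signature` from the standard library reports it directly. The sketch below runs against a toy mirror of the new compress_weights shape; pointing the same check at the real nncf functions should behave the same way, assuming nncf is importable in your environment.

import inspect

def compress_weights(model, *, mode="int8_asym", ratio=None, group_size=None, subset_size=128):
    return model  # toy mirror of the new signature shape

keyword_only = [
    name
    for name, param in inspect.signature(compress_weights).parameters.items()
    if param.kind is inspect.Parameter.KEYWORD_ONLY
]
print(keyword_only)  # ['mode', 'ratio', 'group_size', 'subset_size']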
12 changes: 6 additions & 6 deletions tests/onnx/quantization/test_weights_compression.py
@@ -121,7 +121,7 @@ def calculate_numbers_of_quantized_weights(model: onnx.ModelProto) -> WeightType
 )
 def test_numbers_of_quantized_weights(mode, reference_counter):
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
     counter = calculate_numbers_of_quantized_weights(model)
     assert counter == reference_counter
 
@@ -133,7 +133,7 @@ def test_numbers_of_quantized_weights(mode, reference_counter):
 def test_correct_dequantizelinear_int8(mode_weight_type):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -164,7 +164,7 @@ def test_correct_dequantizelinear_int8(mode_weight_type):
 def test_correct_dequantizelinear_uint8(mode_weight_type):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -204,7 +204,7 @@ def test_correct_dequantizelinear_uint8(mode_weight_type):
 def test_correct_dequantizelinear_int4(mode_weight_type, group_size):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode, group_size=group_size, all_layers=True)
+    model = compress_weights(model, mode=mode, group_size=group_size, all_layers=True)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -240,7 +240,7 @@ def test_correct_dequantizelinear_int4(mode_weight_type, group_size):
 def test_correct_dequantizelinear_uint4(mode_weight_type, group_size):
     mode, expected_weight_type = mode_weight_type
     model = create_model()
-    model = compress_weights(model, mode, group_size=group_size, all_layers=True)
+    model = compress_weights(model, mode=mode, group_size=group_size, all_layers=True)
 
     dq_cnt = 0
     for node in model.graph.node:
@@ -281,7 +281,7 @@ def test_correct_dequantizelinear_uint4(mode_weight_type, group_size):
 )
 def test_compression_with_inference(mode):
     model = create_model()
-    model = compress_weights(model, mode)
+    model = compress_weights(model, mode=mode)
     onnx.checker.check_model(model)
     input_data = np.random.rand(100, 1280).astype(np.float32)
     session = InferenceSession(model.SerializeToString())
(additional changed file; path not captured in this view)
@@ -611,7 +611,7 @@ def test_shared_gather(mode):
         "matmul_1_data": ov.Type.i4 if mode == CompressWeightsMode.INT4_SYM else ov.Type.u4,
     }
     model = GatherAndMatmulShareData().ov_model
-    compressed_model = compress_weights(model, mode, group_size=3)
+    compressed_model = compress_weights(model, mode=mode, group_size=3)
     for op in compressed_model.get_ordered_ops():
         op_name = op.get_friendly_name()
         if op.get_type_name() == "Constant" and op_name in weight_name_vs_type:
@@ -626,7 +626,7 @@ def test_shared_gather_all_layers(all_layers):
         "matmul_1_data": ov.Type.u4,
     }
     model = GatherAndMatmulShareData().ov_model
-    compressed_model = compress_weights(model, CompressWeightsMode.INT4_ASYM, group_size=-1, all_layers=all_layers)
+    compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_ASYM, group_size=-1, all_layers=all_layers)
     for op in compressed_model.get_ordered_ops():
         op_name = op.get_friendly_name()
         if op.get_type_name() == "Constant" and op_name in weight_name_vs_type:
(additional changed file; path not captured in this view)
@@ -389,7 +389,7 @@ def get_input_node_data(node: ov.Node, input_id: int) -> Tensor:
     if is_data_aware:
         compression_kwargs["dataset"] = create_dataset(model)
 
-    nncf.compress_weights(model, config.mode, group_size=config.group_size, **compression_kwargs)
+    nncf.compress_weights(model, mode=config.mode, group_size=config.group_size, **compression_kwargs)
 
     if cb == ComputationBackend.NumPy:
         mock.assert_not_called()
4 changes: 2 additions & 2 deletions tests/torch/quantization/test_strip.py
@@ -316,9 +316,9 @@ def test_nncf_strip_api(strip_type, do_copy):
     quantized_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
 
     if strip_type == "nncf":
-        strip_model = nncf.strip(quantized_model, do_copy)
+        strip_model = nncf.strip(quantized_model, do_copy=do_copy)
     elif strip_type == "torch":
-        strip_model = nncf.torch.strip(quantized_model, do_copy)
+        strip_model = nncf.torch.strip(quantized_model, do_copy=do_copy)
     elif strip_type == "nncf_interfere":
         strip_model = quantized_model.nncf.strip(do_copy)
 
(additional changed file; path not captured in this view)
@@ -32,9 +32,19 @@ def test_nncf_strip_api(strip_type: str, do_copy: bool):
     quantized_model = nncf.quantize(model, nncf.Dataset([torch.ones(model.INPUT_SIZE)]), subset_size=1)
 
     if strip_type == "nncf":
-        strip_model = nncf.strip(quantized_model, do_copy, nncf.StripFormat.NATIVE, example_input)
+        strip_model = nncf.strip(
+            quantized_model,
+            do_copy=do_copy,
+            strip_format=nncf.StripFormat.NATIVE,
+            example_input=example_input,
+        )
     elif strip_type == "torch":
-        strip_model = nncf.torch.strip(quantized_model, do_copy, nncf.StripFormat.NATIVE, example_input)
+        strip_model = nncf.torch.strip(
+            quantized_model,
+            do_copy=do_copy,
+            strip_format=nncf.StripFormat.NATIVE,
+            example_input=example_input,
+        )
 
     if do_copy:
         assert id(strip_model) != id(quantized_model)
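
The test updates above simply switch call sites to the keyword form. A natural companion, sketched here on a toy stand-in rather than the real API, is a guard test asserting that the legacy positional form is rejected (pytest assumed; the stand-in only mimics the keyword-only layout shown in the diffs).

import pytest

def strip(model, *, do_copy=True, strip_format="native", example_input=None):
    return model  # toy stand-in with the keyword-only layout

def test_positional_do_copy_rejected():
    with pytest.raises(TypeError):
        strip(object(), True)  # old call style: do_copy passed positionally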