NXP backend: Per-channel quantization of convolution layer #14061

Changes from all commits: e7c43b6, 2c82054, b340ad1, 82e6032, 1a3d9e5

@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import List, Optional, Tuple, Union
-
 import torch
 
 from executorch.backends.nxp.aten_passes.neutron_aten_pass_manager import (

@@ -27,6 +25,7 @@
     LinearPattern,
     MaxPoolPattern,
     MeanDimPattern,
+    NodeArgsIdx,
     PadPattern,
     PermutePattern,
     QuantizationPattern,

@@ -106,57 +105,43 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
         )
 
         def annotate_inputs(
-            inputs: Union[
-                List[Tuple[fx.Node, int]],
-                List[Tuple[fx.Node, int, DerivedQuantizationSpec],],
-            ],
-            spec: Optional[QuantizationSpec],
+            inputs: (
+                list[tuple[fx.Node, NodeArgsIdx]]
+                | list[tuple[fx.Node, NodeArgsIdx, DerivedQuantizationSpec]]
+            ),
+            spec: QuantizationSpec | None,

Review thread on the spec: QuantizationSpec | None line:
Reviewer: just curious, why switch from Optional to | None?
Author: It's a part of the move to Python 3.10 type hints, dropping the imports from typing.

         ) -> None:
-            for node, idx, *custom_spec in inputs:
+            for node, args_idx, *custom_spec in inputs:
                 # pyre-ignore[16]: no attribute
                 annotation = node.meta.get(
                     Q_ANNOTATION_KEY,
                     QuantizationAnnotation(_annotated=True),
                 )
                 arg = (
                     # pyre-ignore[16]: no attribute
-                    node.args[idx]
-                    if isinstance(idx, int)
+                    node.args[args_idx.idx]
+                    if args_idx.inner_idx is None
                     # pyre-ignore[16]: no attribute
-                    else node.args[idx[0]][idx[1]]
+                    else node.args[args_idx.idx][args_idx.inner_idx]
                 )
                 annotation.input_qspec_map[arg] = (
                     custom_spec[0] if custom_spec else spec
                 )
                 # pyre-ignore[16]: no attribute
                 node.meta[Q_ANNOTATION_KEY] = annotation
 
-        def annotate_weights_or_biases(
-            weights_or_biases: List[Tuple[fx.Node, int]],
-            spec: Optional[QuantizationSpec],
-        ) -> None:
-            for node, idx, *custom_spec in weights_or_biases:
-                annotation = node.meta.get(
-                    Q_ANNOTATION_KEY,
-                    QuantizationAnnotation(_annotated=True),
-                )
-                annotation.input_qspec_map[node.args[idx]] = (
-                    custom_spec[0] if custom_spec else spec
-                )
-                node.meta[Q_ANNOTATION_KEY] = annotation
-
         # pyre-ignore[6]: incompatible parameter type
         annotate_inputs(anchors.inputs, input_act_qspec)
-        annotate_weights_or_biases(anchors.weights, weight_qspec)

Review thread on the removed annotate_weights_or_biases call:
Reviewer: is this function no longer used at all now and can be removed entirely?
Author: Yes, it is replaced by annotate_inputs.

+        annotate_inputs(anchors.weights, weight_qspec)
         # pyre-ignore[6]: incompatible parameter type
-        annotate_weights_or_biases(anchors.biases, bias_qspec)
+        annotate_inputs(anchors.biases, bias_qspec)
         return model
 
     def validate(self, model: fx.GraphModule) -> None:
         pass
 
     @classmethod
-    def get_supported_operators(cls) -> List[OperatorConfig]:
+    def get_supported_operators(cls) -> list[OperatorConfig]:
         return []
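
The newly imported NodeArgsIdx is accessed above only as args_idx.idx and args_idx.inner_idx, so it appears to address either a direct positional argument or an element nested inside a list/tuple argument of a node. Below is a minimal, hypothetical sketch of that shape, inferred purely from this usage; the actual definition lives in the patterns module it is imported from (not shown in this excerpt) and may differ.

```python
# Hypothetical sketch of NodeArgsIdx, inferred only from how annotate_inputs
# uses it (args_idx.idx, args_idx.inner_idx); the PR's real definition may differ.
from dataclasses import dataclass


@dataclass
class NodeArgsIdx:
    idx: int                       # position within node.args
    inner_idx: int | None = None   # optional index into a list/tuple at node.args[idx]


# Resolution as done in annotate_inputs:
#   NodeArgsIdx(0)    -> node.args[0]
#   NodeArgsIdx(1, 2) -> node.args[1][2]
```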

@@ -195,12 +180,7 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
 
 class NeutronQuantizer(ComposableQuantizer):
     def __init__(self):
-        static_qconfig = QuantizationConfig(
-            act_qspec,
-            act_qspec,
-            wgt_qspec,
-            None,
-        )
+        static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None)
         static_fc_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_fc_qspec, None)
         super().__init__(
             [
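
For orientation, a hedged sketch of how a quantizer configured like this is typically driven through the PT2E flow. The toy model, the import path for NeutronQuantizer, and the use of torch.export.export_for_training are assumptions for illustration, not taken from this PR.

```python
# Hedged usage sketch (standard PT2E flow), not part of this PR.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer  # assumed path

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 3, 32, 32),)

# Capture the model graph for PT2E quantization.
exported = torch.export.export_for_training(model, example_inputs).module()

quantizer = NeutronQuantizer()
prepared = prepare_pt2e(exported, quantizer)   # inserts observers per the annotations
prepared(*example_inputs)                      # calibration pass
quantized = convert_pt2e(prepared)             # materializes quantize/dequantize nodes
```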

Review thread on the QDQ converters:
Reviewer: only QDQDequantizer needs to be updated, not QDQQuantizeConverter too?
Author: Correct, as there are no changes to QDQQuantizeConverter. The per-channel quantization scheme is used only for weights and biases, which are inputs, i.e. dequantize nodes.
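
Since the thread above hinges on per-channel quantization being applied only to weights and biases, here is a generic, hedged sketch of what a per-channel symmetric weight spec looks like in the torch.ao PT2E quantizer API. The dtype, value range, and observer are illustrative assumptions; the concrete spec used by this PR is not shown in this excerpt.

```python
# Hedged sketch, not the PR's actual spec: a per-channel symmetric weight spec
# for a convolution, with one scale/zero-point per output channel (ch_axis=0).
import torch
from torch.ao.quantization.observer import PerChannelMinMaxObserver
from torch.ao.quantization.quantizer import QuantizationSpec

per_channel_wgt_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-127,
    quant_max=127,
    qscheme=torch.per_channel_symmetric,
    ch_axis=0,  # output-channel axis of the conv weight
    is_dynamic=False,
    observer_or_fake_quant_ctr=PerChannelMinMaxObserver.with_args(
        qscheme=torch.per_channel_symmetric
    ),
)
```

Bias inputs are commonly handled with a DerivedQuantizationSpec whose per-channel scale is derived from the input and weight scales, which is consistent with the biases above being annotated through annotate_inputs with a custom spec.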