Commit af2f7d4

Merge branch 'dev_int8_conv' of https://github.com/Oneflow-Inc/oneflow into dev_int8_conv

clackhan committed Sep 5, 2023
2 parents 42c1ee9 + 4a5a5c6
Showing 5 changed files with 484 additions and 0 deletions.
6 changes: 6 additions & 0 deletions oneflow/core/functional/functional_api.yaml
@@ -1106,6 +1106,12 @@
  signature: 'Tensor (Tensor x, Tensor w, Tensor w_scale, *, Tensor w_zero=None, Tensor b=None, Int32 num_bits=8, Bool symmetric=True, Int64 group_dim=-1, Int64 group_size=-1) => FusedLinearWithGroupwiseQuantizedWeight'
  bind_python: True

- name: "fused_activation_min_max_observer"
  signature:
    "TensorTuple (Tensor in, Tensor weight_scale, Tensor weight_acc, Tensor bias=None, String quantization_formula, Int32 quantization_bit,
    String quantization_scheme, Bool per_layer_quantization=True) => FusedActivationMinMaxObserver"
  bind_python: True

- name: "conv_data_grad"
  signature:
    'Tensor (Tensor dy, Tensor weight, Tensor x, Int32 num_spatial_dims,
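For reference, the YAML entry above feeds OneFlow's functional-API code generation. A minimal sketch of the C++ declaration it should correspond to, mirroring the functor's operator() in the next file (an assumption about the generated header, which is not part of this diff):

    Maybe<TensorTuple> FusedActivationMinMaxObserver(
        const std::shared_ptr<one::Tensor>& in, const std::shared_ptr<one::Tensor>& weight_scale,
        const std::shared_ptr<one::Tensor>& weight_acc, const Optional<one::Tensor>& bias,
        const std::string& quantization_formula, const int32_t& quantization_bit,
        const std::string& quantization_scheme, const bool& per_layer_quantization);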
46 changes: 46 additions & 0 deletions oneflow/core/functional/impl/quantization.cpp
@@ -90,6 +90,51 @@ class MovingAverageMinMaxObserverFunctor {
std::shared_ptr<OpExpr> op_;
};

class FusedActivationMinMaxObserverFunctor {
public:
FusedActivationMinMaxObserverFunctor() {
op_ = CHECK_JUST(one::OpBuilder("fused_activation_min_max_observer")
.Input("in")
.Input("weight_scale")
.Input("weight_acc")
.Output("in_scale")
.Output("in_zero_point")
.Output("out_scale")
.Output("out_bias")
.Build());
op_with_bias_ = CHECK_JUST(one::OpBuilder("fused_activation_min_max_observer")
.Input("in")
.Input("weight_scale")
.Input("weight_acc")
.Input("bias")
.Output("in_scale")
.Output("in_zero_point")
.Output("out_scale")
.Output("out_bias")
.Build());
}
Maybe<TensorTuple> operator()(
const std::shared_ptr<one::Tensor>& in, const std::shared_ptr<one::Tensor>& weight_scale,
const std::shared_ptr<one::Tensor>& weight_acc, const Optional<one::Tensor>& bias,
const std::string& quantization_formula, const int32_t& quantization_bit,
const std::string& quantization_scheme, const bool& per_layer_quantization) const {
auto& attrs = THREAD_CACHED_MUTABLE_ATTR_MAP("quantization_formula", "quantization_bit",
"quantization_scheme", "per_layer_quantization");
attrs.SetAllAttrs(quantization_formula, quantization_bit, quantization_scheme,
per_layer_quantization);
if (bias) {
return OpInterpUtil::Dispatch<TensorTuple>(*op_with_bias_,
{in, weight_scale, weight_acc, JUST(bias)}, attrs);
} else {
return OpInterpUtil::Dispatch<TensorTuple>(*op_, {in, weight_scale, weight_acc}, attrs);
}
}

private:
std::shared_ptr<OpExpr> op_;
std::shared_ptr<OpExpr> op_with_bias_;
};

class FakeQuantizationFunctor {
public:
FakeQuantizationFunctor() {
@@ -390,6 +435,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
m.add_functor<impl::GroupwiseDequantizeFunctor>("GroupwiseDequantize");
m.add_functor<impl::FusedLinearWithGroupwiseQuantizedWeightFunctor>(
"FusedLinearWithGroupwiseQuantizedWeight");
m.add_functor<impl::FusedActivationMinMaxObserverFunctor>("FusedActivationMinMaxObserver");
};

} // namespace functional
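The functor above builds two OpExpr variants because `bias` is optional and a user-op expression's inputs are fixed when it is built; dispatch picks the with-bias expression only when a bias tensor is supplied. A hypothetical call site for the registered functional (a sketch assuming the generated functional::FusedActivationMinMaxObserver wrapper exists; tensor construction and the surrounding Maybe<...> context are elided):

    // `in`, `weight_scale`, `weight_acc` are assumed to be std::shared_ptr<one::Tensor>.
    const auto outputs = JUST(functional::FusedActivationMinMaxObserver(
        in, weight_scale, weight_acc, /*bias=*/NullOpt,
        /*quantization_formula=*/"google", /*quantization_bit=*/8,
        /*quantization_scheme=*/"symmetric", /*per_layer_quantization=*/true));
    // Per the op's outputs: (*outputs)[0] is in_scale, [1] in_zero_point,
    // [2] out_scale, [3] out_bias.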
26 changes: 26 additions & 0 deletions oneflow/ir/include/OneFlow/OneFlowUserOps.td
@@ -8354,6 +8354,32 @@ def OneFlow_FusedLinearWithGroupwiseQuantizedWeightOp : OneFlow_BaseOp<"fused_li
let has_data_type_infer_fn = 1;
}

def OneFlow_FusedActivationMinMaxObserverOp : OneFlow_BaseOp<"fused_activation_min_max_observer", [NoMemoryEffect, NoGrad, DeclareOpInterfaceMethods<UserOpCompatibleInterface>]> {
let input = (ins
OneFlow_Tensor:$in,
OneFlow_Tensor:$weight_scale,
OneFlow_Tensor:$weight_acc,
Optional<OneFlow_Tensor>:$bias
);
let output = (outs
OneFlow_Tensor:$in_scale,
OneFlow_Tensor:$in_zero_point,
OneFlow_Tensor:$out_scale,
OneFlow_Tensor:$out_bias
);
let attrs = (ins
DefaultValuedAttr<StrAttr, "\"google\"">:$quantization_formula,
DefaultValuedAttr<SI32Attr, "8">:$quantization_bit,
DefaultValuedAttr<StrAttr, "\"symmetric\"">:$quantization_scheme,
DefaultValuedAttr<BoolAttr, "true">:$per_layer_quantization
);
let has_check_fn = 1;
let has_logical_tensor_desc_infer_fn = 1;
let has_physical_tensor_desc_infer_fn = 1;
let has_get_sbp_fn = 1;
let has_data_type_infer_fn = 1;
}

def OneFlow_Conv2DQuantOp : OneFlow_BaseOp<"conv2d_quant", [NoMemoryEffect, AttrSizedOperandSegments, DeclareOpInterfaceMethods<UserOpCompatibleInterface>, DeclareOpInterfaceMethods<NCHWCompatibleInterface>]> {
let summary = "OneFlow fused convolution quant operation";
let description = [{
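To show how the attributes declared above are typically consumed, here is a schematic fragment of a user-op kernel reading them (illustrative only: the actual kernel is in one of the files not shown in this diff, and everything beyond the attribute and tensor names is invented):

    void Compute(user_op::KernelComputeContext* ctx) const {
      // Attribute names and types match the op definition above.
      const std::string& formula = ctx->Attr<std::string>("quantization_formula");
      const int32_t bit = ctx->Attr<int32_t>("quantization_bit");
      const std::string& scheme = ctx->Attr<std::string>("quantization_scheme");
      const bool per_layer = ctx->Attr<bool>("per_layer_quantization");
      const user_op::Tensor* in = ctx->Tensor4ArgNameAndIndex("in", 0);
      user_op::Tensor* in_scale = ctx->Tensor4ArgNameAndIndex("in_scale", 0);
      // ... scan `in` for min/max and write the four outputs declared above ...
    }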
(Diffs for the remaining two changed files are not shown here.)