From 6b074fd59adaa66144cb66240d98a66946db5b38 Mon Sep 17 00:00:00 2001
From: qinyiqun
Date: Wed, 18 Mar 2026 17:25:41 +0800
Subject: [PATCH] Issue/1052: add functional per_tensor_quant_i8 API

Add per_tensor_quant_i8(), a value-returning companion to the existing
per_tensor_quant_i8_(). Instead of taking the output tensor from the
caller, it allocates an I8 tensor with Tensor::strided_empty() using the
shape, strides and device of x, passes it to PerTensorQuantI8::execute()
together with x_scale, x_zero and is_static, and returns it.

Also rename enum class KVQuantScheme to KVQuantAlgo.

---
Review notes (this section is not part of the commit message):
- The KVQuantScheme -> KVQuantAlgo rename is not mentioned in the subject
  and only the declaring header is touched by this patch; please confirm
  that no other code in the tree still refers to KVQuantScheme.
- The new output tensor reuses x->strides(); please confirm this is the
  intended layout when x is not contiguous.

 include/infinicore/ops/per_tensor_quant_i8.hpp              | 2 ++
 include/infinicore/quantization/quantization_scheme.hpp     | 2 +-
 .../quant/per_tensor_quant_int8/per_tensor_quant_int8.cc    | 6 ++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/infinicore/ops/per_tensor_quant_i8.hpp b/include/infinicore/ops/per_tensor_quant_i8.hpp
index 9826d2123..84ec99c37 100644
--- a/include/infinicore/ops/per_tensor_quant_i8.hpp
+++ b/include/infinicore/ops/per_tensor_quant_i8.hpp
@@ -8,4 +8,6 @@ namespace infinicore::op {
 INFINICORE_GRAPH_OP_CLASS(PerTensorQuantI8, const Tensor &, Tensor, Tensor, Tensor, bool);
 
 void per_tensor_quant_i8_(const Tensor &x, Tensor x_packed, Tensor x_scale, Tensor x_zero, bool is_static);
+
+Tensor per_tensor_quant_i8(const Tensor &x, Tensor x_scale, Tensor x_zero, bool is_static);
 } // namespace infinicore::op
diff --git a/include/infinicore/quantization/quantization_scheme.hpp b/include/infinicore/quantization/quantization_scheme.hpp
index 3f6286375..9c08ea6e0 100644
--- a/include/infinicore/quantization/quantization_scheme.hpp
+++ b/include/infinicore/quantization/quantization_scheme.hpp
@@ -9,7 +9,7 @@ enum class QuantScheme {
     AWQ_W4A16,
 };
 
-enum class KVQuantScheme {
+enum class KVQuantAlgo {
     NONE,
     INT8,
 };
diff --git a/src/infinicore/ops/quant/per_tensor_quant_int8/per_tensor_quant_int8.cc b/src/infinicore/ops/quant/per_tensor_quant_int8/per_tensor_quant_int8.cc
index add21e64a..498af541e 100644
--- a/src/infinicore/ops/quant/per_tensor_quant_int8/per_tensor_quant_int8.cc
+++ b/src/infinicore/ops/quant/per_tensor_quant_int8/per_tensor_quant_int8.cc
@@ -17,4 +17,10 @@ void PerTensorQuantI8::execute(const Tensor &x, Tensor x_packed, Tensor x_scale,
 void per_tensor_quant_i8_(const Tensor &x, Tensor x_packed, Tensor x_scale, Tensor x_zero, bool is_static) {
     PerTensorQuantI8::execute(x, x_packed, x_scale, x_zero, is_static);
 }
+
+Tensor per_tensor_quant_i8(const Tensor &x, Tensor x_scale, Tensor x_zero, bool is_static) {
+    auto x_packed = Tensor::strided_empty(x->shape(), x->strides(), infinicore::DataType::I8, x->device());
+    PerTensorQuantI8::execute(x, x_packed, x_scale, x_zero, is_static);
+    return x_packed;
+}
 } // namespace infinicore::op