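Address PR review comments: query RISC-V target features and vector width via tvm.target.codegen helpers, log instead of print, use input_dtype for kernel buffers, and remove dead code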

fzi-peccia · fzi-peccia · commit 49d9a0c5bde5 · 2025-08-18T10:13:15.000+02:00
diff --git a/python/tvm/meta_schedule/tune_context.py b/python/tvm/meta_schedule/tune_context.py
@@ -28,6 +28,7 @@
 from tvm.runtime import Object
 from tvm.target import Target
 from tvm.tir import PrimFunc, Schedule
+from tvm.target.codegen import target_has_features
 
 from . import _ffi_api
 from .logging import Logger, get_logger, get_logging_func
@@ -118,8 +119,7 @@ def __init__(
             if not isinstance(target, Target):
                 target = Target(target)
             if "riscv_cpu" in target.keys:
-                base_features = str(target.attrs["march"]).split("_")[0].replace("rv", "")
-                if "v" in base_features:
+                if target_has_features("v", target):
                     # Because the RVV intrinsics depend on the target, we register them here
                     # pylint: disable=import-outside-toplevel
                     from tvm.tir.tensor_intrin.riscv_cpu import register_riscv_tensor_intrinsics
diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
@@ -638,16 +638,12 @@ def riscv_cpu(model="sifive-u54", options=None):
             # cc: riscv64-unknown-linux-gnu-g++ -march=rv64gc -mabi=lp64d -mcpu=sifive-u74
         ],
         "bpi-f3": [
-            # "-model=sifive-u74",
             "-mtriple=riscv64-unknown-linux-gnu",
             "-mcpu=generic",
-            # "-march=rv64gcv_zvl256b",
-            # "-mcpu=generic-rv64",
             "-mfloat-abi=hard",
             "-num-cores=8",
             "-mabi=lp64d",
             "-mattr=+v,+zvl256b",
-            # cc: riscv64-unknown-linux-gnu-g++ -march=rv64gc -mabi=lp64d -mcpu=generic -mattr=+v
         ],
     }
     pre_defined_opt = trans_table.get(model, ["-model=%s" % model])
diff --git a/python/tvm/tir/tensor_intrin/riscv_cpu.py b/python/tvm/tir/tensor_intrin/riscv_cpu.py
@@ -20,10 +20,13 @@
 **Author**: `Federico Peccia <https://fPecc.github.io/>`_
 """
 import re
+import logging
 from tvm.script import tir as T
-from tvm.target.datatype import lower_call_pure_extern, register, register_op
+from tvm.target.codegen import llvm_get_vector_width
 from .. import TensorIntrin
 
+logger = logging.getLogger(__name__)
+
 #####################################################
 # LLVM RISC-V Intrinsic usage:
 # https://llvm.org/docs//RISCV/RISCVVectorExtension.html
@@ -327,7 +330,7 @@ def rvv_multivmul(J: int, vlmax: int, input_dtype: str, output_dtype: str, lmul:
     @T.prim_func
     def rvv_multivmul_desc(
         A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
-        B: T.Buffer((J, int(vlmax)), kernel_dtype, align=4, offset_factor=1),
+        B: T.Buffer((J, int(vlmax)), input_dtype, align=4, offset_factor=1),
         C: T.Buffer((J,), output_dtype, align=4, offset_factor=1),
     ) -> None:
         with T.block("root"):
@@ -345,7 +348,7 @@ def rvv_multivmul_desc(
     def rvv_multivmul_llvm_impl(
         A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
         B: T.Buffer(
-            (J, int(vlmax)), kernel_dtype, align=4, offset_factor=1, strides=[T.int32(), T.int32()]
+            (J, int(vlmax)), input_dtype, align=4, offset_factor=1, strides=[T.int32(), T.int32()]
         ),
         C: T.Buffer((J,), output_dtype, align=4, offset_factor=1),
     ) -> None:
@@ -530,7 +533,7 @@ def rvv_vmul(J: int, vlmax: int, input_dtype: str, output_dtype: str, lmul: int)
     @T.prim_func
     def rvv_vmul_desc(
         A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
-        B: T.Buffer((int(vlmax),), kernel_dtype, align=4, offset_factor=1),
+        B: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
         C: T.Buffer((1,), output_dtype, align=4, offset_factor=1),
     ) -> None:
         with T.block("root"):
@@ -544,7 +547,7 @@ def rvv_vmul_desc(
     @T.prim_func
     def rvv_vmul_llvm_impl(
         A: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
-        B: T.Buffer((int(vlmax),), kernel_dtype, align=4, offset_factor=1),
+        B: T.Buffer((int(vlmax),), input_dtype, align=4, offset_factor=1),
         C: T.Buffer((1,), output_dtype, align=4, offset_factor=1),
     ) -> None:
 
@@ -690,7 +693,7 @@ def register_intrinsic_combinations(
 
             desc, impl = generator(J, current_vlmax, input_dtype, output_dtype, lmul)
 
-            print(f"Registering intrin {name}...")
+            logger.debug(f"Registering intrin {name}...")
 
             TensorIntrin.register(name, desc, impl, override=True)
 
@@ -701,33 +704,15 @@ def register_riscv_tensor_intrinsics(target):
     target_kind = target.kind.name
     assert target_kind in ["llvm"]
 
-    #####################################################
-    # Register custom RVV types for C code generation
-    #####################################################
-    dtype_counter = 0
-    for bits in [8, 16, 32, 64]:
-        for dtype in ["int", "uint", "float"]:
-            for m in [1, 2, 4, 8]:
-                custom_rvv_type = f"v{dtype}{bits}m{m}_t"
-                register(custom_rvv_type, 150 + dtype_counter)
-                register_op(
-                    lower_call_pure_extern,
-                    "Call",
-                    "c",
-                    custom_rvv_type,
-                    intrinsic_name="tir.call_pure_extern",
-                )
-                dtype_counter += 1
-
-    vlen = get_vlen_from_mattrs(target.mattr)
+    vlen = llvm_get_vector_width(target)
 
     for vmul_type, func, outer_loops in zip(
         ["vmacc", "multivmul", "vmul"],
         [rvv_vmacc, rvv_multivmul, rvv_vmul],
         [[1], [get_vlmax(vlen, lmul=1, max_sew=32)], [1]],
     ):
 
-        for idtype, odtype in zip(["int16", "float32"], ["int32", "float32"]):
+        for idtype, odtype in zip(["int16", "float16", "float32"], ["int32", "float16", "float32"]):
 
             if idtype == "float32" and vmul_type == "multivmul":
                 continue
diff --git a/src/target/source/codegen_c.cc b/src/target/source/codegen_c.cc
@@ -268,8 +268,6 @@ std::string CodeGenC::GetBufferRef(DataType t, const BufferNode* buffer, PrimExp
        << " + " << index_str << " / " << div_factor << ")";
   } else if (t == buffer_element_dtype) {
     os << buffer_str << "[" << index_str << "]";
-  } else if (t == buffer_element_dtype) {
-    os << buffer_str << "[" << index_str << "]";
   } else {
     os << "*" << ptr_cast(t) << "(" << buffer_str << " + " << index_str << ")";
   }

Original file line number	Diff line number	Diff line change
`@@ -268,8 +268,6 @@ std::string CodeGenC::GetBufferRef(DataType t, const BufferNode* buffer, PrimExp`
`268`	`268`	`<< " + " << index_str << " / " << div_factor << ")";`
`269`	`269`	`} else if (t == buffer_element_dtype) {`
`270`	`270`	`os << buffer_str << "[" << index_str << "]";`
`271`		`- } else if (t == buffer_element_dtype) {`
`272`		`- os << buffer_str << "[" << index_str << "]";`
`273`	`271`	`} else {`
`274`	`272`	`os << "*" << ptr_cast(t) << "(" << buffer_str << " + " << index_str << ")";`
`275`	`273`	`}`