@@ -177,12 +177,10 @@ def rvv_vmacc_llvm_impl(
177177 T .call_llvm_intrin (
178178 llvm_macc_dtype ,
179179 expand_llvm_intrinsic ,
180- T .uint32 (3 ),
181180 T .broadcast (broadcast_output , n_output_dtype * T .vscale ()),
182181 T .call_llvm_intrin (
183182 llvm_input_dtype ,
184183 load_llvm_intrinsic ,
185- T .uint32 (3 ),
186184 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
187185 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
188186 T .int64 (vlmax ),
@@ -193,7 +191,6 @@ def rvv_vmacc_llvm_impl(
193191 else T .call_llvm_intrin (
194192 llvm_input_dtype ,
195193 load_llvm_intrinsic ,
196- T .uint32 (3 ),
197194 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
198195 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
199196 T .int64 (vlmax ),
@@ -204,12 +201,10 @@ def rvv_vmacc_llvm_impl(
204201 T .call_llvm_intrin (
205202 llvm_macc_dtype ,
206203 expand_llvm_intrinsic ,
207- T .uint32 (3 ),
208204 T .broadcast (broadcast_output , n_output_dtype * T .vscale ()),
209205 T .call_llvm_intrin (
210206 llvm_input_dtype ,
211207 load_llvm_intrinsic ,
212- T .uint32 (3 ),
213208 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
214209 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
215210 T .int64 (vlmax ),
@@ -220,7 +215,6 @@ def rvv_vmacc_llvm_impl(
220215 else T .call_llvm_intrin (
221216 llvm_input_dtype ,
222217 load_llvm_intrinsic ,
223- T .uint32 (3 ),
224218 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
225219 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
226220 T .int64 (vlmax ),
@@ -230,7 +224,6 @@ def rvv_vmacc_llvm_impl(
230224 init = T .call_llvm_intrin (
231225 llvm_macc_dtype ,
232226 init_llvm_intrinsic ,
233- T .uint32 (3 ),
234227 T .broadcast (broadcast_output , n_output_dtype * T .vscale ()),
235228 C .access_ptr (access_mask = C .READ , ptr_type = "handle" ),
236229 T .uint64 (vlmax ),
@@ -240,7 +233,6 @@ def rvv_vmacc_llvm_impl(
240233 T .call_llvm_intrin (
241234 llvm_macc_dtype ,
242235 macc_llvm_intrinsic ,
243- T .uint32 (6 ),
244236 init ,
245237 vec_A ,
246238 vec_B ,
@@ -252,7 +244,6 @@ def rvv_vmacc_llvm_impl(
252244 else T .call_llvm_intrin (
253245 llvm_macc_dtype ,
254246 macc_llvm_intrinsic ,
255- T .uint32 (5 ),
256247 init ,
257248 vec_A ,
258249 vec_B ,
@@ -264,7 +255,6 @@ def rvv_vmacc_llvm_impl(
264255 T .call_llvm_intrin (
265256 "" ,
266257 store_llvm_intrinsic ,
267- T .uint32 (3 ),
268258 product ,
269259 C .access_ptr (access_mask = C .WRITE , ptr_type = "handle" ),
270260 T .uint64 (vlmax ),
@@ -362,12 +352,10 @@ def rvv_multivmul_llvm_impl(
362352 T .call_llvm_intrin (
363353 llvm_mult_dtype ,
364354 expand_llvm_intrinsic ,
365- T .uint32 (3 ),
366355 T .broadcast (broadcast_intermmediate , n_intermmediate_dtype * T .vscale ()),
367356 T .call_llvm_intrin (
368357 llvm_input_dtype ,
369358 load_llvm_intrinsic ,
370- T .uint32 (3 ),
371359 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
372360 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
373361 T .int64 (vlmax ),
@@ -378,7 +366,6 @@ def rvv_multivmul_llvm_impl(
378366 else T .call_llvm_intrin (
379367 llvm_input_dtype ,
380368 load_llvm_intrinsic ,
381- T .uint32 (3 ),
382369 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
383370 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
384371 T .int64 (vlmax ),
@@ -389,12 +376,10 @@ def rvv_multivmul_llvm_impl(
389376 T .call_llvm_intrin (
390377 llvm_mult_dtype ,
391378 expand_llvm_intrinsic ,
392- T .uint32 (3 ),
393379 T .broadcast (broadcast_intermmediate , n_intermmediate_dtype * T .vscale ()),
394380 T .call_llvm_intrin (
395381 llvm_input_dtype ,
396382 load_llvm_intrinsic ,
397- T .uint32 (3 ),
398383 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
399384 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
400385 T .int64 (vlmax ),
@@ -405,7 +390,6 @@ def rvv_multivmul_llvm_impl(
405390 else T .call_llvm_intrin (
406391 llvm_kernel_dtype ,
407392 load_llvm_intrinsic ,
408- T .uint32 (3 ),
409393 T .broadcast (broadcast_kernel , n_kernel_dtype * T .vscale ()),
410394 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
411395 T .int64 (vlmax ),
@@ -415,7 +399,6 @@ def rvv_multivmul_llvm_impl(
415399 redsum = T .call_llvm_intrin (
416400 llvm_redsum_dtype ,
417401 init_llvm_intrinsic ,
418- T .uint32 (3 ),
419402 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
420403 C [0 ],
421404 T .uint64 (1 ),
@@ -425,7 +408,6 @@ def rvv_multivmul_llvm_impl(
425408 T .call_llvm_intrin (
426409 llvm_mult_dtype ,
427410 mult_llvm_intrinsic ,
428- T .uint32 (5 ),
429411 T .broadcast (broadcast_output , n_intermmediate_dtype * T .vscale ()),
430412 vec_A ,
431413 vec_B ,
@@ -436,7 +418,6 @@ def rvv_multivmul_llvm_impl(
436418 else T .call_llvm_intrin (
437419 llvm_mult_dtype ,
438420 mult_llvm_intrinsic ,
439- T .uint32 (4 ),
440421 T .broadcast (broadcast_output , n_intermmediate_dtype * T .vscale ()),
441422 vec_A ,
442423 vec_B ,
@@ -448,7 +429,6 @@ def rvv_multivmul_llvm_impl(
448429 T .call_llvm_intrin (
449430 llvm_redsum_dtype ,
450431 redsum_llvm_intrinsic ,
451- T .uint32 (5 ),
452432 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
453433 product ,
454434 redsum ,
@@ -459,7 +439,6 @@ def rvv_multivmul_llvm_impl(
459439 else T .call_llvm_intrin (
460440 llvm_redsum_dtype ,
461441 redsum_llvm_intrinsic ,
462- T .uint32 (4 ),
463442 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
464443 product ,
465444 redsum ,
@@ -470,7 +449,6 @@ def rvv_multivmul_llvm_impl(
470449 T .call_llvm_intrin (
471450 "" ,
472451 store_llvm_intrinsic ,
473- T .uint32 (3 ),
474452 redsum_result ,
475453 C .access_ptr (access_mask = C .WRITE , ptr_type = "handle" ),
476454 T .uint64 (1 ),
@@ -560,12 +538,10 @@ def rvv_vmul_llvm_impl(
560538 T .call_llvm_intrin (
561539 llvm_mult_dtype ,
562540 expand_llvm_intrinsic ,
563- T .uint32 (3 ),
564541 T .broadcast (broadcast_intermmediate , n_intermmediate_dtype * T .vscale ()),
565542 T .call_llvm_intrin (
566543 llvm_input_dtype ,
567544 load_llvm_intrinsic ,
568- T .uint32 (3 ),
569545 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
570546 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
571547 T .int64 (vlmax ),
@@ -576,7 +552,6 @@ def rvv_vmul_llvm_impl(
576552 else T .call_llvm_intrin (
577553 llvm_input_dtype ,
578554 load_llvm_intrinsic ,
579- T .uint32 (3 ),
580555 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
581556 A .access_ptr (access_mask = A .READ , ptr_type = "handle" ),
582557 T .int64 (vlmax ),
@@ -587,12 +562,10 @@ def rvv_vmul_llvm_impl(
587562 T .call_llvm_intrin (
588563 llvm_mult_dtype ,
589564 expand_llvm_intrinsic ,
590- T .uint32 (3 ),
591565 T .broadcast (broadcast_intermmediate , n_intermmediate_dtype * T .vscale ()),
592566 T .call_llvm_intrin (
593567 llvm_input_dtype ,
594568 load_llvm_intrinsic ,
595- T .uint32 (3 ),
596569 T .broadcast (broadcast_input , n_input_dtype * T .vscale ()),
597570 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
598571 T .int64 (vlmax ),
@@ -603,7 +576,6 @@ def rvv_vmul_llvm_impl(
603576 else T .call_llvm_intrin (
604577 llvm_kernel_dtype ,
605578 load_llvm_intrinsic ,
606- T .uint32 (3 ),
607579 T .broadcast (broadcast_kernel , n_kernel_dtype * T .vscale ()),
608580 B .access_ptr (access_mask = B .READ , ptr_type = "handle" ),
609581 T .int64 (vlmax ),
@@ -613,7 +585,6 @@ def rvv_vmul_llvm_impl(
613585 redsum = T .call_llvm_intrin (
614586 llvm_redsum_dtype ,
615587 init_llvm_intrinsic ,
616- T .uint32 (3 ),
617588 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
618589 C [0 ],
619590 T .uint64 (1 ),
@@ -623,7 +594,6 @@ def rvv_vmul_llvm_impl(
623594 T .call_llvm_intrin (
624595 llvm_mult_dtype ,
625596 mult_llvm_intrinsic ,
626- T .uint32 (5 ),
627597 T .broadcast (broadcast_output , n_intermmediate_dtype * T .vscale ()),
628598 vec_A ,
629599 vec_B ,
@@ -634,7 +604,6 @@ def rvv_vmul_llvm_impl(
634604 else T .call_llvm_intrin (
635605 llvm_mult_dtype ,
636606 mult_llvm_intrinsic ,
637- T .uint32 (4 ),
638607 T .broadcast (broadcast_output , n_intermmediate_dtype * T .vscale ()),
639608 vec_A ,
640609 vec_B ,
@@ -646,7 +615,6 @@ def rvv_vmul_llvm_impl(
646615 T .call_llvm_intrin (
647616 llvm_redsum_dtype ,
648617 redsum_llvm_intrinsic ,
649- T .uint32 (5 ),
650618 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
651619 product ,
652620 redsum ,
@@ -657,7 +625,6 @@ def rvv_vmul_llvm_impl(
657625 else T .call_llvm_intrin (
658626 llvm_redsum_dtype ,
659627 redsum_llvm_intrinsic ,
660- T .uint32 (4 ),
661628 T .broadcast (broadcast_output , n_redsum_dtype * T .vscale ()),
662629 product ,
663630 redsum ,
@@ -668,7 +635,6 @@ def rvv_vmul_llvm_impl(
668635 T .call_llvm_intrin (
669636 "" ,
670637 store_llvm_intrinsic ,
671- T .uint32 (3 ),
672638 redsum_result ,
673639 C .access_ptr (access_mask = C .WRITE , ptr_type = "handle" ),
674640 T .uint64 (1 ),
0 commit comments