@@ -77,8 +77,8 @@ typedef uint32x4_t flb_vector32;
77
77
typedef vuint8m1_t flb_vector8 ;
78
78
typedef vuint32m1_t flb_vector32 ;
79
79
80
- /* Currently, VLEN is assumed to 128. */
81
- #define RVV_VEC_INST_LEN (128 / 8) /* 16 */
80
+ #define RVV_VEC8_INST_LEN (128 / 8) /* 16: number of 8-bit elements per vector, with VLEN assumed to be 128 bits; passed as the element-count argument of the u8m1 intrinsics */
81
+ #define RVV_VEC32_INST_LEN (128 / 8 / 4 ) /* 4: number of 32-bit elements per vector, with VLEN assumed to be 128 bits; passed as the element-count argument of the u32m1 intrinsics */
82
82
83
83
#else
84
84
/*
@@ -116,7 +116,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s)
116
116
#elif defined(FLB_SIMD_NEON )
117
117
* v = vld1q_u8 (s );
118
118
#elif defined(FLB_SIMD_RVV )
119
- * v = __riscv_vle8_v_u8m1 (s , 16 );
119
+ * v = __riscv_vle8_v_u8m1 (s , RVV_VEC8_INST_LEN );
120
120
#else
121
121
memset (v , 0 , sizeof (flb_vector8 ));
122
122
#endif
@@ -153,7 +153,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vecto
153
153
#elif defined(FLB_SIMD_NEON )
154
154
return vqsubq_u8 (v1 , v2 );
155
155
#elif defined(FLB_SIMD_RVV )
156
- return __riscv_vssubu_vv_u8m1 (v1 , v2 , 16 );
156
+ return __riscv_vssubu_vv_u8m1 (v1 , v2 , RVV_VEC8_INST_LEN );
157
157
#endif
158
158
}
159
159
#endif /* ! FLB_SIMD_NONE */
@@ -170,8 +170,10 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8
170
170
#elif defined(FLB_SIMD_NEON )
171
171
return vceqq_u8 (v1 , v2 );
172
172
#elif defined(FLB_SIMD_RVV )
173
- vbool8_t ret = __riscv_vmseq_vv_u8m1_b8 (v1 , v2 , 16 );
174
- return __riscv_vmerge_vvm_u8m1 (__riscv_vmv_v_x_u8m1 (0 , 16 ), __riscv_vmv_v_x_u8m1 (UINT8_MAX , 16 ), ret , 16 );
173
+ vbool8_t ret = __riscv_vmseq_vv_u8m1_b8 (v1 , v2 , RVV_VEC8_INST_LEN );
174
+ return __riscv_vmerge_vvm_u8m1 (__riscv_vmv_v_x_u8m1 (0 , RVV_VEC8_INST_LEN ),
175
+ __riscv_vmv_v_x_u8m1 (UINT8_MAX , RVV_VEC8_INST_LEN ),
176
+ ret , RVV_VEC8_INST_LEN );
175
177
#endif
176
178
}
177
179
#endif /* ! FLB_SIMD_NONE */
@@ -184,8 +186,10 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vect
184
186
#elif defined(FLB_SIMD_NEON )
185
187
return vceqq_u32 (v1 , v2 );
186
188
#elif defined(FLB_SIMD_RVV )
187
- vbool32_t ret = __riscv_vmseq_vv_u32m1_b32 (v1 , v2 , 4 );
188
- return __riscv_vmerge_vvm_u32m1 (__riscv_vmv_v_x_u32m1 (0 , 4 ), __riscv_vmv_v_x_u32m1 (UINT32_MAX , 4 ), ret , 4 );
189
+ vbool32_t ret = __riscv_vmseq_vv_u32m1_b32 (v1 , v2 , RVV_VEC32_INST_LEN );
190
+ return __riscv_vmerge_vvm_u32m1 (__riscv_vmv_v_x_u32m1 (0 , RVV_VEC32_INST_LEN ),
191
+ __riscv_vmv_v_x_u32m1 (UINT32_MAX , RVV_VEC32_INST_LEN ),
192
+ ret , RVV_VEC32_INST_LEN );
189
193
#endif
190
194
}
191
195
#endif /* ! FLB_SIMD_NONE */
@@ -200,7 +204,7 @@ static inline flb_vector8 flb_vector8_broadcast(const uint8_t c)
200
204
#elif defined(FLB_SIMD_NEON )
201
205
return vdupq_n_u8 (c );
202
206
#elif defined(FLB_SIMD_RVV )
203
- return __riscv_vmv_v_x_u8m1 (c , 16 );
207
+ return __riscv_vmv_v_x_u8m1 (c , RVV_VEC8_INST_LEN );
204
208
#else
205
209
return ~UINT64CONST (0 ) / 0xFF * c ;
206
210
#endif
@@ -216,7 +220,9 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v)
216
220
#elif defined(FLB_SIMD_NEON )
217
221
return vmaxvq_u8 (v ) > 0x7F ;
218
222
#elif defined(FLB_SIMD_RVV )
219
- return __riscv_vmv_x_s_u8m1_u8 (__riscv_vredmaxu_vs_u8m1_u8m1 (v , __riscv_vmv_v_x_u8m1 (0 , 16 ), 16 ));
223
+ return __riscv_vmv_x_s_u8m1_u8 (__riscv_vredmaxu_vs_u8m1_u8m1 (v ,
224
+ __riscv_vmv_v_x_u8m1 (0 , RVV_VEC8_INST_LEN ),
225
+ RVV_VEC8_INST_LEN ));
220
226
#else
221
227
return v & flb_vector8_broadcast (0x80 );
222
228
#endif
0 commit comments