@@ -77,8 +77,13 @@ typedef uint32x4_t flb_vector32;
 typedef vuint8m1_t flb_vector8;
 typedef vuint32m1_t flb_vector32;
 
-/* Currently, VLEN is assumed to 128. */
-#define RVV_VEC_INST_LEN (128 / 8) /* 16 */
+#ifdef FLB_RVV_VLEN
+#define RVV_VEC8_INST_LEN  (FLB_RVV_VLEN / 8)
+#define RVV_VEC32_INST_LEN (FLB_RVV_VLEN / 8 / 4)
+#else
+#define RVV_VEC8_INST_LEN  (128 / 8)     /* 16 */
+#define RVV_VEC32_INST_LEN (128 / 8 / 4) /* 4 */
+#endif
 
 #else
 /*
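Reviewer note (not part of the patch): an RVV m1 register holds VLEN/8 eight-bit lanes and VLEN/32 thirty-two-bit lanes, which is exactly what the two new macros compute. A minimal standalone sketch of that arithmetic, assuming FLB_RVV_VLEN is supplied by the build (e.g. cc -DFLB_RVV_VLEN=256; the value 256 is only an example):

#include <assert.h>

/* Hypothetical check of the macro arithmetic; mirrors the patch but is not part of it. */
#define FLB_RVV_VLEN       256                    /* assumed build-time flag      */
#define RVV_VEC8_INST_LEN  (FLB_RVV_VLEN / 8)     /* 8-bit lanes per m1 register  */
#define RVV_VEC32_INST_LEN (FLB_RVV_VLEN / 8 / 4) /* 32-bit lanes per m1 register */

int main(void)
{
    assert(RVV_VEC8_INST_LEN == 32);  /* 256 / 8  */
    assert(RVV_VEC32_INST_LEN == 8);  /* 256 / 32 */
    return 0;
}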
@@ -116,7 +121,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s)
 #elif defined(FLB_SIMD_NEON)
     *v = vld1q_u8(s);
 #elif defined(FLB_SIMD_RVV)
-    *v = __riscv_vle8_v_u8m1(s, 16);
+    *v = __riscv_vle8_v_u8m1(s, RVV_VEC8_INST_LEN);
 #else
     memset(v, 0, sizeof(flb_vector8));
 #endif
@@ -153,7 +158,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vecto
 #elif defined(FLB_SIMD_NEON)
     return vqsubq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vssubu_vv_u8m1(v1, v2, 16);
+    return __riscv_vssubu_vv_u8m1(v1, v2, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -170,8 +175,10 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, 16);
-    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, 16), __riscv_vmv_v_x_u8m1(UINT8_MAX, 16), ret, 16);
+    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, RVV_VEC8_INST_LEN);
+    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                   __riscv_vmv_v_x_u8m1(UINT8_MAX, RVV_VEC8_INST_LEN),
+                                   ret, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -184,8 +191,10 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vect
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u32(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, 4);
-    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, 4), __riscv_vmv_v_x_u32m1(UINT32_MAX, 4), ret, 4);
+    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, RVV_VEC32_INST_LEN);
+    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, RVV_VEC32_INST_LEN),
+                                    __riscv_vmv_v_x_u32m1(UINT32_MAX, RVV_VEC32_INST_LEN),
+                                    ret, RVV_VEC32_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -200,7 +209,7 @@ static inline flb_vector8 flb_vector8_broadcast(const uint8_t c)
 #elif defined(FLB_SIMD_NEON)
     return vdupq_n_u8(c);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_v_x_u8m1(c, 16);
+    return __riscv_vmv_v_x_u8m1(c, RVV_VEC8_INST_LEN);
 #else
     return ~UINT64CONST(0) / 0xFF * c;
 #endif
@@ -216,7 +225,9 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v)
 #elif defined(FLB_SIMD_NEON)
     return vmaxvq_u8(v) > 0x7F;
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v, __riscv_vmv_v_x_u8m1(0, 16), 16));
+    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v,
+                                       __riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                       RVV_VEC8_INST_LEN));
 #else
     return v & flb_vector8_broadcast(0x80);
 #endif
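Reviewer note (not part of the patch): the lane counts could also be queried at run time rather than fixed at compile time. A sketch under the assumption that the toolchain ships the standard RVV intrinsics header with __riscv_vsetvlmax_e8m1; the helper name below is illustrative, not from this codebase:

#include <riscv_vector.h>
#include <stddef.h>

/* Ask the running core how many 8-bit lanes one m1 register holds (VLEN / 8). */
static inline size_t rvv_vec8_lanes_runtime(void)
{
    return __riscv_vsetvlmax_e8m1();
}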