/* ---- src/math/p_add.c: element wise vector addition ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */

/**
 *
 * Element wise vector addition between input vectors 'a' and 'b'
 *
 * GEN_FUNC_ADD(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector (at least n elements)
 *
 * @param b     Pointer to second input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Number of elements to add; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * All three pointers are restrict-qualified, so the vectors must not overlap.
 * The loop runs forward over [0, n) so that an empty request is a no-op and
 * memory is touched in ascending order.
 *
 */
#define GEN_FUNC_ADD(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] + b[i]; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_ADD(p_add_f32, float)

GEN_FUNC_ADD(p_add_int8, int8_t)
GEN_FUNC_ADD(p_add_uint8, uint8_t)

GEN_FUNC_ADD(p_add_int16, int16_t)
GEN_FUNC_ADD(p_add_uint16, uint16_t)

GEN_FUNC_ADD(p_add_int32, int32_t)
GEN_FUNC_ADD(p_add_uint32, uint32_t)

GEN_FUNC_ADD(p_add_int64, int64_t)
GEN_FUNC_ADD(p_add_uint64, uint64_t)
/* ---- src/math/p_add.c (continued): vector plus scalar ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */

/**
 *
 * Element wise vector addition between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_ADDS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_ADDS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] + *b; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_ADDS(p_adds_f32, float)

GEN_FUNC_ADDS(p_adds_int8, int8_t)
GEN_FUNC_ADDS(p_adds_uint8, uint8_t)

GEN_FUNC_ADDS(p_adds_int16, int16_t)
GEN_FUNC_ADDS(p_adds_uint16, uint16_t)

GEN_FUNC_ADDS(p_adds_int32, int32_t)
GEN_FUNC_ADDS(p_adds_uint32, uint32_t)

GEN_FUNC_ADDS(p_adds_int64, int64_t)
GEN_FUNC_ADDS(p_adds_uint64, uint64_t)

/* ---- src/math/p_and.c (new file): element wise bitwise and ---- */
/*
 * NOTE(review): the target of this file's original '#include' line is not
 * visible in the patch text (presumably the project's public header);
 * restore it when this is split back into p_and.c.
 */

/**
 *
 * Element wise vector 'bitwise and' between input vectors 'a' and 'b'
 *
 * GEN_FUNC_AND(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector (at least n elements)
 *
 * @param b     Pointer to second input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 *
 */
#define GEN_FUNC_AND(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] & b[i]; \
        } \
    }

GEN_FUNC_AND(p_and_int8, int8_t)
GEN_FUNC_AND(p_and_uint8, uint8_t)

GEN_FUNC_AND(p_and_int16, int16_t)
GEN_FUNC_AND(p_and_uint16, uint16_t)

GEN_FUNC_AND(p_and_int32, int32_t)
GEN_FUNC_AND(p_and_uint32, uint32_t)

GEN_FUNC_AND(p_and_int64, int64_t)
GEN_FUNC_AND(p_and_uint64, uint64_t)
/* ---- src/math/p_and.c (continued): vector 'bitwise and' scalar ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */

/**
 *
 * Element wise vector 'bitwise and' between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_ANDS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_ANDS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] & *b; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_ANDS(p_ands_int8, int8_t)
GEN_FUNC_ANDS(p_ands_uint8, uint8_t)

GEN_FUNC_ANDS(p_ands_int16, int16_t)
GEN_FUNC_ANDS(p_ands_uint16, uint16_t)

GEN_FUNC_ANDS(p_ands_int32, int32_t)
GEN_FUNC_ANDS(p_ands_uint32, uint32_t)

GEN_FUNC_ANDS(p_ands_int64, int64_t)
GEN_FUNC_ANDS(p_ands_uint64, uint64_t)

/* ---- src/math/p_div.c: element wise vector division ---- */

/**
 *
 * Element wise vector division between input vectors 'a' and 'b'
 *
 * GEN_FUNC_DIV(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector, the dividends
 *              (at least n elements)
 *
 * @param b     Pointer to second input vector, the divisors
 *              (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 * For the integer variants the quotient truncates toward zero and a zero
 * divisor is undefined behaviour; callers must not pass one.
 *
 */
#define GEN_FUNC_DIV(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] / b[i]; \
        } \
    }

GEN_FUNC_DIV(p_div_f32, float)

GEN_FUNC_DIV(p_div_int8, int8_t)
GEN_FUNC_DIV(p_div_uint8, uint8_t)

GEN_FUNC_DIV(p_div_int16, int16_t)
GEN_FUNC_DIV(p_div_uint16, uint16_t)

GEN_FUNC_DIV(p_div_int32, int32_t)
GEN_FUNC_DIV(p_div_uint32, uint32_t)

GEN_FUNC_DIV(p_div_int64, int64_t)
GEN_FUNC_DIV(p_div_uint64, uint64_t)
/* ---- src/math/p_div.c (continued): vector divided by scalar ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */

/**
 *
 * Element wise vector division between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_DIVS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector, the dividends (at least n elements)
 *
 * @param b     Pointer to input scalar, the divisor (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The division is carried out in TYPE itself. Multiplying by a
 * single-precision reciprocal is not used because it cannot represent
 * integer operands above 2^24 exactly and gives wrong integer quotients.
 * For the integer variants the quotient truncates toward zero and a zero
 * divisor is undefined behaviour; callers must not pass one.
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_DIVS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] / *b; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_DIVS(p_divs_f32, float)

GEN_FUNC_DIVS(p_divs_int8, int8_t)
GEN_FUNC_DIVS(p_divs_uint8, uint8_t)

GEN_FUNC_DIVS(p_divs_int16, int16_t)
GEN_FUNC_DIVS(p_divs_uint16, uint16_t)

GEN_FUNC_DIVS(p_divs_int32, int32_t)
GEN_FUNC_DIVS(p_divs_uint32, uint32_t)

GEN_FUNC_DIVS(p_divs_int64, int64_t)
GEN_FUNC_DIVS(p_divs_uint64, uint64_t)

/* ---- src/math/p_mul.c: element wise vector multiplication ---- */

/**
 *
 * Element wise vector multiplication between input vectors 'a' and 'b'
 *
 * GEN_FUNC_MUL(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector (at least n elements)
 *
 * @param b     Pointer to second input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 *
 */
#define GEN_FUNC_MUL(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] * b[i]; \
        } \
    }

GEN_FUNC_MUL(p_mul_f32, float)

GEN_FUNC_MUL(p_mul_int8, int8_t)
GEN_FUNC_MUL(p_mul_uint8, uint8_t)

GEN_FUNC_MUL(p_mul_int16, int16_t)
GEN_FUNC_MUL(p_mul_uint16, uint16_t)

GEN_FUNC_MUL(p_mul_int32, int32_t)
GEN_FUNC_MUL(p_mul_uint32, uint32_t)

GEN_FUNC_MUL(p_mul_int64, int64_t)
GEN_FUNC_MUL(p_mul_uint64, uint64_t)
/* ---- src/math/p_mul.c (continued): vector times scalar ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */

/**
 *
 * Element wise vector multiplication between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_MULS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_MULS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] * *b; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_MULS(p_muls_f32, float)

GEN_FUNC_MULS(p_muls_int8, int8_t)
GEN_FUNC_MULS(p_muls_uint8, uint8_t)

GEN_FUNC_MULS(p_muls_int16, int16_t)
GEN_FUNC_MULS(p_muls_uint16, uint16_t)

GEN_FUNC_MULS(p_muls_int32, int32_t)
GEN_FUNC_MULS(p_muls_uint32, uint32_t)

GEN_FUNC_MULS(p_muls_int64, int64_t)
GEN_FUNC_MULS(p_muls_uint64, uint64_t)

/* ---- src/math/p_not.c (new file): element wise bitwise not ---- */
/*
 * NOTE(review): the target of this file's original '#include' line is not
 * visible in the patch text (presumably the project's public header);
 * restore it when this is split back into p_not.c.
 */

/**
 *
 * Element wise vector 'bitwise not' of input vector 'a'
 *
 * GEN_FUNC_NOT(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, TYPE *c, int n)
 * The generator is named GEN_FUNC_NOT (not plain GEN_FUNC) to match the
 * per-operation naming of the other generators.
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'a' and 'c' must not overlap.
 *
 */
#define GEN_FUNC_NOT(NAME, TYPE) \
    void NAME(const TYPE *restrict a, TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = ~a[i]; \
        } \
    }

GEN_FUNC_NOT(p_not_int8, int8_t)
GEN_FUNC_NOT(p_not_uint8, uint8_t)

GEN_FUNC_NOT(p_not_int16, int16_t)
GEN_FUNC_NOT(p_not_uint16, uint16_t)

GEN_FUNC_NOT(p_not_int32, int32_t)
GEN_FUNC_NOT(p_not_uint32, uint32_t)

GEN_FUNC_NOT(p_not_int64, int64_t)
GEN_FUNC_NOT(p_not_uint64, uint64_t)

/* ---- src/math/p_or.c (new file) starts after this point ---- */
/* ---- src/math/p_or.c (new file): element wise bitwise or ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */
/*
 * NOTE(review): the target of p_or.c's original '#include' line is not
 * visible in the patch text (presumably the project's public header);
 * restore it when this is split back into separate files.
 */

/**
 *
 * Element wise vector 'bitwise or' between input vectors 'a' and 'b'
 *
 * GEN_FUNC_OR(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector (at least n elements)
 *
 * @param b     Pointer to second input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 *
 */
#define GEN_FUNC_OR(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] | b[i]; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_OR(p_or_int8, int8_t)
GEN_FUNC_OR(p_or_uint8, uint8_t)

GEN_FUNC_OR(p_or_int16, int16_t)
GEN_FUNC_OR(p_or_uint16, uint16_t)

GEN_FUNC_OR(p_or_int32, int32_t)
GEN_FUNC_OR(p_or_uint32, uint32_t)

GEN_FUNC_OR(p_or_int64, int64_t)
GEN_FUNC_OR(p_or_uint64, uint64_t)

/**
 *
 * Element wise vector 'bitwise or' between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_ORS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_ORS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] | *b; \
        } \
    }

GEN_FUNC_ORS(p_ors_int8, int8_t)
GEN_FUNC_ORS(p_ors_uint8, uint8_t)

GEN_FUNC_ORS(p_ors_int16, int16_t)
GEN_FUNC_ORS(p_ors_uint16, uint16_t)

GEN_FUNC_ORS(p_ors_int32, int32_t)
GEN_FUNC_ORS(p_ors_uint32, uint32_t)

GEN_FUNC_ORS(p_ors_int64, int64_t)
GEN_FUNC_ORS(p_ors_uint64, uint64_t)

/* ---- src/math/p_sub.c: element wise vector subtraction ---- */

/**
 *
 * Element wise vector subtraction between input vectors 'a' and 'b'
 *
 * GEN_FUNC_SUB(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector, the minuends
 *              (at least n elements)
 *
 * @param b     Pointer to second input vector, the subtrahends
 *              (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 *
 */
#define GEN_FUNC_SUB(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] - b[i]; \
        } \
    }

GEN_FUNC_SUB(p_sub_f32, float)

GEN_FUNC_SUB(p_sub_int8, int8_t)
GEN_FUNC_SUB(p_sub_uint8, uint8_t)

GEN_FUNC_SUB(p_sub_int16, int16_t)
GEN_FUNC_SUB(p_sub_uint16, uint16_t)

GEN_FUNC_SUB(p_sub_int32, int32_t)
GEN_FUNC_SUB(p_sub_uint32, uint32_t)

GEN_FUNC_SUB(p_sub_int64, int64_t)
GEN_FUNC_SUB(p_sub_uint64, uint64_t)

/**
 *
 * Element wise vector subtraction between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_SUBS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_SUBS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] - *b; \
        } \
    }

GEN_FUNC_SUBS(p_subs_f32, float)

GEN_FUNC_SUBS(p_subs_int8, int8_t)
GEN_FUNC_SUBS(p_subs_uint8, uint8_t)

GEN_FUNC_SUBS(p_subs_int16, int16_t)
GEN_FUNC_SUBS(p_subs_uint16, uint16_t)

GEN_FUNC_SUBS(p_subs_int32, int32_t)
GEN_FUNC_SUBS(p_subs_uint32, uint32_t)

GEN_FUNC_SUBS(p_subs_int64, int64_t)
GEN_FUNC_SUBS(p_subs_uint64, uint64_t)

/* ---- src/math/p_sum.c: sum of all elements ---- */

/**
 *
 * Calculates the sum of all elements of vector 'a'.
 *
 * GEN_FUNC_SUM(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, TYPE *c, int n)
 * The generator is named GEN_FUNC_SUM (not plain GEN_FUNC) so it cannot
 * clash with the other reduction generators.
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param c     Pointer to output scalar; receives the sum, which is 0
 *              when n <= 0
 *
 * @param n     Size of 'a' vector
 *
 * @return      None
 *
 * The running total is kept in a local of type TYPE and stored once at the
 * end; elements are added in ascending index order.
 *
 */
#define GEN_FUNC_SUM(NAME, TYPE) \
    void NAME(const TYPE *restrict a, TYPE *restrict c, int n) \
    { \
        TYPE acc = 0; \
        for (int i = 0; i < n; i++) { \
            acc += a[i]; \
        } \
        *c = acc; \
    }

GEN_FUNC_SUM(p_sum_f32, float)

GEN_FUNC_SUM(p_sum_int8, int8_t)
GEN_FUNC_SUM(p_sum_uint8, uint8_t)

GEN_FUNC_SUM(p_sum_int16, int16_t)
GEN_FUNC_SUM(p_sum_uint16, uint16_t)

GEN_FUNC_SUM(p_sum_int32, int32_t)
GEN_FUNC_SUM(p_sum_uint32, uint32_t)

GEN_FUNC_SUM(p_sum_int64, int64_t)
GEN_FUNC_SUM(p_sum_uint64, uint64_t)

/* ---- src/math/p_sumsq.c: sum of squares of all elements ---- */

/**
 *
 * Calculates the sum of the squares of all of the elements of vector 'a'.
 *
 * GEN_FUNC_SUMSQ(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param c     Pointer to output scalar; receives the sum of squares,
 *              which is 0 when n <= 0
 *
 * @param n     Size of 'a' vector
 *
 * @return      None
 *
 * The running total is kept in a local of type TYPE and stored once at the
 * end; elements are visited in ascending index order.
 *
 */
#define GEN_FUNC_SUMSQ(NAME, TYPE) \
    void NAME(const TYPE *restrict a, TYPE *restrict c, int n) \
    { \
        TYPE acc = 0; \
        for (int i = 0; i < n; i++) { \
            acc += a[i] * a[i]; \
        } \
        *c = acc; \
    }

GEN_FUNC_SUMSQ(p_sumsq_f32, float)

GEN_FUNC_SUMSQ(p_sumsq_int8, int8_t)
GEN_FUNC_SUMSQ(p_sumsq_uint8, uint8_t)

GEN_FUNC_SUMSQ(p_sumsq_int16, int16_t)
GEN_FUNC_SUMSQ(p_sumsq_uint16, uint16_t)

GEN_FUNC_SUMSQ(p_sumsq_int32, int32_t)
GEN_FUNC_SUMSQ(p_sumsq_uint32, uint32_t)

GEN_FUNC_SUMSQ(p_sumsq_int64, int64_t)
GEN_FUNC_SUMSQ(p_sumsq_uint64, uint64_t)

/* ---- src/math/p_xor.c (new file) starts after this point ---- */
/* ---- src/math/p_xor.c (new file): element wise bitwise xor ---- */
#include <stdint.h> /* fixed-width integer types for the instantiations below */
/*
 * NOTE(review): the target of this file's original '#include' line is not
 * visible in the patch text (presumably the project's public header);
 * restore it when this is split back into p_xor.c.
 */

/**
 *
 * Element wise vector 'bitwise xor' between input vectors 'a' and 'b'
 *
 * GEN_FUNC_XOR(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to first input vector (at least n elements)
 *
 * @param b     Pointer to second input vector (at least n elements)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a', 'b' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so the vectors must not overlap.
 *
 */
#define GEN_FUNC_XOR(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] ^ b[i]; \
        } \
    }

/* No trailing ';' below: each expansion is already a full function definition. */
GEN_FUNC_XOR(p_xor_int8, int8_t)
GEN_FUNC_XOR(p_xor_uint8, uint8_t)

GEN_FUNC_XOR(p_xor_int16, int16_t)
GEN_FUNC_XOR(p_xor_uint16, uint16_t)

GEN_FUNC_XOR(p_xor_int32, int32_t)
GEN_FUNC_XOR(p_xor_uint32, uint32_t)

GEN_FUNC_XOR(p_xor_int64, int64_t)
GEN_FUNC_XOR(p_xor_uint64, uint64_t)

/**
 *
 * Element wise vector 'bitwise xor' between input vector 'a' and scalar 'b'
 *
 * GEN_FUNC_XORS(NAME, TYPE) expands to the definition of
 *   void NAME(const TYPE *a, const TYPE *b, TYPE *c, int n)
 *
 * @param a     Pointer to input vector (at least n elements)
 *
 * @param b     Pointer to input scalar (a single element)
 *
 * @param c     Pointer to output vector (at least n elements)
 *
 * @param n     Size of 'a' and 'c' vector; nothing is read or written
 *              when n <= 0
 *
 * @return      None
 *
 * The pointers are restrict-qualified, so 'c' must not overlap 'a' or 'b'.
 *
 */
#define GEN_FUNC_XORS(NAME, TYPE) \
    void NAME(const TYPE *restrict a, const TYPE *restrict b, \
              TYPE *restrict c, int n) \
    { \
        for (int i = 0; i < n; i++) { \
            c[i] = a[i] ^ *b; \
        } \
    }

GEN_FUNC_XORS(p_xors_int8, int8_t)
GEN_FUNC_XORS(p_xors_uint8, uint8_t)

GEN_FUNC_XORS(p_xors_int16, int16_t)
GEN_FUNC_XORS(p_xors_uint16, uint16_t)

GEN_FUNC_XORS(p_xors_int32, int32_t)
GEN_FUNC_XORS(p_xors_uint32, uint32_t)

GEN_FUNC_XORS(p_xors_int64, int64_t)
GEN_FUNC_XORS(p_xors_uint64, uint64_t)