From 28a3570424e3317896cf6cb70685f3fc3f6595a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Wed, 17 Jun 2015 01:55:45 +0200 Subject: [PATCH 1/6] Wrote the sum and sum square functions as a template and added instances for each variable type. --- src/math/p_sum.c | 30 +++++++++++++++++++++--------- src/math/p_sumsq.c | 32 ++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/math/p_sum.c b/src/math/p_sum.c index 4c3b976..f8abf5f 100644 --- a/src/math/p_sum.c +++ b/src/math/p_sum.c @@ -14,13 +14,25 @@ * */ -void p_sum_f32(const float *a, float *c, int n) -{ - float tmp = 0.0f; - int i; - - for (i = 0; i < n; i++) { - tmp += *(a + i); +#define GEN_FUNC(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE *a, TYPE * restrict c, int n) \ + { \ + *c = *(a); \ + for (;--n;) \ + *c += *(a + n); \ } - *c = tmp; -} + +GEN_FUNC(p_sum_f32,float); + +GEN_FUNC(p_sum_int8,int8_t); +GEN_FUNC(p_sum_uint8,uint8_t); + +GEN_FUNC(p_sum_int16,int16_t); +GEN_FUNC(p_sum_uint16,uint16_t); + +GEN_FUNC(p_sum_int32,int32_t); +GEN_FUNC(p_sum_uint32,uint32_t); + +GEN_FUNC(p_sum_int64,int64_t); +GEN_FUNC(p_sum_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_sumsq.c b/src/math/p_sumsq.c index 7849caf..55c9729 100644 --- a/src/math/p_sumsq.c +++ b/src/math/p_sumsq.c @@ -2,7 +2,7 @@ /** * - * Calculates the sum of the square of all of the elements vector 'a'. + * Calculates the sum of the squares of all elements of vector 'a'. 
* * @param a Pointer to input vector * @@ -14,13 +14,25 @@ * */ -void p_sumsq_f32(const float *a, float *c, int n) -{ - float tmp = 0.0f; - int i; - - for (i = 0; i < n; i++) { - tmp += *(a + i) * *(a + i); +#define GEN_FUNC(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE *a, TYPE * restrict c, int n) \ + { \ + *c = *(a) * *(a); \ + for (;--n;) \ + *c += *(a + n) * *(a + n); \ } - *c = tmp; -} + +GEN_FUNC(p_sumsq_f32,float); + +GEN_FUNC(p_sumsq_int8,int8_t); +GEN_FUNC(p_sumsq_uint8,uint8_t); + +GEN_FUNC(p_sumsq_int16,int16_t); +GEN_FUNC(p_sumsq_uint16,uint16_t); + +GEN_FUNC(p_sumsq_int32,int32_t); +GEN_FUNC(p_sumsq_uint32,uint32_t); + +GEN_FUNC(p_sumsq_int64,int64_t); +GEN_FUNC(p_sumsq_uint64,uint64_t); \ No newline at end of file From 2d83de3ad0e8be67489c84707d8641c5bcd15c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Wed, 17 Jun 2015 02:01:24 +0200 Subject: [PATCH 2/6] Added restrict keyword on the input vector. --- src/math/p_sum.c | 2 +- src/math/p_sumsq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/math/p_sum.c b/src/math/p_sum.c index f8abf5f..fd1f715 100644 --- a/src/math/p_sum.c +++ b/src/math/p_sum.c @@ -16,7 +16,7 @@ #define GEN_FUNC(NAME,TYPE) \ /** NAME TYPE */ \ - void NAME(const TYPE *a, TYPE * restrict c, int n) \ + void NAME(const TYPE * restrict a, TYPE * restrict c, int n) \ { \ *c = *(a); \ for (;--n;) \ diff --git a/src/math/p_sumsq.c b/src/math/p_sumsq.c index 55c9729..007bfb9 100644 --- a/src/math/p_sumsq.c +++ b/src/math/p_sumsq.c @@ -16,7 +16,7 @@ #define GEN_FUNC(NAME,TYPE) \ /** NAME TYPE */ \ - void NAME(const TYPE *a, TYPE * restrict c, int n) \ + void NAME(const TYPE * restrict a, TYPE * restrict c, int n) \ { \ *c = *(a) * *(a); \ for (;--n;) \ From 207f87585499cc5382e39fddfe4c0acaf822a841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Wed, 17 Jun 2015 02:18:19 +0200 Subject: [PATCH 3/6] Fixed description for scalar function. 
--- src/math/p_add.c | 69 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/src/math/p_add.c b/src/math/p_add.c index a10dd1d..35745cf 100644 --- a/src/math/p_add.c +++ b/src/math/p_add.c @@ -16,11 +16,64 @@ * */ -void p_add_f32(const float *a, const float *b, float *c, int n) -{ - - int i; - for (i = 0; i < n; i++) { - *(c + i) = *(a + i) + *(b + i); - } -} +#define GEN_FUNC_ADD(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) + *(b); \ + for (;--n;) \ + *(c + n) += *(a + n) + *(b + n); \ + } + +GEN_FUNC_ADD(p_add_f32,float); + +GEN_FUNC_ADD(p_add_int8,int8_t); +GEN_FUNC_ADD(p_add_uint8,uint8_t); + +GEN_FUNC_ADD(p_add_int16,int16_t); +GEN_FUNC_ADD(p_add_uint16,uint16_t); + +GEN_FUNC_ADD(p_add_int32,int32_t); +GEN_FUNC_ADD(p_add_uint32,uint32_t); + +GEN_FUNC_ADD(p_add_int64,int64_t); +GEN_FUNC_ADD(p_add_uint64,uint64_t); + +/** + * + * Element wise vector addition between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. 
+ * + * @return None + * + */ + +#define GEN_FUNC_ADDS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) + *(b); \ + for (;--n;) \ + *(c + n) += *(a + n) + *(b); \ + } + +GEN_FUNC_ADDS(p_adds_f32,float); + +GEN_FUNC_ADDS(p_adds_int8,int8_t); +GEN_FUNC_ADDS(p_adds_uint8,uint8_t); + +GEN_FUNC_ADDS(p_adds_int16,int16_t); +GEN_FUNC_ADDS(p_adds_uint16,uint16_t); + +GEN_FUNC_ADDS(p_adds_int32,int32_t); +GEN_FUNC_ADDS(p_adds_uint32,uint32_t); + +GEN_FUNC_ADDS(p_adds_int64,int64_t); +GEN_FUNC_ADDS(p_adds_uint64,uint64_t); \ No newline at end of file From b81ca3d1748e83f82b01cd4c4f4afbd14daa8b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Wed, 17 Jun 2015 02:29:08 +0200 Subject: [PATCH 4/6] Vector-vector and vector-scalar basic math operations implemented as templates for all data types. --- src/math/p_add.c | 4 +-- src/math/p_div.c | 67 ++++++++++++++++++++++++++++++++++++++++----- src/math/p_mul.c | 67 ++++++++++++++++++++++++++++++++++++++++----- src/math/p_sub.c | 70 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 185 insertions(+), 23 deletions(-) diff --git a/src/math/p_add.c b/src/math/p_add.c index 35745cf..f041d94 100644 --- a/src/math/p_add.c +++ b/src/math/p_add.c @@ -22,7 +22,7 @@ { \ *(c) = *(a) + *(b); \ for (;--n;) \ - *(c + n) += *(a + n) + *(b + n); \ + *(c + n) = *(a + n) + *(b + n); \ } GEN_FUNC_ADD(p_add_f32,float); @@ -61,7 +61,7 @@ GEN_FUNC_ADD(p_add_uint64,uint64_t); { \ *(c) = *(a) + *(b); \ for (;--n;) \ - *(c + n) += *(a + n) + *(b); \ + *(c + n) = *(a + n) + *(b); \ } GEN_FUNC_ADDS(p_adds_f32,float); diff --git a/src/math/p_div.c b/src/math/p_div.c index 814e04f..0d991ae 100644 --- a/src/math/p_div.c +++ b/src/math/p_div.c @@ -16,11 +16,66 @@ * */ -void p_div_f32(const float *a, const float *b, float *c, int n) -{ - int i; - for (i = 0; i < n; i++) { - *(c + i) = *(a + i) / *(b + i); +#define 
GEN_FUNC_DIV(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) / *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) / *(b + n); \ } -} + +GEN_FUNC_DIV(p_div_f32,float); + +GEN_FUNC_DIV(p_div_int8,int8_t); +GEN_FUNC_DIV(p_div_uint8,uint8_t); + +GEN_FUNC_DIV(p_div_int16,int16_t); +GEN_FUNC_DIV(p_div_uint16,uint16_t); + +GEN_FUNC_DIV(p_div_int32,int32_t); +GEN_FUNC_DIV(p_div_uint32,uint32_t); + +GEN_FUNC_DIV(p_div_int64,int64_t); +GEN_FUNC_DIV(p_div_uint64,uint64_t); + +/** + * + * Element wise vector division between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @return None + * + */ + +#define GEN_FUNC_DIVS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + float t = 1.0f / *(b); \ + *(c) = *(a) * t; \ + for (;--n;) \ + *(c + n) = *(a + n) * t; \ + } + +GEN_FUNC_DIVS(p_divs_f32,float); + +GEN_FUNC_DIVS(p_divs_int8,int8_t); +GEN_FUNC_DIVS(p_divs_uint8,uint8_t); + +GEN_FUNC_DIVS(p_divs_int16,int16_t); +GEN_FUNC_DIVS(p_divs_uint16,uint16_t); + +GEN_FUNC_DIVS(p_divs_int32,int32_t); +GEN_FUNC_DIVS(p_divs_uint32,uint32_t); + +GEN_FUNC_DIVS(p_divs_int64,int64_t); +GEN_FUNC_DIVS(p_divs_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_mul.c b/src/math/p_mul.c index 57af389..1315c7d 100644 --- a/src/math/p_mul.c +++ b/src/math/p_mul.c @@ -16,12 +16,65 @@ * */ -void p_mul_f32(const float *a, const float *b, float *c, - int n) -{ - int i; - for (i = 0; i < n; i++) { - *(c + i) = *(a + i) * *(b + i); +#define GEN_FUNC_MUL(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) * *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) * *(b + n); \ } -} + 
+GEN_FUNC_MUL(p_mul_f32,float); + +GEN_FUNC_MUL(p_mul_int8,int8_t); +GEN_FUNC_MUL(p_mul_uint8,uint8_t); + +GEN_FUNC_MUL(p_mul_int16,int16_t); +GEN_FUNC_MUL(p_mul_uint16,uint16_t); + +GEN_FUNC_MUL(p_mul_int32,int32_t); +GEN_FUNC_MUL(p_mul_uint32,uint32_t); + +GEN_FUNC_MUL(p_mul_int64,int64_t); +GEN_FUNC_MUL(p_mul_uint64,uint64_t); + +/** + * + * Element wise vector multiplication between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @return None + * + */ + +#define GEN_FUNC_MULS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) * *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) * *(b); \ + } + +GEN_FUNC_MULS(p_muls_f32,float); + +GEN_FUNC_MULS(p_muls_int8,int8_t); +GEN_FUNC_MULS(p_muls_uint8,uint8_t); + +GEN_FUNC_MULS(p_muls_int16,int16_t); +GEN_FUNC_MULS(p_muls_uint16,uint16_t); + +GEN_FUNC_MULS(p_muls_int32,int32_t); +GEN_FUNC_MULS(p_muls_uint32,uint32_t); + +GEN_FUNC_MULS(p_muls_int64,int64_t); +GEN_FUNC_MULS(p_muls_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_sub.c b/src/math/p_sub.c index b5bea91..19d8268 100644 --- a/src/math/p_sub.c +++ b/src/math/p_sub.c @@ -16,11 +16,65 @@ * */ -void p_sub_f32(const float *a, const float *b, float *c, int n) -{ - - int i; - for (i = 0; i < n; i++) { - *(c + i) = *(a + i) - *(b + i); - } -} + +#define GEN_FUNC_SUB(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) - *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) - *(b + n); \ + } + +GEN_FUNC_SUB(p_sub_f32,float); + +GEN_FUNC_SUB(p_sub_int8,int8_t); +GEN_FUNC_SUB(p_sub_uint8,uint8_t); + +GEN_FUNC_SUB(p_sub_int16,int16_t); +GEN_FUNC_SUB(p_sub_uint16,uint16_t); + +GEN_FUNC_SUB(p_sub_int32,int32_t); 
+GEN_FUNC_SUB(p_sub_uint32,uint32_t); + +GEN_FUNC_SUB(p_sub_int64,int64_t); +GEN_FUNC_SUB(p_sub_uint64,uint64_t); + +/** + * + * Element wise vector subtraction between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @return None + * + */ + +#define GEN_FUNC_SUBS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) - *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) - *(b); \ + } + +GEN_FUNC_SUBS(p_subs_f32,float); + +GEN_FUNC_SUBS(p_subs_int8,int8_t); +GEN_FUNC_SUBS(p_subs_uint8,uint8_t); + +GEN_FUNC_SUBS(p_subs_int16,int16_t); +GEN_FUNC_SUBS(p_subs_uint16,uint16_t); + +GEN_FUNC_SUBS(p_subs_int32,int32_t); +GEN_FUNC_SUBS(p_subs_uint32,uint32_t); + +GEN_FUNC_SUBS(p_subs_int64,int64_t); +GEN_FUNC_SUBS(p_subs_uint64,uint64_t); \ No newline at end of file From 90e0cfc64db07a2137c800918b004b2ea35aa5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Wed, 17 Jun 2015 02:44:40 +0200 Subject: [PATCH 5/6] Bitwise logic operator templates/instances for all supported types. 
--- src/math/p_and.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ src/math/p_not.c | 41 ++++++++++++++++++++++++ src/math/p_or.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ src/math/p_xor.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+) create mode 100644 src/math/p_and.c create mode 100644 src/math/p_not.c create mode 100644 src/math/p_or.c create mode 100644 src/math/p_xor.c diff --git a/src/math/p_and.c b/src/math/p_and.c new file mode 100644 index 0000000..56871bb --- /dev/null +++ b/src/math/p_and.c @@ -0,0 +1,83 @@ +#include + +/** + * + * Element wise vector 'bitwise and' between input vectors 'a' and 'b' + * + * @param a Pointer to first input vector + * + * @param b Pointer to second input vector + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_AND(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) & *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) & *(b + n); \ + } + +GEN_FUNC_AND(p_and_int8,int8_t); +GEN_FUNC_AND(p_and_uint8,uint8_t); + +GEN_FUNC_AND(p_and_int16,int16_t); +GEN_FUNC_AND(p_and_uint16,uint16_t); + +GEN_FUNC_AND(p_and_int32,int32_t); +GEN_FUNC_AND(p_and_uint32,uint32_t); + +GEN_FUNC_AND(p_and_int64,int64_t); +GEN_FUNC_AND(p_and_uint64,uint64_t); + +/** + * + * Element wise vector 'bitwise and' between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. 
+ * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_ANDS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) & *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) & *(b); \ + } + +GEN_FUNC_ANDS(p_ands_int8,int8_t); +GEN_FUNC_ANDS(p_ands_uint8,uint8_t); + +GEN_FUNC_ANDS(p_ands_int16,int16_t); +GEN_FUNC_ANDS(p_ands_uint16,uint16_t); + +GEN_FUNC_ANDS(p_ands_int32,int32_t); +GEN_FUNC_ANDS(p_ands_uint32,uint32_t); + +GEN_FUNC_ANDS(p_ands_int64,int64_t); +GEN_FUNC_ANDS(p_ands_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_not.c b/src/math/p_not.c new file mode 100644 index 0000000..8840011 --- /dev/null +++ b/src/math/p_not.c @@ -0,0 +1,41 @@ +#include + +/** + * + * Element wise vector 'bitwise not' of input vector 'a' + * + * @param a Pointer to input vector + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. 
+ * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, TYPE * restrict c, int n) \ + { \ + *c = ~*(a); \ + for (;--n;) \ + *c = ~*(a + n); \ + } + + +GEN_FUNC(p_not_int8,int8_t); +GEN_FUNC(p_not_uint8,uint8_t); + +GEN_FUNC(p_not_int16,int16_t); +GEN_FUNC(p_not_uint16,uint16_t); + +GEN_FUNC(p_not_int32,int32_t); +GEN_FUNC(p_not_uint32,uint32_t); + +GEN_FUNC(p_not_int64,int64_t); +GEN_FUNC(p_not_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_or.c b/src/math/p_or.c new file mode 100644 index 0000000..5a4215e --- /dev/null +++ b/src/math/p_or.c @@ -0,0 +1,83 @@ +#include + +/** + * + * Element wise vector 'bitwise or' between input vectors 'a' and 'b' + * + * @param a Pointer to first input vector + * + * @param b Pointer to second input vector + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_OR(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) | *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) | *(b + n); \ + } + +GEN_FUNC_OR(p_or_int8,int8_t); +GEN_FUNC_OR(p_or_uint8,uint8_t); + +GEN_FUNC_OR(p_or_int16,int16_t); +GEN_FUNC_OR(p_or_uint16,uint16_t); + +GEN_FUNC_OR(p_or_int32,int32_t); +GEN_FUNC_OR(p_or_uint32,uint32_t); + +GEN_FUNC_OR(p_or_int64,int64_t); +GEN_FUNC_OR(p_or_uint64,uint64_t); + +/** + * + * Element wise vector 'bitwise or' between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. 
+ * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_ORS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) | *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) | *(b); \ + } + +GEN_FUNC_ORS(p_ors_int8,int8_t); +GEN_FUNC_ORS(p_ors_uint8,uint8_t); + +GEN_FUNC_ORS(p_ors_int16,int16_t); +GEN_FUNC_ORS(p_ors_uint16,uint16_t); + +GEN_FUNC_ORS(p_ors_int32,int32_t); +GEN_FUNC_ORS(p_ors_uint32,uint32_t); + +GEN_FUNC_ORS(p_ors_int64,int64_t); +GEN_FUNC_ORS(p_ors_uint64,uint64_t); \ No newline at end of file diff --git a/src/math/p_xor.c b/src/math/p_xor.c new file mode 100644 index 0000000..379a878 --- /dev/null +++ b/src/math/p_xor.c @@ -0,0 +1,83 @@ +#include + +/** + * + * Element wise vector 'bitwise xor' between input vectors 'a' and 'b' + * + * @param a Pointer to first input vector + * + * @param b Pointer to second input vector + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. 
+ * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_XOR(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) ^ *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) ^ *(b + n); \ + } + +GEN_FUNC_XOR(p_xor_int8,int8_t); +GEN_FUNC_XOR(p_xor_uint8,uint8_t); + +GEN_FUNC_XOR(p_xor_int16,int16_t); +GEN_FUNC_XOR(p_xor_uint16,uint16_t); + +GEN_FUNC_XOR(p_xor_int32,int32_t); +GEN_FUNC_XOR(p_xor_uint32,uint32_t); + +GEN_FUNC_XOR(p_xor_int64,int64_t); +GEN_FUNC_XOR(p_xor_uint64,uint64_t); + +/** + * + * Element wise vector 'bitwise xor' between input vector 'a' and scalar 'b' + * + * @param a Pointer to input vector + * + * @param b Pointer to input scalar + * + * @param c Pointer to output vector + * + * @param n Size of 'a' and 'c' vector. + * + * @param p Number of processor to use (task parallelism) + * + * @param team Team to work with + * + * @return None + * + */ + +#define GEN_FUNC_XORS(NAME,TYPE) \ + /** NAME TYPE */ \ + void NAME(const TYPE * restrict a, const TYPE * restrict b, TYPE * restrict c, int n) \ + { \ + *(c) = *(a) ^ *(b); \ + for (;--n;) \ + *(c + n) = *(a + n) ^ *(b); \ + } + +GEN_FUNC_XORS(p_xors_int8,int8_t); +GEN_FUNC_XORS(p_xors_uint8,uint8_t); + +GEN_FUNC_XORS(p_xors_int16,int16_t); +GEN_FUNC_XORS(p_xors_uint16,uint16_t); + +GEN_FUNC_XORS(p_xors_int32,int32_t); +GEN_FUNC_XORS(p_xors_uint32,uint32_t); + +GEN_FUNC_XORS(p_xors_int64,int64_t); +GEN_FUNC_XORS(p_xors_uint64,uint64_t); \ No newline at end of file From 9591a95dca759dabbc1ac3658f780812a6a2c21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=C3=B6hm?= Date: Tue, 23 Jun 2015 00:11:57 +0200 Subject: [PATCH 6/6] Fixed missing index in output vector. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Thomas Böhm --- src/math/p_not.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/math/p_not.c b/src/math/p_not.c index 8840011..1a0f3fa 100644 --- a/src/math/p_not.c +++ b/src/math/p_not.c @@ -24,7 +24,7 @@ { \ *c = ~*(a); \ for (;--n;) \ - *c = ~*(a + n); \ + *(c + n) = ~*(a + n); \ }