diff --git a/README.md b/README.md index d0ef86c..23ccb7c 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,7 @@ FUNCTION | NOTES [p_cosh()](src/math/p_cosh.c) | hyperbolic cosine [p_div()](src/math/p_div.c) | division [p_dot()](src/math/p_dot.c) | dot product +[p_erf()](src/math/p_erf.c) | error function [p_exp()](src/math/p_exp.c) | exponential [p_ftoi()](src/math/p_ftoi.c) | float to [p_itof()](src/math/p_itof.c) | integer to float conversion diff --git a/include/pal_math.h b/include/pal_math.h index 46713b7..49824c7 100644 --- a/include/pal_math.h +++ b/include/pal_math.h @@ -142,6 +142,9 @@ void p_cosh_f32(const float *a, float *c, int n); /*division: c = a ./ b */ void p_div_f32(const float *a, const float *b, float *c, int n); +/*error function: c = erf ( a ) */ +void p_erf_f32(const float *a, float *c, int n); + /*exponential: c = exp ( a ) */ void p_exp_f32(const float *a, float *c, int n); diff --git a/src/math/Makefile.am b/src/math/Makefile.am index 0ca5d63..38f57f5 100644 --- a/src/math/Makefile.am +++ b/src/math/Makefile.am @@ -20,7 +20,7 @@ libpal_math_la_SOURCES = \ p_cosh.c \ p_div.c \ p_dot.c \ - p_erff.c \ + p_erf.c \ p_exp.c p_exp.h \ p_ftoi.c \ p_inv.c \ diff --git a/src/math/p_erf.c b/src/math/p_erf.c new file mode 100644 index 0000000..1453a31 --- /dev/null +++ b/src/math/p_erf.c @@ -0,0 +1,46 @@ +#include +/** +* +* Calculate error function +* +* @param a Pointer to input vector +* +* @param c Pointer to output vector +* +* @param n Size of 'a' and 'c' vector. +* +* @return None +* +*/ + +void p_erf_f32(const float *a, float *c, int n) + +{ + int i; int sign; float x; + for ( i = 0; i < n; i++) + { + const float *pa = (a+i); + float *pc = (c+i); + + // the computation is based on formula 7.1.28 from Abramowitz and Stegun for a >= 0 + + if (*pa >= 0) sign = 1.0; else sign = -1.0; // store sign of input variable + if (*pa < 0) x = *pa * (-1.0f); else x = *pa; // this is equivalent of x=abs(a) + if (x >= 3.6f) *pc = 1.0f * sign; // c = 1 * sign if a >=3.6 else calculate error function + else + { + // constants for formula 7.1.28 from Abramowitz and Stegun + float a1 = 0.0705230784f; + float a2 = 0.0422820123f; + float a3 = 0.0092705272f; + float a4 = 0.0001520143f; + float a5 = 0.0002765672f; + float a6 = 0.0000430638f; + + //formula 7.1.28 from Abramowitz and Stegun + float t = 1.0f / ((((((a6 * x + a5) * x + a4) * x + a3) * x + a2) * x + a1) * x + 1.0f); + float z = 1.0f - (t * t * t * t * t * t * t * t * t * t * t * t * t * t * t * t) ; + *pc = z*sign; + } + } +} diff --git a/tests/math/Makefile.am b/tests/math/Makefile.am index 67400c0..01aab93 100644 --- a/tests/math/Makefile.am +++ b/tests/math/Makefile.am @@ -32,6 +32,7 @@ BUILT_SOURCES = \ gold/p_cosh_f32.gold.h \ gold/p_div_f32.gold.h \ gold/p_dot_f32.gold.h \ + gold/p_erf_f32.gold.h \ gold/p_exp_f32.gold.h \ gold/p_invcbrt_f32.gold.h \ gold/p_inv_f32.gold.h \ @@ -85,6 +86,7 @@ check_PROGRAMS = \ check_p_cosh_f32 \ check_p_div_f32 \ check_p_dot_f32 \ + check_p_erf_f32 \ check_p_exp_f32 \ check_p_ftoi \ check_p_invcbrt_f32 \ @@ -130,6 +132,7 @@ check_p_cos_f32_SOURCES = $(SIMPLE) p_cos.c check_p_cosh_f32_SOURCES = $(SIMPLE) p_cosh.c check_p_div_f32_SOURCES = $(SIMPLE) check_p_dot_f32_SOURCES = $(SIMPLE) +check_p_erf_f32_SOURCES = $(SIMPLE) p_erf.c check_p_exp_f32_SOURCES = $(SIMPLE) p_exp.c check_p_ftoi_SOURCES = notest.c check_p_inv_f32_SOURCES = $(SIMPLE) @@ -176,6 +179,7 @@ check_p_cos_f32_CFLAGS = -DFUNCTION=p_cos_f32 -DIS_UNARY check_p_cosh_f32_CFLAGS = -DFUNCTION=p_cosh_f32 -DIS_UNARY check_p_div_f32_CFLAGS = -DFUNCTION=p_div_f32 -DIS_BINARY check_p_dot_f32_CFLAGS = -DFUNCTION=p_dot_f32 -DIS_BINARY -DSCALAR_OUTPUT +check_p_erf_f32_CFLAGS = -DFUNCTION=p_erf_f32 -DIS_UNARY check_p_exp_f32_CFLAGS = -DFUNCTION=p_exp_f32 -DIS_UNARY check_p_ftoi_CFLAGS = -DFUNCTION=p_ftoi check_p_invcbrt_f32_CFLAGS = -DFUNCTION=p_invcbrt_f32 -DIS_UNARY diff --git a/tests/math/gold/p_erf_f32.dat b/tests/math/gold/p_erf_f32.dat new file mode 100644 index 0000000..197fe04 --- /dev/null +++ b/tests/math/gold/p_erf_f32.dat @@ -0,0 +1,100 @@ +3.751485,0.000000,0.000000,1.000000 +1.644753,0.000000,0.000000,0.979983 +2.731141,0.000000,0.000000,0.999888 +4.749291,0.000000,0.000000,1.000000 +2.340936,0.000000,0.000000,0.999069 +1.468554,0.000000,0.000000,0.962184 +0.747816,0.000000,0.000000,0.709749 +2.542677,0.000000,0.000000,0.999677 +3.927524,0.000000,0.000000,1.000000 +4.591555,0.000000,0.000000,1.000000 +0.940791,0.000000,0.000000,0.816640 +0.990828,0.000000,0.000000,0.838858 +0.633477,0.000000,0.000000,0.629679 +0.963006,0.000000,0.000000,0.826769 +0.579684,0.000000,0.000000,0.587668 +0.802347,0.000000,0.000000,0.743495 +0.503400,0.000000,0.000000,0.523483 +0.962943,0.000000,0.000000,0.826741 +0.994273,0.000000,0.000000,0.840310 +0.209306,0.000000,0.000000,0.232772 +0.064431,0.000000,0.000000,0.072602 +0.045727,0.000000,0.000000,0.051561 +0.081177,0.000000,0.000000,0.091398 +0.066577,0.000000,0.000000,0.075013 +0.004642,0.000000,0.000000,0.005238 +0.052149,0.000000,0.000000,0.058791 +0.046477,0.000000,0.000000,0.052406 +0.016091,0.000000,0.000000,0.018155 +0.028130,0.000000,0.000000,0.031733 +0.098757,0.000000,0.000000,0.111074 +0.008272,0.000000,0.000000,0.009334 +0.005968,0.000000,0.000000,0.006734 +0.009862,0.000000,0.000000,0.011128 +0.009413,0.000000,0.000000,0.010621 +0.001611,0.000000,0.000000,0.001818 +0.000799,0.000000,0.000000,0.000902 +0.004319,0.000000,0.000000,0.004873 +0.009427,0.000000,0.000000,0.010637 +0.009828,0.000000,0.000000,0.011089 +0.001843,0.000000,0.000000,0.002080 +0.000334,0.000000,0.000000,0.000377 +0.000619,0.000000,0.000000,0.000698 +0.000024,0.000000,0.000000,0.000027 +0.000812,0.000000,0.000000,0.000916 +0.000977,0.000000,0.000000,0.001102 +0.000708,0.000000,0.000000,0.000799 +0.000511,0.000000,0.000000,0.000577 +0.000729,0.000000,0.000000,0.000823 +0.000003,0.000000,0.000000,0.000003 +0.000784,0.000000,0.000000,0.000885 +-1.109036,0.000000,0.000000,-0.883215 +-2.732435,0.000000,0.000000,-0.999889 +-1.525864,0.000000,0.000000,-0.969064 +-3.190213,0.000000,0.000000,-0.999994 +-2.315364,0.000000,0.000000,-0.998941 +-4.730506,0.000000,0.000000,-1.000000 +-4.958714,0.000000,0.000000,-1.000000 +-4.778193,0.000000,0.000000,-1.000000 +-3.746597,0.000000,0.000000,-1.000000 +-1.386845,0.000000,0.000000,-0.950155 +-0.193302,0.000000,0.000000,-0.215431 +-0.421222,0.000000,0.000000,-0.448623 +-0.599165,0.000000,0.000000,-0.603198 +-0.253165,0.000000,0.000000,-0.279679 +-0.826987,0.000000,0.000000,-0.757812 +-0.560776,0.000000,0.000000,-0.572255 +-0.170316,0.000000,0.000000,-0.190339 +-0.701307,0.000000,0.000000,-0.678704 +-0.270203,0.000000,0.000000,-0.297631 +-0.630144,0.000000,0.000000,-0.627156 +-0.019502,0.000000,0.000000,-0.022003 +-0.087538,0.000000,0.000000,-0.098524 +-0.047115,0.000000,0.000000,-0.053124 +-0.028526,0.000000,0.000000,-0.032179 +-0.054702,0.000000,0.000000,-0.061663 +-0.043445,0.000000,0.000000,-0.048992 +-0.033587,0.000000,0.000000,-0.037885 +-0.024213,0.000000,0.000000,-0.027316 +-0.043174,0.000000,0.000000,-0.048686 +-0.001590,0.000000,0.000000,-0.001794 +-0.007997,0.000000,0.000000,-0.009023 +-0.008562,0.000000,0.000000,-0.009661 +-0.000377,0.000000,0.000000,-0.000425 +-0.003861,0.000000,0.000000,-0.004357 +-0.008776,0.000000,0.000000,-0.009902 +-0.002093,0.000000,0.000000,-0.002362 +-0.000720,0.000000,0.000000,-0.000812 +-0.003842,0.000000,0.000000,-0.004335 +-0.000287,0.000000,0.000000,-0.000324 +-0.007317,0.000000,0.000000,-0.008256 +-0.000039,0.000000,0.000000,-0.000044 +-0.000941,0.000000,0.000000,-0.001062 +-0.000891,0.000000,0.000000,-0.001005 +-0.000204,0.000000,0.000000,-0.000230 +-0.000106,0.000000,0.000000,-0.000120 +-0.000879,0.000000,0.000000,-0.000992 +-0.000980,0.000000,0.000000,-0.001106 +-0.000774,0.000000,0.000000,-0.000873 +-0.000186,0.000000,0.000000,-0.000210 +-0.000536,0.000000,0.000000,-0.000605 diff --git a/tests/math/p_erf.c b/tests/math/p_erf.c new file mode 100644 index 0000000..eba7dbb --- /dev/null +++ b/tests/math/p_erf.c @@ -0,0 +1,10 @@ +#include +#include "simple.h" + +void generate_ref(float *out, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + out[i] = erff(ai[i]); +}