libraries/sincos.as at master · ForwardCom/libraries · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*********************************  sin.as  ***********************************
* Author:        Agner Fog
* date created:  2018-03-29
* Last modified: 2021-04-25
* Version:       1.11
* Project:       ForwardCom library math.li
* Description:   sin, cos, and tan functions. Calculate in radians, double precision
*                The argument x can be a scalar or a vector
*                The return value will be a vector with the same length
* C declaration: double sin(double x);
* C declaration: double cos(double x);
* C declaration: double tan(double x);
* C declaration: struct {double s; double c;} sincos(double x);
*
* This code is adapted from C++ vector class library www.github.com/vectorclass
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/


// define constants
% M_2_PI = 0.636619772367581343076       // 2./pi
% P0sin  = -1.66666666666666307295E-1    // polynomial coefficients for sin
% P1sin  = 8.33333333332211858878E-3
% P2sin  = -1.98412698295895385996E-4
% P3sin  = 2.75573136213857245213E-6
% P4sin  = -2.50507477628578072866E-8
% P5sin  = 1.58962301576546568060E-10

% P0cos  = 4.16666666666665929218E-2     // polynomial coefficients for cos
% P1cos  = -1.38888888888730564116E-3
% P2cos  = 2.48015872888517045348E-5
% P3cos  = -2.75573141792967388112E-7
% P4cos  = 2.08757008419747316778E-9
% P5cos  = -1.13585365213876817300E-11

% DP1    = 7.853981554508209228515625E-1 // modulo pi/2 for extended precision
% DP2    = 7.94662735614792836714E-9     // correction for extended precision modular artithmetic
% DP3    = 3.06161699786838294307E-17    // correction for extended precision modular artithmetic


code section execute align = 4

public _sin:    function, reguse = 0, 0x7BF
public _cos:    function, reguse = 0, 0x7BF
public _sincos: function, reguse = 0, 0x7BF
public _tan:    function, reguse = 0, 0x7BF

// common entry for sin and sincos functions
_sin function
_sincos:

/* registers:
  v0 = x
  v1 = abs(x)
  v1 = quadrant
  v10 = abs(x) reduced modulo pi/2
  v2 = v10^2
  v3 = v10^4
  v4 = v10^8
  v5 = v10^3
  v6 = unused (vacant flag for calling function)
  v7 = temp
  v8 = sin
  v9 = cos
*/

// Find quadrant:
//      0 -   pi/4 => 0
//   pi/4 - 3*pi/4 => 1
// 3*pi/4 - 5*pi/4 => 2
// 5*pi/4 - 7*pi/4 => 3
// 7*pi/4 - 8*pi/4 => 4

double v1 = clear_bit(v0, 63)                    // abs(x)
double v4 = v1 * M_2_PI
double v4 = round(v4, 0)   // round to integer

// reduce modulo pi/2, with extended precision
// x = ((xa - y * DP1) - y * DP2) - y * DP3;
double v10 = v4 * (-DP1*2) + v1
double v10 = v4 * (-DP2*2) + v10
double v10 = v4 * (-DP3*2) + v10
double v5 = !(v4 > ((1 << 51) + 0.0))              // check for loss of precision and overflow, but not NAN
double v1 = v4 + ((1 << 52) + 0.0)                 // add magic number 2^52 to get integer into lowest bit
double v10 = v5 ? v10 : 0                          // zero if out of range. result will be -1, 0, or 1

// Expansion of sin and cos, valid for -pi/4 <= x <= pi/4
double v2 = v10 * v10                              // x^2
double v3 = v2 * v2                              // x^4
double v4 = v3 * v3                              // x^8

// calculate polynomial P5sin*x2^5 + P4sin*x2^4 + P3sin*x2^3 + P2sin*x2^2 + P1sin*x2 + P0sin
// = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));

double v5 = replace(v10, P0sin)                   // broadcast to same length as x
double v5 = v2 * P1sin + v5
double v7 = replace(v10, P4sin)
double v7 = v2 * P5sin + v7
double v8 = replace(v10, P2sin)
double v8 = v2 * P3sin + v8
double v7 = v7 * v4 + v5
double v8 = v8 * v3 + v7

// calculate polynomial P5cos*x2^5 + P4cos*x2^4 + P3cos*x2^3 + P2cos*x2^2 + P1cos*x2 + P0cos
// = (p2+p3*x2)*x4 + ((p4+p5*x2)*x8 + (p0+p1*x2));
double v5 = replace(v10, P0cos)
double v5 = v2 * P1cos + v5
double v7 = replace(v10, P4cos)
double v7 = v2 * P5cos + v7
double v9 = replace(v10, P2cos)
double v9 = v2 * P3cos + v9
double v7 = v7 * v4 + v5
double v9 = v9 * v3 + v7

// s = x + (x * x2) * s;
double v5 = v10 * v2
double v8 = v8 * v5 + v10
// c = 1.0 - x2 * 0.5 + (x2 * x2) * c;
double v9 = v9 * v3
double v9 = v2 * (-0.5) + v9
double v9 = 1.0 + v9

// swap sin and cos if odd quadrant
double v3 = v1 ? v9 : v8        // sin
double v4 = v1 ? v8 : v9        // cos

// get sign of sin
int64  v5 = v1 << 62            // get bit 1 into sign bit, x modulo pi/2 = 2 or 3
int64  v5 ^= v0                 // toggle with sign of original x
int64  v5 = and(v5, 1 << 63)    // isolate sign bit
double v0 = v3 ^ v5             // apply sign bit to sin

// get sign of cos
int64  v1 = v1 + 1              // change sign when x modulo pi/2 = 1 or 2
int64  v1 = v1 << 62            // get bit 1 into sign bit
int64  v1 = and(v1, 1 << 63)    // isolate sign bit
double v1 = v4 ^ v1             // apply sign bit to cos

// return sin in v0, cos in v1
return
_sin end

// cosine function
_cos function
call _sincos
double v0 = v1                                   // cos is in v1
return
_cos end

// tangent function
_tan function
call _sincos
double v0 = v0 / v1                              // tan(x) = sin(x)/cos(x)
return
_tan end

code end