2 /* @(#)k_rem_pio2.c 1.3 95/01/18 */
4 * ====================================================
5 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
7 * Developed at SunSoft, a Sun Microsystems, Inc. business.
8 * Permission to use, copy, modify, and distribute this
9 * software is freely granted, provided that this notice
11 * ====================================================
13 * FreeBSD SVN: 176550 (2008-02-25)
17 * __kernel_rem_pio2(x,y,e0,nx,prec)
18 * double x[],y[]; int e0,nx,prec;
20 * __kernel_rem_pio2 return the last three digits of N with
24 * The method is to compute the integer (mod 8) and fraction parts of
25 * (2/pi)*x without doing the full multiplication. In general we
26 * skip the part of the product that are known to be a huge integer (
27 * more accurately, = 0 mod 8 ). Thus the number of operations are
28 * independent of the exponent of the input.
30 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
33 * x[] The input value (must be positive) is broken into nx
34 * pieces of 24-bit integers in double precision format.
35 * x[i] will be the i-th 24 bit of x. The scaled exponent
36 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
37 * match x's up to 24 bits.
39 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
47 * y[] output result in an array of double precision numbers.
48 * The dimension of y[] is:
53 * The actual value is the sum of them. Thus for 113-bit
54 * precison, one may have to do something like:
56 * long double t,w,r_head, r_tail;
57 * t = (long double)y[2] + (long double)y[1];
58 * w = (long double)y[0];
60 * r_tail = w - (r_head - t);
62 * e0 The exponent of x[0]. Must be <= 16360 or you need to
63 * expand the ipio2 table.
67 * prec an integer indicating the precision:
70 * 2 64 bits (extended)
74 * double scalbn(), floor();
77 * Here is the description of some local variables:
79 * jk jk+1 is the initial number of terms of ipio2[] needed
80 * in the computation. The minimum and recommended value
81 * for jk is 3,4,4,6 for single, double, extended, and quad.
82 * jk+1 must be 2 larger than you might expect so that our
83 * recomputation test works. (Up to 24 bits in the integer
84 * part (the 24 bits of it that we compute) and 23 bits in
85 * the fraction part may be lost to cancelation before we
88 * jz local integer variable indicating the number of
89 * terms of ipio2[] used.
93 * jv index for pointing to the suitable ipio2[] for the
94 * computation. In general, we want
95 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
97 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
98 * Hence jv = max(0,(e0-3)/24).
100 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
102 * q[] double array with integral value, representing the
103 * 24-bits chunk of the product of x and 2/pi.
105 * q0 the corresponding exponent of q[0]. Note that the
106 * exponent for q[i] would be q0-24*i.
108 * PIo2[] double precision array, obtained by cutting pi/2
109 * into 24 bits chunks.
111 * f[] ipio2[] in floating point
113 * iq[] integer array by breaking up q[] in 24-bits chunk.
115 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
117 * ih integer. If >0 it indicates q[] is >= 0.5, hence
118 * it also indicates the *sign* of the result.
125 * The hexadecimal values are the intended ones for the following
126 * constants. The decimal values may be used, provided that the
127 * compiler will convert from decimal to binary accurately enough
128 * to produce the hexadecimal values shown.
132 #include "math_private.h"
134 static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
137 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
139 * integer array, contains the (24*i)-th to (24*i+23)-th
140 * bit of 2/pi after binary point. The corresponding
143 * ipio2[i] * 2^(-24(i+1)).
145 * NB: This table must have at least (e0-3)/24 + jk terms.
146 * For quad precision (e0 <= 16360, jk = 6), this is 686.
148 static const int32_t ipio2[] = {
149 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
150 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
151 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
152 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
153 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
154 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
155 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
156 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
157 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
158 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
159 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
161 #if LDBL_MAX_EXP > 1024
162 #if LDBL_MAX_EXP > 16384
163 #error "ipio2 table needs to be expanded"
165 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
166 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
167 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
168 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
169 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
170 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
171 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
172 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
173 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
174 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
175 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
176 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
177 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
178 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
179 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
180 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
181 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
182 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
183 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
184 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
185 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
186 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
187 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
188 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
189 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
190 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
191 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
192 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
193 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
194 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
195 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
196 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
197 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
198 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
199 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
200 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
201 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
202 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
203 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
204 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
205 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
206 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
207 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
208 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
209 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
210 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
211 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
212 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
213 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
214 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
215 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
216 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
217 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
218 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
219 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
220 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
221 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
222 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
223 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
224 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
225 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
226 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
227 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
228 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
229 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
230 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
231 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
232 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
233 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
234 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
235 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
236 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
237 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
238 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
239 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
240 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
241 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
242 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
243 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
244 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
245 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
246 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
247 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
248 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
249 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
250 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
251 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
252 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
253 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
254 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
255 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
256 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
257 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
258 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
259 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
260 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
261 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
262 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
263 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
264 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
265 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
266 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
267 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
268 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
273 static const double PIo2[] = {
274 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
275 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
276 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
277 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
278 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
279 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
280 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
281 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
287 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
288 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
291 __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec)
293 int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
294 double z,fw,f[20],fq[20],q[20];
300 /* determine jx,jv,q0, note that 3>q0 */
302 jv = (e0-3)/24; if(jv<0) jv=0;
305 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
306 j = jv-jx; m = jx+jk;
307 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
309 /* compute q[0],q[1],...q[jk] */
310 for (i=0;i<=jk;i++) {
311 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
316 /* distill q[] into iq[] reversingly */
317 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
318 fw = (double)((int32_t)(twon24* z));
319 iq[i] = (int32_t)(z-two24*fw);
324 z = scalbn(z,q0); /* actual value of z */
325 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
329 if(q0>0) { /* need iq[jz-1] to determine n */
330 i = (iq[jz-1]>>(24-q0)); n += i;
331 iq[jz-1] -= i<<(24-q0);
332 ih = iq[jz-1]>>(23-q0);
334 else if(q0==0) ih = iq[jz-1]>>23;
335 else if(z>=0.5) ih=2;
337 if(ih>0) { /* q > 0.5 */
339 for(i=0;i<jz ;i++) { /* compute 1-q */
343 carry = 1; iq[i] = 0x1000000- j;
345 } else iq[i] = 0xffffff - j;
347 if(q0>0) { /* rare case: chance is 1 in 12 */
350 iq[jz-1] &= 0x7fffff; break;
352 iq[jz-1] &= 0x3fffff; break;
357 if(carry!=0) z -= scalbn(one,q0);
361 /* check if recomputation is needed */
364 for (i=jz-1;i>=jk;i--) j |= iq[i];
365 if(j==0) { /* need recomputation */
366 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
368 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
369 f[jx+i] = (double) ipio2[jv+i];
370 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
378 /* chop off zero terms */
381 while(iq[jz]==0) { jz--; q0-=24;}
382 } else { /* break z into 24-bit if necessary */
385 fw = (double)((int32_t)(twon24*z));
386 iq[jz] = (int32_t)(z-two24*fw);
388 iq[jz] = (int32_t) fw;
389 } else iq[jz] = (int32_t) z ;
392 /* convert integer "bit" chunk to floating-point value */
395 q[i] = fw*(double)iq[i]; fw*=twon24;
398 /* compute PIo2[0,...,jp]*q[jz,...,0] */
400 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
404 /* compress fq[] into y[] */
408 for (i=jz;i>=0;i--) fw += fq[i];
409 y[0] = (ih==0)? fw: -fw;
414 for (i=jz;i>=0;i--) fw += fq[i];
415 STRICT_ASSIGN(double,fw,fw);
416 y[0] = (ih==0)? fw: -fw;
418 for (i=1;i<=jz;i++) fw += fq[i];
419 y[1] = (ih==0)? fw: -fw;
421 case 3: /* painful */
432 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
434 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
436 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;