Commit | Line | Data |
---|---|---|
a8a6a916 JM |
1 | /*- |
2 | * Copyright (c) 2007-2013 Bruce D. Evans | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice unmodified, this list of conditions, and the following | |
10 | * disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
16 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
17 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
18 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
19 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
20 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
21 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
22 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
24 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
25 | * | |
26 | * $FreeBSD: head/lib/msun/ld80/s_logl.c 251292 2013-06-03 09:14:31Z das $ | |
27 | */ | |
28 | ||
29 | /** | |
30 | * Implementation of the natural logarithm of x for Intel 80-bit format. | |
31 | * | |
32 | * First decompose x into its base 2 representation: | |
33 | * | |
34 | * log(x) = log(X * 2**k), where X is in [1, 2) | |
35 | * = log(X) + k * log(2). | |
36 | * | |
37 | * Let X = X_i + e, where X_i is the center of one of the intervals | |
38 | * [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256) | |
39 | * and X is in this interval. Then | |
40 | * | |
41 | * log(X) = log(X_i + e) | |
42 | * = log(X_i * (1 + e / X_i)) | |
43 | * = log(X_i) + log(1 + e / X_i). | |
44 | * | |
45 | * The values log(X_i) are tabulated below. Let d = e / X_i and use | |
46 | * | |
47 | * log(1 + d) = p(d) | |
48 | * | |
49 | * where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of | |
50 | * suitably high degree. | |
51 | * | |
52 | * To get sufficiently small roundoff errors, k * log(2), log(X_i), and | |
53 | * sometimes (if |k| is not large) the first term in p(d) must be evaluated | |
54 | * and added up in extra precision. Extra precision is not needed for the | |
55 | * rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final | |
56 | * error is controlled mainly by the error in the second term in p(d). The | |
57 | * error in this term itself is at most 0.5 ulps from the d*d operation in | |
58 | * it. The error in this term relative to the first term is thus at most | |
59 | * 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of | |
60 | * at most twice this at the point of the final rounding step. Thus the | |
61 | * final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive | |
62 | * testing of a float variant of this function showed a maximum final error | |
63 | * of 0.5008 ulps. Non-exhaustive testing of a double variant of this | |
64 | * function showed a maximum final error of 0.5078 ulps (near 1+1.0/256). | |
65 | * | |
66 | * We made the maximum of |d| (and thus the total relative error and the | |
67 | * degree of p(d)) small by using a large number of intervals. Using | |
68 | * centers of intervals instead of endpoints reduces this maximum by a | |
69 | * factor of 2 for a given number of intervals. p(d) is special only | |
70 | * in beginning with the Taylor coefficients 0 + 1*d, which tends to happen | |
71 | * naturally. The most accurate minimax polynomial of a given degree might | |
72 | * be different, but then we wouldn't want it since we would have to do | |
73 | * extra work to avoid roundoff error (especially for P0*d instead of d). | |
74 | */ | |
75 | ||
76 | #ifdef DEBUG | |
77 | #include <assert.h> | |
78 | #include <fenv.h> | |
79 | #endif | |
80 | ||
81 | #ifdef __i386__ | |
82 | #include <ieeefp.h> | |
83 | #endif | |
84 | ||
85 | #include "fpmath.h" | |
86 | #include "math.h" | |
87 | #define i386_SSE_GOOD | |
88 | #ifndef NO_STRUCT_RETURN | |
89 | #define STRUCT_RETURN | |
90 | #endif | |
91 | #include "math_private.h" | |
92 | ||
93 | #if !defined(NO_UTAB) && !defined(NO_UTABL) | |
94 | #define USE_UTAB | |
95 | #endif | |
96 | ||
97 | /* | |
98 | * Domain [-0.005280, 0.004838], range ~[-5.1736e-22, 5.1738e-22]: | |
99 | * |log(1 + d)/d - p(d)| < 2**-70.7 | |
100 | */ | |
101 | static const double | |
102 | P2 = -0.5, | |
103 | P3 = 3.3333333333333359e-1, /* 0x1555555555555a.0p-54 */ | |
104 | P4 = -2.5000000000004424e-1, /* -0x1000000000031d.0p-54 */ | |
105 | P5 = 1.9999999992970016e-1, /* 0x1999999972f3c7.0p-55 */ | |
106 | P6 = -1.6666666072191585e-1, /* -0x15555548912c09.0p-55 */ | |
107 | P7 = 1.4286227413310518e-1, /* 0x12494f9d9def91.0p-55 */ | |
108 | P8 = -1.2518388626763144e-1; /* -0x1006068cc0b97c.0p-55 */ | |
109 | ||
110 | static volatile const double zero = 0; | |
111 | ||
112 | #define INTERVALS 128 | |
113 | #define LOG2_INTERVALS 7 | |
114 | #define TSIZE (INTERVALS + 1) | |
115 | #define G(i) (T[(i)].G) | |
116 | #define F_hi(i) (T[(i)].F_hi) | |
117 | #define F_lo(i) (T[(i)].F_lo) | |
118 | #define ln2_hi F_hi(TSIZE - 1) | |
119 | #define ln2_lo F_lo(TSIZE - 1) | |
120 | #define E(i) (U[(i)].E) | |
121 | #define H(i) (U[(i)].H) | |
122 | ||
123 | static const struct { | |
124 | float G; /* 1/(1 + i/128) rounded to 8/9 bits */ | |
125 | float F_hi; /* log(1 / G_i) rounded (see below) */ | |
126 | double F_lo; /* next 53 bits for log(1 / G_i) */ | |
127 | } T[TSIZE] = { | |
128 | /* | |
129 | * ln2_hi and each F_hi(i) are rounded to a number of bits that | |
130 | * makes F_hi(i) + dk*ln2_hi exact for all i and all dk. | |
131 | * | |
132 | * The last entry (for X just below 2) is used to define ln2_hi | |
133 | * and ln2_lo, to ensure that F_hi(i) and F_lo(i) cancel exactly | |
134 | * with dk*ln2_hi and dk*ln2_lo, respectively, when dk = -1. | |
135 | * This is needed for accuracy when x is just below 1. (To avoid | |
136 | * special cases, such x are "reduced" strangely to X just below | |
137 | * 2 and dk = -1, and then the exact cancellation is needed | |
138 | * because any the error from any non-exactness would be too | |
139 | * large). | |
140 | * | |
141 | * We want to share this table between double precision and ld80, | |
142 | * so the relevant range of dk is the larger one of ld80 | |
143 | * ([-16445, 16383]) and the relevant exactness requirement is | |
144 | * the stricter one of double precision. The maximum number of | |
145 | * bits in F_hi(i) that works is very dependent on i but has | |
146 | * a minimum of 33. We only need about 12 bits in F_hi(i) for | |
147 | * it to provide enough extra precision in double precision (11 | |
148 | * more than that are required for ld80). | |
149 | * | |
150 | * We round F_hi(i) to 24 bits so that it can have type float, | |
151 | * mainly to minimize the size of the table. Using all 24 bits | |
152 | * in a float for it automatically satisfies the above constraints. | |
153 | */ | |
154 | 0x800000.0p-23, 0, 0, | |
155 | 0xfe0000.0p-24, 0x8080ac.0p-30, -0x14ee431dae6675.0p-84, | |
156 | 0xfc0000.0p-24, 0x8102b3.0p-29, -0x1db29ee2d83718.0p-84, | |
157 | 0xfa0000.0p-24, 0xc24929.0p-29, 0x1191957d173698.0p-83, | |
158 | 0xf80000.0p-24, 0x820aec.0p-28, 0x13ce8888e02e79.0p-82, | |
159 | 0xf60000.0p-24, 0xa33577.0p-28, -0x17a4382ce6eb7c.0p-82, | |
160 | 0xf48000.0p-24, 0xbc42cb.0p-28, -0x172a21161a1076.0p-83, | |
161 | 0xf30000.0p-24, 0xd57797.0p-28, -0x1e09de07cb9589.0p-82, | |
162 | 0xf10000.0p-24, 0xf7518e.0p-28, 0x1ae1eec1b036c5.0p-91, | |
163 | 0xef0000.0p-24, 0x8cb9df.0p-27, -0x1d7355325d560e.0p-81, | |
164 | 0xed8000.0p-24, 0x999ec0.0p-27, -0x1f9f02d256d503.0p-82, | |
165 | 0xec0000.0p-24, 0xa6988b.0p-27, -0x16fc0a9d12c17a.0p-83, | |
166 | 0xea0000.0p-24, 0xb80698.0p-27, 0x15d581c1e8da9a.0p-81, | |
167 | 0xe80000.0p-24, 0xc99af3.0p-27, -0x1535b3ba8f150b.0p-83, | |
168 | 0xe70000.0p-24, 0xd273b2.0p-27, 0x163786f5251af0.0p-85, | |
169 | 0xe50000.0p-24, 0xe442c0.0p-27, 0x1bc4b2368e32d5.0p-84, | |
170 | 0xe38000.0p-24, 0xf1b83f.0p-27, 0x1c6090f684e676.0p-81, | |
171 | 0xe20000.0p-24, 0xff448a.0p-27, -0x1890aa69ac9f42.0p-82, | |
172 | 0xe08000.0p-24, 0x8673f6.0p-26, 0x1b9985194b6b00.0p-80, | |
173 | 0xdf0000.0p-24, 0x8d515c.0p-26, -0x1dc08d61c6ef1e.0p-83, | |
174 | 0xdd8000.0p-24, 0x943a9e.0p-26, -0x1f72a2dac729b4.0p-82, | |
175 | 0xdc0000.0p-24, 0x9b2fe6.0p-26, -0x1fd4dfd3a0afb9.0p-80, | |
176 | 0xda8000.0p-24, 0xa2315d.0p-26, -0x11b26121629c47.0p-82, | |
177 | 0xd90000.0p-24, 0xa93f2f.0p-26, 0x1286d633e8e569.0p-81, | |
178 | 0xd78000.0p-24, 0xb05988.0p-26, 0x16128eba936770.0p-84, | |
179 | 0xd60000.0p-24, 0xb78094.0p-26, 0x16ead577390d32.0p-80, | |
180 | 0xd50000.0p-24, 0xbc4c6c.0p-26, 0x151131ccf7c7b7.0p-81, | |
181 | 0xd38000.0p-24, 0xc3890a.0p-26, -0x115e2cd714bd06.0p-80, | |
182 | 0xd20000.0p-24, 0xcad2d7.0p-26, -0x1847f406ebd3b0.0p-82, | |
183 | 0xd10000.0p-24, 0xcfb620.0p-26, 0x1c2259904d6866.0p-81, | |
184 | 0xcf8000.0p-24, 0xd71653.0p-26, 0x1ece57a8d5ae55.0p-80, | |
185 | 0xce0000.0p-24, 0xde843a.0p-26, -0x1f109d4bc45954.0p-81, | |
186 | 0xcd0000.0p-24, 0xe37fde.0p-26, 0x1bc03dc271a74d.0p-81, | |
187 | 0xcb8000.0p-24, 0xeb050c.0p-26, -0x1bf2badc0df842.0p-85, | |
188 | 0xca0000.0p-24, 0xf29878.0p-26, -0x18efededd89fbe.0p-87, | |
189 | 0xc90000.0p-24, 0xf7ad6f.0p-26, 0x1373ff977baa69.0p-81, | |
190 | 0xc80000.0p-24, 0xfcc8e3.0p-26, 0x196766f2fb3283.0p-80, | |
191 | 0xc68000.0p-24, 0x823f30.0p-25, 0x19bd076f7c434e.0p-79, | |
192 | 0xc58000.0p-24, 0x84d52c.0p-25, -0x1a327257af0f46.0p-79, | |
193 | 0xc40000.0p-24, 0x88bc74.0p-25, 0x113f23def19c5a.0p-81, | |
194 | 0xc30000.0p-24, 0x8b5ae6.0p-25, 0x1759f6e6b37de9.0p-79, | |
195 | 0xc20000.0p-24, 0x8dfccb.0p-25, 0x1ad35ca6ed5148.0p-81, | |
196 | 0xc10000.0p-24, 0x90a22b.0p-25, 0x1a1d71a87deba4.0p-79, | |
197 | 0xbf8000.0p-24, 0x94a0d8.0p-25, -0x139e5210c2b731.0p-80, | |
198 | 0xbe8000.0p-24, 0x974f16.0p-25, -0x18f6ebcff3ed73.0p-81, | |
199 | 0xbd8000.0p-24, 0x9a00f1.0p-25, -0x1aa268be39aab7.0p-79, | |
200 | 0xbc8000.0p-24, 0x9cb672.0p-25, -0x14c8815839c566.0p-79, | |
201 | 0xbb0000.0p-24, 0xa0cda1.0p-25, 0x1eaf46390dbb24.0p-81, | |
202 | 0xba0000.0p-24, 0xa38c6e.0p-25, 0x138e20d831f698.0p-81, | |
203 | 0xb90000.0p-24, 0xa64f05.0p-25, -0x1e8d3c41123616.0p-82, | |
204 | 0xb80000.0p-24, 0xa91570.0p-25, 0x1ce28f5f3840b2.0p-80, | |
205 | 0xb70000.0p-24, 0xabdfbb.0p-25, -0x186e5c0a424234.0p-79, | |
206 | 0xb60000.0p-24, 0xaeadef.0p-25, -0x14d41a0b2a08a4.0p-83, | |
207 | 0xb50000.0p-24, 0xb18018.0p-25, 0x16755892770634.0p-79, | |
208 | 0xb40000.0p-24, 0xb45642.0p-25, -0x16395ebe59b152.0p-82, | |
209 | 0xb30000.0p-24, 0xb73077.0p-25, 0x1abc65c8595f09.0p-80, | |
210 | 0xb20000.0p-24, 0xba0ec4.0p-25, -0x1273089d3dad89.0p-79, | |
211 | 0xb10000.0p-24, 0xbcf133.0p-25, 0x10f9f67b1f4bbf.0p-79, | |
212 | 0xb00000.0p-24, 0xbfd7d2.0p-25, -0x109fab90486409.0p-80, | |
213 | 0xaf0000.0p-24, 0xc2c2ac.0p-25, -0x1124680aa43333.0p-79, | |
214 | 0xae8000.0p-24, 0xc439b3.0p-25, -0x1f360cc4710fc0.0p-80, | |
215 | 0xad8000.0p-24, 0xc72afd.0p-25, -0x132d91f21d89c9.0p-80, | |
216 | 0xac8000.0p-24, 0xca20a2.0p-25, -0x16bf9b4d1f8da8.0p-79, | |
217 | 0xab8000.0p-24, 0xcd1aae.0p-25, 0x19deb5ce6a6a87.0p-81, | |
218 | 0xaa8000.0p-24, 0xd0192f.0p-25, 0x1a29fb48f7d3cb.0p-79, | |
219 | 0xaa0000.0p-24, 0xd19a20.0p-25, 0x1127d3c6457f9d.0p-81, | |
220 | 0xa90000.0p-24, 0xd49f6a.0p-25, -0x1ba930e486a0ac.0p-81, | |
221 | 0xa80000.0p-24, 0xd7a94b.0p-25, -0x1b6e645f31549e.0p-79, | |
222 | 0xa70000.0p-24, 0xdab7d0.0p-25, 0x1118a425494b61.0p-80, | |
223 | 0xa68000.0p-24, 0xdc40d5.0p-25, 0x1966f24d29d3a3.0p-80, | |
224 | 0xa58000.0p-24, 0xdf566d.0p-25, -0x1d8e52eb2248f1.0p-82, | |
225 | 0xa48000.0p-24, 0xe270ce.0p-25, -0x1ee370f96e6b68.0p-80, | |
226 | 0xa40000.0p-24, 0xe3ffce.0p-25, 0x1d155324911f57.0p-80, | |
227 | 0xa30000.0p-24, 0xe72179.0p-25, -0x1fe6e2f2f867d9.0p-80, | |
228 | 0xa20000.0p-24, 0xea4812.0p-25, 0x1b7be9add7f4d4.0p-80, | |
229 | 0xa18000.0p-24, 0xebdd3d.0p-25, 0x1b3cfb3f7511dd.0p-79, | |
230 | 0xa08000.0p-24, 0xef0b5b.0p-25, -0x1220de1f730190.0p-79, | |
231 | 0xa00000.0p-24, 0xf0a451.0p-25, -0x176364c9ac81cd.0p-80, | |
232 | 0x9f0000.0p-24, 0xf3da16.0p-25, 0x1eed6b9aafac8d.0p-81, | |
233 | 0x9e8000.0p-24, 0xf576e9.0p-25, 0x1d593218675af2.0p-79, | |
234 | 0x9d8000.0p-24, 0xf8b47c.0p-25, -0x13e8eb7da053e0.0p-84, | |
235 | 0x9d0000.0p-24, 0xfa553f.0p-25, 0x1c063259bcade0.0p-79, | |
236 | 0x9c0000.0p-24, 0xfd9ac5.0p-25, 0x1ef491085fa3c1.0p-79, | |
237 | 0x9b8000.0p-24, 0xff3f8c.0p-25, 0x1d607a7c2b8c53.0p-79, | |
238 | 0x9a8000.0p-24, 0x814697.0p-24, -0x12ad3817004f3f.0p-78, | |
239 | 0x9a0000.0p-24, 0x821b06.0p-24, -0x189fc53117f9e5.0p-81, | |
240 | 0x990000.0p-24, 0x83c5f8.0p-24, 0x14cf15a048907b.0p-79, | |
241 | 0x988000.0p-24, 0x849c7d.0p-24, 0x1cbb1d35fb8287.0p-78, | |
242 | 0x978000.0p-24, 0x864ba6.0p-24, 0x1128639b814f9c.0p-78, | |
243 | 0x970000.0p-24, 0x87244c.0p-24, 0x184733853300f0.0p-79, | |
244 | 0x968000.0p-24, 0x87fdaa.0p-24, 0x109d23aef77dd6.0p-80, | |
245 | 0x958000.0p-24, 0x89b293.0p-24, -0x1a81ef367a59de.0p-78, | |
246 | 0x950000.0p-24, 0x8a8e20.0p-24, -0x121ad3dbb2f452.0p-78, | |
247 | 0x948000.0p-24, 0x8b6a6a.0p-24, -0x1cfb981628af72.0p-79, | |
248 | 0x938000.0p-24, 0x8d253a.0p-24, -0x1d21730ea76cfe.0p-79, | |
249 | 0x930000.0p-24, 0x8e03c2.0p-24, 0x135cc00e566f77.0p-78, | |
250 | 0x928000.0p-24, 0x8ee30d.0p-24, -0x10fcb5df257a26.0p-80, | |
251 | 0x918000.0p-24, 0x90a3ee.0p-24, -0x16e171b15433d7.0p-79, | |
252 | 0x910000.0p-24, 0x918587.0p-24, -0x1d050da07f3237.0p-79, | |
253 | 0x908000.0p-24, 0x9267e7.0p-24, 0x1be03669a5268d.0p-79, | |
254 | 0x8f8000.0p-24, 0x942f04.0p-24, 0x10b28e0e26c337.0p-79, | |
255 | 0x8f0000.0p-24, 0x9513c3.0p-24, 0x1a1d820da57cf3.0p-78, | |
256 | 0x8e8000.0p-24, 0x95f950.0p-24, -0x19ef8f13ae3cf1.0p-79, | |
257 | 0x8e0000.0p-24, 0x96dfab.0p-24, -0x109e417a6e507c.0p-78, | |
258 | 0x8d0000.0p-24, 0x98aed2.0p-24, 0x10d01a2c5b0e98.0p-79, | |
259 | 0x8c8000.0p-24, 0x9997a2.0p-24, -0x1d6a50d4b61ea7.0p-78, | |
260 | 0x8c0000.0p-24, 0x9a8145.0p-24, 0x1b3b190b83f952.0p-78, | |
261 | 0x8b8000.0p-24, 0x9b6bbf.0p-24, 0x13a69fad7e7abe.0p-78, | |
262 | 0x8b0000.0p-24, 0x9c5711.0p-24, -0x11cd12316f576b.0p-78, | |
263 | 0x8a8000.0p-24, 0x9d433b.0p-24, 0x1c95c444b807a2.0p-79, | |
264 | 0x898000.0p-24, 0x9f1e22.0p-24, -0x1b9c224ea698c3.0p-79, | |
265 | 0x890000.0p-24, 0xa00ce1.0p-24, 0x125ca93186cf0f.0p-81, | |
266 | 0x888000.0p-24, 0xa0fc80.0p-24, -0x1ee38a7bc228b3.0p-79, | |
267 | 0x880000.0p-24, 0xa1ed00.0p-24, -0x1a0db876613d20.0p-78, | |
268 | 0x878000.0p-24, 0xa2de62.0p-24, 0x193224e8516c01.0p-79, | |
269 | 0x870000.0p-24, 0xa3d0a9.0p-24, 0x1fa28b4d2541ad.0p-79, | |
270 | 0x868000.0p-24, 0xa4c3d6.0p-24, 0x1c1b5760fb4572.0p-78, | |
271 | 0x858000.0p-24, 0xa6acea.0p-24, 0x1fed5d0f65949c.0p-80, | |
272 | 0x850000.0p-24, 0xa7a2d4.0p-24, 0x1ad270c9d74936.0p-80, | |
273 | 0x848000.0p-24, 0xa899ab.0p-24, 0x199ff15ce53266.0p-79, | |
274 | 0x840000.0p-24, 0xa99171.0p-24, 0x1a19e15ccc45d2.0p-79, | |
275 | 0x838000.0p-24, 0xaa8a28.0p-24, -0x121a14ec532b36.0p-80, | |
276 | 0x830000.0p-24, 0xab83d1.0p-24, 0x1aee319980bff3.0p-79, | |
277 | 0x828000.0p-24, 0xac7e6f.0p-24, -0x18ffd9e3900346.0p-80, | |
278 | 0x820000.0p-24, 0xad7a03.0p-24, -0x1e4db102ce29f8.0p-80, | |
279 | 0x818000.0p-24, 0xae768f.0p-24, 0x17c35c55a04a83.0p-81, | |
280 | 0x810000.0p-24, 0xaf7415.0p-24, 0x1448324047019b.0p-78, | |
281 | 0x808000.0p-24, 0xb07298.0p-24, -0x1750ee3915a198.0p-78, | |
282 | 0x800000.0p-24, 0xb17218.0p-24, -0x105c610ca86c39.0p-81, | |
283 | }; | |
284 | ||
285 | #ifdef USE_UTAB | |
286 | static const struct { | |
287 | float H; /* 1 + i/INTERVALS (exact) */ | |
288 | float E; /* H(i) * G(i) - 1 (exact) */ | |
289 | } U[TSIZE] = { | |
290 | 0x800000.0p-23, 0, | |
291 | 0x810000.0p-23, -0x800000.0p-37, | |
292 | 0x820000.0p-23, -0x800000.0p-35, | |
293 | 0x830000.0p-23, -0x900000.0p-34, | |
294 | 0x840000.0p-23, -0x800000.0p-33, | |
295 | 0x850000.0p-23, -0xc80000.0p-33, | |
296 | 0x860000.0p-23, -0xa00000.0p-36, | |
297 | 0x870000.0p-23, 0x940000.0p-33, | |
298 | 0x880000.0p-23, 0x800000.0p-35, | |
299 | 0x890000.0p-23, -0xc80000.0p-34, | |
300 | 0x8a0000.0p-23, 0xe00000.0p-36, | |
301 | 0x8b0000.0p-23, 0x900000.0p-33, | |
302 | 0x8c0000.0p-23, -0x800000.0p-35, | |
303 | 0x8d0000.0p-23, -0xe00000.0p-33, | |
304 | 0x8e0000.0p-23, 0x880000.0p-33, | |
305 | 0x8f0000.0p-23, -0xa80000.0p-34, | |
306 | 0x900000.0p-23, -0x800000.0p-35, | |
307 | 0x910000.0p-23, 0x800000.0p-37, | |
308 | 0x920000.0p-23, 0x900000.0p-35, | |
309 | 0x930000.0p-23, 0xd00000.0p-35, | |
310 | 0x940000.0p-23, 0xe00000.0p-35, | |
311 | 0x950000.0p-23, 0xc00000.0p-35, | |
312 | 0x960000.0p-23, 0xe00000.0p-36, | |
313 | 0x970000.0p-23, -0x800000.0p-38, | |
314 | 0x980000.0p-23, -0xc00000.0p-35, | |
315 | 0x990000.0p-23, -0xd00000.0p-34, | |
316 | 0x9a0000.0p-23, 0x880000.0p-33, | |
317 | 0x9b0000.0p-23, 0xe80000.0p-35, | |
318 | 0x9c0000.0p-23, -0x800000.0p-35, | |
319 | 0x9d0000.0p-23, 0xb40000.0p-33, | |
320 | 0x9e0000.0p-23, 0x880000.0p-34, | |
321 | 0x9f0000.0p-23, -0xe00000.0p-35, | |
322 | 0xa00000.0p-23, 0x800000.0p-33, | |
323 | 0xa10000.0p-23, -0x900000.0p-36, | |
324 | 0xa20000.0p-23, -0xb00000.0p-33, | |
325 | 0xa30000.0p-23, -0xa00000.0p-36, | |
326 | 0xa40000.0p-23, 0x800000.0p-33, | |
327 | 0xa50000.0p-23, -0xf80000.0p-35, | |
328 | 0xa60000.0p-23, 0x880000.0p-34, | |
329 | 0xa70000.0p-23, -0x900000.0p-33, | |
330 | 0xa80000.0p-23, -0x800000.0p-35, | |
331 | 0xa90000.0p-23, 0x900000.0p-34, | |
332 | 0xaa0000.0p-23, 0xa80000.0p-33, | |
333 | 0xab0000.0p-23, -0xac0000.0p-34, | |
334 | 0xac0000.0p-23, -0x800000.0p-37, | |
335 | 0xad0000.0p-23, 0xf80000.0p-35, | |
336 | 0xae0000.0p-23, 0xf80000.0p-34, | |
337 | 0xaf0000.0p-23, -0xac0000.0p-33, | |
338 | 0xb00000.0p-23, -0x800000.0p-33, | |
339 | 0xb10000.0p-23, -0xb80000.0p-34, | |
340 | 0xb20000.0p-23, -0x800000.0p-34, | |
341 | 0xb30000.0p-23, -0xb00000.0p-35, | |
342 | 0xb40000.0p-23, -0x800000.0p-35, | |
343 | 0xb50000.0p-23, -0xe00000.0p-36, | |
344 | 0xb60000.0p-23, -0x800000.0p-35, | |
345 | 0xb70000.0p-23, -0xb00000.0p-35, | |
346 | 0xb80000.0p-23, -0x800000.0p-34, | |
347 | 0xb90000.0p-23, -0xb80000.0p-34, | |
348 | 0xba0000.0p-23, -0x800000.0p-33, | |
349 | 0xbb0000.0p-23, -0xac0000.0p-33, | |
350 | 0xbc0000.0p-23, 0x980000.0p-33, | |
351 | 0xbd0000.0p-23, 0xbc0000.0p-34, | |
352 | 0xbe0000.0p-23, 0xe00000.0p-36, | |
353 | 0xbf0000.0p-23, -0xb80000.0p-35, | |
354 | 0xc00000.0p-23, -0x800000.0p-33, | |
355 | 0xc10000.0p-23, 0xa80000.0p-33, | |
356 | 0xc20000.0p-23, 0x900000.0p-34, | |
357 | 0xc30000.0p-23, -0x800000.0p-35, | |
358 | 0xc40000.0p-23, -0x900000.0p-33, | |
359 | 0xc50000.0p-23, 0x820000.0p-33, | |
360 | 0xc60000.0p-23, 0x800000.0p-38, | |
361 | 0xc70000.0p-23, -0x820000.0p-33, | |
362 | 0xc80000.0p-23, 0x800000.0p-33, | |
363 | 0xc90000.0p-23, -0xa00000.0p-36, | |
364 | 0xca0000.0p-23, -0xb00000.0p-33, | |
365 | 0xcb0000.0p-23, 0x840000.0p-34, | |
366 | 0xcc0000.0p-23, -0xd00000.0p-34, | |
367 | 0xcd0000.0p-23, 0x800000.0p-33, | |
368 | 0xce0000.0p-23, -0xe00000.0p-35, | |
369 | 0xcf0000.0p-23, 0xa60000.0p-33, | |
370 | 0xd00000.0p-23, -0x800000.0p-35, | |
371 | 0xd10000.0p-23, 0xb40000.0p-33, | |
372 | 0xd20000.0p-23, -0x800000.0p-35, | |
373 | 0xd30000.0p-23, 0xaa0000.0p-33, | |
374 | 0xd40000.0p-23, -0xe00000.0p-35, | |
375 | 0xd50000.0p-23, 0x880000.0p-33, | |
376 | 0xd60000.0p-23, -0xd00000.0p-34, | |
377 | 0xd70000.0p-23, 0x9c0000.0p-34, | |
378 | 0xd80000.0p-23, -0xb00000.0p-33, | |
379 | 0xd90000.0p-23, -0x800000.0p-38, | |
380 | 0xda0000.0p-23, 0xa40000.0p-33, | |
381 | 0xdb0000.0p-23, -0xdc0000.0p-34, | |
382 | 0xdc0000.0p-23, 0xc00000.0p-35, | |
383 | 0xdd0000.0p-23, 0xca0000.0p-33, | |
384 | 0xde0000.0p-23, -0xb80000.0p-34, | |
385 | 0xdf0000.0p-23, 0xd00000.0p-35, | |
386 | 0xe00000.0p-23, 0xc00000.0p-33, | |
387 | 0xe10000.0p-23, -0xf40000.0p-34, | |
388 | 0xe20000.0p-23, 0x800000.0p-37, | |
389 | 0xe30000.0p-23, 0x860000.0p-33, | |
390 | 0xe40000.0p-23, -0xc80000.0p-33, | |
391 | 0xe50000.0p-23, -0xa80000.0p-34, | |
392 | 0xe60000.0p-23, 0xe00000.0p-36, | |
393 | 0xe70000.0p-23, 0x880000.0p-33, | |
394 | 0xe80000.0p-23, -0xe00000.0p-33, | |
395 | 0xe90000.0p-23, -0xfc0000.0p-34, | |
396 | 0xea0000.0p-23, -0x800000.0p-35, | |
397 | 0xeb0000.0p-23, 0xe80000.0p-35, | |
398 | 0xec0000.0p-23, 0x900000.0p-33, | |
399 | 0xed0000.0p-23, 0xe20000.0p-33, | |
400 | 0xee0000.0p-23, -0xac0000.0p-33, | |
401 | 0xef0000.0p-23, -0xc80000.0p-34, | |
402 | 0xf00000.0p-23, -0x800000.0p-35, | |
403 | 0xf10000.0p-23, 0x800000.0p-35, | |
404 | 0xf20000.0p-23, 0xb80000.0p-34, | |
405 | 0xf30000.0p-23, 0x940000.0p-33, | |
406 | 0xf40000.0p-23, 0xc80000.0p-33, | |
407 | 0xf50000.0p-23, -0xf20000.0p-33, | |
408 | 0xf60000.0p-23, -0xc80000.0p-33, | |
409 | 0xf70000.0p-23, -0xa20000.0p-33, | |
410 | 0xf80000.0p-23, -0x800000.0p-33, | |
411 | 0xf90000.0p-23, -0xc40000.0p-34, | |
412 | 0xfa0000.0p-23, -0x900000.0p-34, | |
413 | 0xfb0000.0p-23, -0xc80000.0p-35, | |
414 | 0xfc0000.0p-23, -0x800000.0p-35, | |
415 | 0xfd0000.0p-23, -0x900000.0p-36, | |
416 | 0xfe0000.0p-23, -0x800000.0p-37, | |
417 | 0xff0000.0p-23, -0x800000.0p-39, | |
418 | 0x800000.0p-22, 0, | |
419 | }; | |
420 | #endif /* USE_UTAB */ | |
421 | ||
422 | #ifdef STRUCT_RETURN | |
423 | #define RETURN1(rp, v) do { \ | |
424 | (rp)->hi = (v); \ | |
425 | (rp)->lo_set = 0; \ | |
426 | return; \ | |
427 | } while (0) | |
428 | ||
429 | #define RETURN2(rp, h, l) do { \ | |
430 | (rp)->hi = (h); \ | |
431 | (rp)->lo = (l); \ | |
432 | (rp)->lo_set = 1; \ | |
433 | return; \ | |
434 | } while (0) | |
435 | ||
436 | struct ld { | |
437 | long double hi; | |
438 | long double lo; | |
439 | int lo_set; | |
440 | }; | |
441 | #else | |
442 | #define RETURN1(rp, v) RETURNF(v) | |
443 | #define RETURN2(rp, h, l) RETURNI((h) + (l)) | |
444 | #endif | |
445 | ||
446 | #ifdef STRUCT_RETURN | |
447 | static inline __always_inline void | |
448 | k_logl(long double x, struct ld *rp) | |
449 | #else | |
450 | long double | |
451 | logl(long double x) | |
452 | #endif | |
453 | { | |
454 | long double d, dk, val_hi, val_lo, z; | |
455 | uint64_t ix, lx; | |
456 | int i, k; | |
457 | uint16_t hx; | |
458 | ||
459 | EXTRACT_LDBL80_WORDS(hx, lx, x); | |
460 | k = -16383; | |
461 | #if 0 /* Hard to do efficiently. Don't do it until we support all modes. */ | |
462 | if (x == 1) | |
463 | RETURN1(rp, 0); /* log(1) = +0 in all rounding modes */ | |
464 | #endif | |
465 | if (hx == 0 || hx >= 0x8000) { /* zero, negative or subnormal? */ | |
466 | if (((hx & 0x7fff) | lx) == 0) | |
467 | RETURN1(rp, -1 / zero); /* log(+-0) = -Inf */ | |
468 | if (hx != 0) | |
469 | /* log(neg or [pseudo-]NaN) = qNaN: */ | |
470 | RETURN1(rp, (x - x) / zero); | |
471 | x *= 0x1.0p65; /* subnormal; scale up x */ | |
472 | /* including pseudo-subnormals */ | |
473 | EXTRACT_LDBL80_WORDS(hx, lx, x); | |
474 | k = -16383 - 65; | |
475 | } else if (hx >= 0x7fff || (lx & 0x8000000000000000ULL) == 0) | |
476 | RETURN1(rp, x + x); /* log(Inf or NaN) = Inf or qNaN */ | |
477 | /* log(pseudo-Inf) = qNaN */ | |
478 | /* log(pseudo-NaN) = qNaN */ | |
479 | /* log(unnormal) = qNaN */ | |
480 | #ifndef STRUCT_RETURN | |
481 | ENTERI(); | |
482 | #endif | |
483 | k += hx; | |
484 | ix = lx & 0x7fffffffffffffffULL; | |
485 | dk = k; | |
486 | ||
487 | /* Scale x to be in [1, 2). */ | |
488 | SET_LDBL_EXPSIGN(x, 0x3fff); | |
489 | ||
490 | /* 0 <= i <= INTERVALS: */ | |
491 | #define L2I (64 - LOG2_INTERVALS) | |
492 | i = (ix + (1LL << (L2I - 2))) >> (L2I - 1); | |
493 | ||
494 | /* | |
495 | * -0.005280 < d < 0.004838. In particular, the infinite- | |
496 | * precision |d| is <= 2**-7. Rounding of G(i) to 8 bits | |
497 | * ensures that d is representable without extra precision for | |
498 | * this bound on |d| (since when this calculation is expressed | |
499 | * as x*G(i)-1, the multiplication needs as many extra bits as | |
500 | * G(i) has and the subtraction cancels 8 bits). But for | |
501 | * most i (107 cases out of 129), the infinite-precision |d| | |
502 | * is <= 2**-8. G(i) is rounded to 9 bits for such i to give | |
503 | * better accuracy (this works by improving the bound on |d|, | |
504 | * which in turn allows rounding to 9 bits in more cases). | |
505 | * This is only important when the original x is near 1 -- it | |
506 | * lets us avoid using a special method to give the desired | |
507 | * accuracy for such x. | |
508 | */ | |
509 | if (0) | |
510 | d = x * G(i) - 1; | |
511 | else { | |
512 | #ifdef USE_UTAB | |
513 | d = (x - H(i)) * G(i) + E(i); | |
514 | #else | |
515 | long double x_hi, x_lo; | |
516 | float fx_hi; | |
517 | ||
518 | /* | |
519 | * Split x into x_hi + x_lo to calculate x*G(i)-1 exactly. | |
520 | * G(i) has at most 9 bits, so the splitting point is not | |
521 | * critical. | |
522 | */ | |
523 | SET_FLOAT_WORD(fx_hi, (lx >> 40) | 0x3f800000); | |
524 | x_hi = fx_hi; | |
525 | x_lo = x - x_hi; | |
526 | d = x_hi * G(i) - 1 + x_lo * G(i); | |
527 | #endif | |
528 | } | |
529 | ||
530 | /* | |
531 | * Our algorithm depends on exact cancellation of F_lo(i) and | |
532 | * F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is | |
533 | * at the end of the table. This and other technical complications | |
534 | * make it difficult to avoid the double scaling in (dk*ln2) * | |
535 | * log(base) for base != e without losing more accuracy and/or | |
536 | * efficiency than is gained. | |
537 | */ | |
538 | z = d * d; | |
539 | val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) + | |
540 | (F_lo(i) + dk * ln2_lo + z * d * (d * P4 + P3)) + z * P2; | |
541 | val_hi = d; | |
542 | #ifdef DEBUG | |
543 | if (fetestexcept(FE_UNDERFLOW)) | |
544 | breakpoint(); | |
545 | #endif | |
546 | ||
547 | _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); | |
548 | RETURN2(rp, val_hi, val_lo); | |
549 | } | |
550 | ||
551 | long double | |
552 | log1pl(long double x) | |
553 | { | |
554 | long double d, d_hi, d_lo, dk, f_lo, val_hi, val_lo, z; | |
555 | long double f_hi, twopminusk; | |
556 | uint64_t ix, lx; | |
557 | int i, k; | |
558 | int16_t ax, hx; | |
559 | ||
560 | DOPRINT_START(&x); | |
561 | EXTRACT_LDBL80_WORDS(hx, lx, x); | |
562 | if (hx < 0x3fff) { /* x < 1, or x neg NaN */ | |
563 | ax = hx & 0x7fff; | |
564 | if (ax >= 0x3fff) { /* x <= -1, or x neg NaN */ | |
565 | if (ax == 0x3fff && lx == 0x8000000000000000ULL) | |
566 | RETURNP(-1 / zero); /* log1p(-1) = -Inf */ | |
567 | /* log1p(x < 1, or x [pseudo-]NaN) = qNaN: */ | |
568 | RETURNP((x - x) / (x - x)); | |
569 | } | |
570 | if (ax <= 0x3fbe) { /* |x| < 2**-64 */ | |
571 | if ((int)x == 0) | |
572 | RETURNP(x); /* x with inexact if x != 0 */ | |
573 | } | |
574 | f_hi = 1; | |
575 | f_lo = x; | |
576 | } else if (hx >= 0x7fff) { /* x +Inf or non-neg NaN */ | |
577 | RETURNP(x + x); /* log1p(Inf or NaN) = Inf or qNaN */ | |
578 | /* log1p(pseudo-Inf) = qNaN */ | |
579 | /* log1p(pseudo-NaN) = qNaN */ | |
580 | /* log1p(unnormal) = qNaN */ | |
581 | } else if (hx < 0x407f) { /* 1 <= x < 2**128 */ | |
582 | f_hi = x; | |
583 | f_lo = 1; | |
584 | } else { /* 2**128 <= x < +Inf */ | |
585 | f_hi = x; | |
586 | f_lo = 0; /* avoid underflow of the P5 term */ | |
587 | } | |
588 | ENTERI(); | |
589 | x = f_hi + f_lo; | |
590 | f_lo = (f_hi - x) + f_lo; | |
591 | ||
592 | EXTRACT_LDBL80_WORDS(hx, lx, x); | |
593 | k = -16383; | |
594 | ||
595 | k += hx; | |
596 | ix = lx & 0x7fffffffffffffffULL; | |
597 | dk = k; | |
598 | ||
599 | SET_LDBL_EXPSIGN(x, 0x3fff); | |
600 | twopminusk = 1; | |
601 | SET_LDBL_EXPSIGN(twopminusk, 0x7ffe - (hx & 0x7fff)); | |
602 | f_lo *= twopminusk; | |
603 | ||
604 | i = (ix + (1LL << (L2I - 2))) >> (L2I - 1); | |
605 | ||
606 | /* | |
607 | * x*G(i)-1 (with a reduced x) can be represented exactly, as | |
608 | * above, but now we need to evaluate the polynomial on d = | |
609 | * (x+f_lo)*G(i)-1 and extra precision is needed for that. | |
610 | * Since x+x_lo is a hi+lo decomposition and subtracting 1 | |
611 | * doesn't lose too many bits, an inexact calculation for | |
612 | * f_lo*G(i) is good enough. | |
613 | */ | |
614 | if (0) | |
615 | d_hi = x * G(i) - 1; | |
616 | else { | |
617 | #ifdef USE_UTAB | |
618 | d_hi = (x - H(i)) * G(i) + E(i); | |
619 | #else | |
620 | long double x_hi, x_lo; | |
621 | float fx_hi; | |
622 | ||
623 | SET_FLOAT_WORD(fx_hi, (lx >> 40) | 0x3f800000); | |
624 | x_hi = fx_hi; | |
625 | x_lo = x - x_hi; | |
626 | d_hi = x_hi * G(i) - 1 + x_lo * G(i); | |
627 | #endif | |
628 | } | |
629 | d_lo = f_lo * G(i); | |
630 | ||
631 | /* | |
632 | * This is _2sumF(d_hi, d_lo) inlined. The condition | |
633 | * (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not | |
634 | * always satisifed, so it is not clear that this works, but | |
635 | * it works in practice. It works even if it gives a wrong | |
636 | * normalized d_lo, since |d_lo| > |d_hi| implies that i is | |
637 | * nonzero and d is tiny, so the F(i) term dominates d_lo. | |
638 | * In float precision: | |
639 | * (By exhaustive testing, the worst case is d_hi = 0x1.bp-25. | |
640 | * And if d is only a little tinier than that, we would have | |
641 | * another underflow problem for the P3 term; this is also ruled | |
642 | * out by exhaustive testing.) | |
643 | */ | |
644 | d = d_hi + d_lo; | |
645 | d_lo = d_hi - d + d_lo; | |
646 | d_hi = d; | |
647 | ||
648 | z = d * d; | |
649 | val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) + | |
650 | (F_lo(i) + dk * ln2_lo + d_lo + z * d * (d * P4 + P3)) + z * P2; | |
651 | val_hi = d_hi; | |
652 | #ifdef DEBUG | |
653 | if (fetestexcept(FE_UNDERFLOW)) | |
654 | breakpoint(); | |
655 | #endif | |
656 | ||
657 | _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); | |
658 | RETURN2PI(val_hi, val_lo); | |
659 | } | |
660 | ||
661 | #ifdef STRUCT_RETURN | |
662 | ||
663 | long double | |
664 | logl(long double x) | |
665 | { | |
666 | struct ld r; | |
667 | ||
668 | ENTERI(); | |
669 | DOPRINT_START(&x); | |
670 | k_logl(x, &r); | |
671 | RETURNSPI(&r); | |
672 | } | |
673 | ||
674 | static const double | |
675 | invln10_hi = 4.3429448190317999e-1, /* 0x1bcb7b1526e000.0p-54 */ | |
676 | invln10_lo = 7.1842412889749798e-14, /* 0x1438ca9aadd558.0p-96 */ | |
677 | invln2_hi = 1.4426950408887933e0, /* 0x171547652b8000.0p-52 */ | |
678 | invln2_lo = 1.7010652264631490e-13; /* 0x17f0bbbe87fed0.0p-95 */ | |
679 | ||
680 | long double | |
681 | log10l(long double x) | |
682 | { | |
683 | struct ld r; | |
684 | long double hi, lo; | |
685 | ||
686 | ENTERI(); | |
687 | DOPRINT_START(&x); | |
688 | k_logl(x, &r); | |
689 | if (!r.lo_set) | |
690 | RETURNPI(r.hi); | |
691 | _2sumF(r.hi, r.lo); | |
692 | hi = (float)r.hi; | |
693 | lo = r.lo + (r.hi - hi); | |
694 | RETURN2PI(invln10_hi * hi, | |
695 | (invln10_lo + invln10_hi) * lo + invln10_lo * hi); | |
696 | } | |
697 | ||
698 | long double | |
699 | log2l(long double x) | |
700 | { | |
701 | struct ld r; | |
702 | long double hi, lo; | |
703 | ||
704 | ENTERI(); | |
705 | DOPRINT_START(&x); | |
706 | k_logl(x, &r); | |
707 | if (!r.lo_set) | |
708 | RETURNPI(r.hi); | |
709 | _2sumF(r.hi, r.lo); | |
710 | hi = (float)r.hi; | |
711 | lo = r.lo + (r.hi - hi); | |
712 | RETURN2PI(invln2_hi * hi, | |
713 | (invln2_lo + invln2_hi) * lo + invln2_lo * hi); | |
714 | } | |
715 | ||
716 | #endif /* STRUCT_RETURN */ |