| Commit | Line | Data |
|---|---|---|
| fc3f9779 SS |
1 | /*- |
| 2 | * Copyright (c) 2003 Peter Wemm. | |
| 3 | * Copyright (c) 1990 Andrew Moore, Talke Studio | |
| 4 | * All rights reserved. | |
| 5 | * | |
| 6 | * Redistribution and use in source and binary forms, with or without | |
| 7 | * modification, are permitted provided that the following conditions | |
| 8 | * are met: | |
| 9 | * 1. Redistributions of source code must retain the above copyright | |
| 10 | * notice, this list of conditions and the following disclaimer. | |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer in the | |
| 13 | * documentation and/or other materials provided with the distribution. | |
| 14 | * 3. All advertising materials mentioning features or use of this software | |
| 15 | * must display the following acknowledgement: | |
| 16 | * This product includes software developed by the University of | |
| 17 | * California, Berkeley and its contributors. | |
| 18 | * 4. Neither the name of the University nor the names of its contributors | |
| 19 | * may be used to endorse or promote products derived from this software | |
| 20 | * without specific prior written permission. | |
| 21 | * | |
| 22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| 33 | * | |
| 34 | * from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93 | |
| fe667cd2 | 35 | * $FreeBSD: src/sys/amd64/include/ieeefp.h,v 1.19 2008/01/11 17:11:32 bde Exp $ |
| fc3f9779 SS |
36 | */ |
| 37 | ||
| fc3f9779 SS |
38 | #ifndef _CPU_IEEEFP_H_ |
| 39 | #define _CPU_IEEEFP_H_ | |
| 40 | ||
/*
 * IEEE floating point type, constant and function definitions.
 * XXX: {FP,SSE}*FLD and {FP,SSE}*OFF are undocumented pollution.
 */
| 45 | ||
/*
 * Rounding modes.
 * The enumerator values are the raw rounding-control field encoding
 * that fpgetround()/fpsetround() shift in and out of the control
 * registers, so their order must not be changed.
 */
typedef enum {
	FP_RN = 0,	/* round to nearest */
	FP_RM,		/* round down towards minus infinity */
	FP_RP,		/* round up towards plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;
| 55 | ||
/*
 * Precision (i.e., rounding precision) modes.
 * The enumerator values are the raw precision-control field encoding
 * that fpgetprec()/fpsetprec() shift in and out of the x87 control
 * word, so their order must not be changed.
 */
typedef enum {
	FP_PS = 0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;
| 65 | ||
/* Type holding a set of FP_X_* exception bits (enable mask or sticky flags). */
#define	fp_except_t	int
| 67 | ||
/*
 * Exception bit masks.
 */
#define	FP_X_INV	0x01	/* invalid operation */
#define	FP_X_DNML	0x02	/* denormal */
#define	FP_X_DZ		0x04	/* zero divide */
#define	FP_X_OFL	0x08	/* overflow */
#define	FP_X_UFL	0x10	/* underflow */
#define	FP_X_IMP	0x20	/* (im)precision */
#define	FP_X_STK	0x40	/* stack fault */
| 78 | ||
/*
 * FPU control word bit-field masks.
 */
#define	FP_MSKS_FLD	0x3f	/* exception masks field */
#define	FP_PRC_FLD	0x300	/* precision control field */
#define	FP_RND_FLD	0xc00	/* rounding control field */
| fc3f9779 SS |
85 | |
/*
 * FPU status word bit-field masks.
 */
#define	FP_STKY_FLD	0x3f	/* sticky flags field */
| 90 | ||
/*
 * SSE mxcsr register bit-field masks.
 */
#define	SSE_STKY_FLD	0x3f	/* exception flags */
#define	SSE_DAZ_FLD	0x40	/* denormals are zero */
#define	SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define	SSE_RND_FLD	0x6000	/* rounding control */
#define	SSE_FZ_FLD	0x8000	/* flush to zero on underflow */
| 99 | ||
/*
 * FPU control word bit-field offsets (shift counts).
 */
#define	FP_MSKS_OFF	0	/* exception masks offset */
#define	FP_PRC_OFF	8	/* precision control offset */
#define	FP_RND_OFF	10	/* rounding control offset */
| 106 | ||
/*
 * FPU status word bit-field offsets (shift counts).
 */
#define	FP_STKY_OFF	0	/* sticky flags offset */
| 111 | ||
/*
 * SSE mxcsr register bit-field offsets (shift counts).
 */
#define	SSE_STKY_OFF	0	/* exception flags offset */
#define	SSE_DAZ_OFF	6	/* denormals-are-zero bit offset */
#define	SSE_MSKS_OFF	7	/* exception masks offset */
#define	SSE_RND_OFF	13	/* rounding control offset */
#define	SSE_FZ_OFF	15	/* flush to zero offset */
| 120 | ||
| 121 | #if defined(__GNUC__) && !defined(__cplusplus) | |
| 122 | ||
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 * Each takes a pointer; the object it points to becomes the memory
 * operand of the instruction (input "m" for loads, output "=m" for
 * stores), so the pointed-to type determines how much memory the
 * compiler assumes is read or written.
 *
 *	_fldcw/_fnstcw		load/store the x87 control word
 *	_fldenv/_fnstenv	load/store the x87 environment
 *	_fnstsw			store the x87 status word
 *	_ldmxcsr/_stmxcsr	load/store the SSE mxcsr register
 */
#define	_fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	_fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)))
#define	_fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	_fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)))
#define	_fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	_ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define	_stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))
| fc3f9779 SS |
130 | |
| 131 | /* | |
| a2a636cc MD |
132 | * Load the control word. Be careful not to trap if there is a currently |
| 133 | * unmasked exception (ones that will become freshly unmasked are not a | |
| 134 | * problem). This case must be handled by a save/restore of the | |
| 135 | * environment or even of the full x87 state. Accessing the environment | |
| 136 | * is very inefficient, so only do it when necessary. | |
| 137 | */ | |
| 138 | static __inline void | |
| 5a8d63ad | 139 | _fnldcw(unsigned short _cw, unsigned short _newcw) |
| a2a636cc MD |
140 | { |
| 141 | struct { | |
| 142 | unsigned _cw; | |
| 143 | unsigned _other[6]; | |
| 144 | } _env; | |
| 145 | unsigned short _sw; | |
| 146 | ||
| 147 | if ((_cw & FP_MSKS_FLD) != FP_MSKS_FLD) { | |
| 5a8d63ad | 148 | _fnstsw(&_sw); |
| a2a636cc | 149 | if (((_sw & ~_cw) & FP_STKY_FLD) != 0) { |
| 5a8d63ad | 150 | _fnstenv(&_env); |
| a2a636cc | 151 | _env._cw = _newcw; |
| 5a8d63ad | 152 | _fldenv(&_env); |
| a2a636cc MD |
153 | return; |
| 154 | } | |
| 155 | } | |
| 5a8d63ad | 156 | _fldcw(&_newcw); |
| a2a636cc MD |
157 | } |
| 158 | ||
/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way as to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */
| 169 | ||
| a2a636cc | 170 | static __inline fp_rnd_t |
| 5a8d63ad | 171 | _fpgetround(void) |
| fc3f9779 SS |
172 | { |
| 173 | unsigned short _cw; | |
| 174 | ||
| 5a8d63ad | 175 | _fnstcw(&_cw); |
| a2a636cc | 176 | return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF)); |
| fc3f9779 SS |
177 | } |
| 178 | ||
| a2a636cc | 179 | static __inline fp_rnd_t |
| 5a8d63ad | 180 | _fpsetround(fp_rnd_t _m) |
| fc3f9779 | 181 | { |
| fc3f9779 | 182 | fp_rnd_t _p; |
| a2a636cc MD |
183 | unsigned _mxcsr; |
| 184 | unsigned short _cw, _newcw; | |
| fc3f9779 | 185 | |
| 5a8d63ad | 186 | _fnstcw(&_cw); |
| a2a636cc MD |
187 | _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF); |
| 188 | _newcw = _cw & ~FP_RND_FLD; | |
| 189 | _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD; | |
| 5a8d63ad PA |
190 | _fnldcw(_cw, _newcw); |
| 191 | _stmxcsr(&_mxcsr); | |
| fc3f9779 SS |
192 | _mxcsr &= ~SSE_RND_FLD; |
| 193 | _mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD; | |
| 5a8d63ad | 194 | _ldmxcsr(&_mxcsr); |
| fc3f9779 SS |
195 | return (_p); |
| 196 | } | |
| 197 | ||
| 198 | /* | |
| a2a636cc | 199 | * Get or set the rounding precision for x87 arithmetic operations. |
| fc3f9779 SS |
200 | * There is no equivalent SSE mode or control. |
| 201 | */ | |
| a2a636cc MD |
202 | |
| 203 | static __inline fp_prec_t | |
| 5a8d63ad | 204 | _fpgetprec(void) |
| fc3f9779 SS |
205 | { |
| 206 | unsigned short _cw; | |
| 207 | ||
| 5a8d63ad | 208 | _fnstcw(&_cw); |
| a2a636cc | 209 | return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF)); |
| fc3f9779 SS |
210 | } |
| 211 | ||
| a2a636cc | 212 | static __inline fp_prec_t |
| 5a8d63ad | 213 | _fpsetprec(fp_prec_t _m) |
| fc3f9779 | 214 | { |
| fc3f9779 | 215 | fp_prec_t _p; |
| a2a636cc | 216 | unsigned short _cw, _newcw; |
| fc3f9779 | 217 | |
| 5a8d63ad | 218 | _fnstcw(&_cw); |
| a2a636cc MD |
219 | _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF); |
| 220 | _newcw = _cw & ~FP_PRC_FLD; | |
| 221 | _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD; | |
| 5a8d63ad | 222 | _fnldcw(_cw, _newcw); |
| fc3f9779 SS |
223 | return (_p); |
| 224 | } | |
| 225 | ||
| 226 | /* | |
| a2a636cc MD |
227 | * Get or set the exception mask. |
| 228 | * Note that the x87 mask bits are inverted by the API -- a mask bit of 1 | |
| 229 | * means disable for x87 and SSE, but for fp*mask() it means enable. | |
| fc3f9779 | 230 | */ |
| a2a636cc MD |
231 | |
| 232 | static __inline fp_except_t | |
| 5a8d63ad | 233 | _fpgetmask(void) |
| fc3f9779 SS |
234 | { |
| 235 | unsigned short _cw; | |
| 236 | ||
| 5a8d63ad | 237 | _fnstcw(&_cw); |
| a2a636cc | 238 | return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF); |
| fc3f9779 SS |
239 | } |
| 240 | ||
| a2a636cc | 241 | static __inline fp_except_t |
| 5a8d63ad | 242 | _fpsetmask(fp_except_t _m) |
| fc3f9779 | 243 | { |
| fc3f9779 | 244 | fp_except_t _p; |
| a2a636cc MD |
245 | unsigned _mxcsr; |
| 246 | unsigned short _cw, _newcw; | |
| fc3f9779 | 247 | |
| 5a8d63ad | 248 | _fnstcw(&_cw); |
| a2a636cc MD |
249 | _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF; |
| 250 | _newcw = _cw & ~FP_MSKS_FLD; | |
| 251 | _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD; | |
| 5a8d63ad PA |
252 | _fnldcw(_cw, _newcw); |
| 253 | _stmxcsr(&_mxcsr); | |
| fc3f9779 SS |
254 | /* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */ |
| 255 | _mxcsr &= ~SSE_MSKS_FLD; | |
| a2a636cc | 256 | _mxcsr |= (~_m << SSE_MSKS_OFF) & SSE_MSKS_FLD; |
| 5a8d63ad | 257 | _ldmxcsr(&_mxcsr); |
| fc3f9779 SS |
258 | return (_p); |
| 259 | } | |
| 260 | ||
| a2a636cc | 261 | static __inline fp_except_t |
| 5a8d63ad | 262 | _fpgetsticky(void) |
| fc3f9779 | 263 | { |
| a2a636cc | 264 | unsigned _ex, _mxcsr; |
| fc3f9779 | 265 | unsigned short _sw; |
| fc3f9779 | 266 | |
| 5a8d63ad | 267 | _fnstsw(&_sw); |
| a2a636cc | 268 | _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF; |
| 5a8d63ad | 269 | _stmxcsr(&_mxcsr); |
| a2a636cc MD |
270 | _ex |= (_mxcsr & SSE_STKY_FLD) >> SSE_STKY_OFF; |
| 271 | return ((fp_except_t)_ex); | |
| fc3f9779 SS |
272 | } |
| 273 | ||
| 274 | #endif /* __GNUC__ && !__cplusplus */ | |
| 275 | ||
#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) && defined(__GNUC__)

/*
 * Inline case: route the public fp*() names straight to the inline
 * implementations.
 */
#define	fpgetmask()	_fpgetmask()
#define	fpgetprec()	_fpgetprec()
#define	fpgetround()	_fpgetround()
#define	fpgetsticky()	_fpgetsticky()
#define	fpsetmask(m)	_fpsetmask(m)
#define	fpsetprec(m)	_fpsetprec(m)
#define	fpsetround(m)	_fpsetround(m)

/* Suppress prototypes in the MI header. */
#define	_IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

/* Augment the userland declarations. */
__BEGIN_DECLS
fp_prec_t	fpgetprec(void);
fp_prec_t	fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */
| 298 | ||
| 299 | #endif /* !_CPU_IEEEFP_H_ */ |