| Commit | Line | Data |
|---|---|---|
| b34b60bc JS |
1 | /* |
| 2 | * Written by J.T. Conklin <jtc@NetBSD.org>. | |
| 3 | * Public domain. | |
| 4 | * | |
| 5 | * $NetBSD: s_log1pf.S,v 1.10 2003/09/16 18:17:11 wennmach Exp $ | |
| b34b60bc JS |
6 | */ |
| 7 | ||
| 8 | /* | |
| 9 | * Modified by Lex Wennmacher <wennmach@NetBSD.org> | |
| 10 | * Still public domain. | |
| 11 | */ | |
| 12 | ||
| 13 | #include <machine/asm.h> | |
| 14 | ||
| 15 | #include "abi.h" | |
| 16 | ||
| 17 | /* | |
| 18 | * The log1pf() function is provided to compute an accurate value of | |
| 19 | * log(1 + x), even for tiny values of x. The i387 FPU provides the | |
| 20 | * fyl2xp1 instruction for this purpose. However, the range of this | |
| 21 | * instruction is limited to: | |
| 22 | * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1 | |
| 23 | * -0.292893 <= x <= 0.414214 | |
| 24 | * at least on older processor versions. | |
| 25 | * | |
| 26 | * log1pf() is implemented by testing the range of the argument. | |
| 27 | * If it is appropriate for fyl2xp1, this instruction is used. | |
| 28 | * Else, we compute log1pf(x) = ln(2)*ld(1 + x) the traditional way | |
| 29 | * (using fyl2x). | |
| 30 | * | |
| 31 | * The range testing costs speed, but as the rationale for the very | |
| 32 | * existence of this function is accuracy, we accept that. | |
| 33 | * | |
| 34 | * In order to reduce the cost for testing the range, we check if | |
| 35 | * the argument is in the range | |
| 36 | * -0.25 <= x <= 0.25 | |
| 37 | * which can be done with just one conditional branch. If x is | |
| 38 | * inside this range, we use fyl2xp1. Outside of this range, | |
| 39 | * the use of fyl2x is accurate enough. | |
| 40 | * | |
| 41 | */ | |
| 42 | ||
| 43 | .text | |
| 44 | .align 4 | |
| 45 | ENTRY(log1pf) | |
| 46 | XMM_ONE_ARG_FLOAT_PROLOGUE | |
| 47 | flds ARG_FLOAT_ONE | |
| 48 | fabs | |
| 49 | fld1 /* ... x 1 */ | |
| 50 | fadd %st(0) /* ... x 2 */ | |
| 51 | fadd %st(0) /* ... x 4 */ | |
| 52 | fld1 /* ... 4 1 */ | |
| 53 | fdivp /* ... x 0.25 */ | |
| 54 | fcompp | |
| 55 | fnstsw %ax | |
| 56 | andb $69,%ah | |
| 57 | jne use_fyl2x | |
| 58 | jmp use_fyl2xp1 | |
| 59 | ||
| 60 | .align 4 | |
| 61 | use_fyl2x: | |
| 62 | fldln2 | |
| 63 | flds ARG_FLOAT_ONE | |
| 64 | fld1 | |
| 65 | faddp | |
| 66 | fyl2x | |
| 67 | XMM_FLOAT_EPILOGUE | |
| 68 | ret | |
| 69 | ||
| 70 | .align 4 | |
| 71 | use_fyl2xp1: | |
| 72 | fldln2 | |
| 73 | flds ARG_FLOAT_ONE | |
| 74 | fyl2xp1 | |
| 75 | XMM_FLOAT_EPILOGUE | |
| 76 | ret | |
| d04e8698 | 77 | END(log1pf) |
| 70e34eb2 JM |
78 | |
| 79 | .section .note.GNU-stack,"",%progbits |