5 * Fixed point arithmetic square root evaluation.
8 * void wm_sqrt(FPU_REG *n, unsigned int control_word)
11 * Copyright (C) 1992,1993,1994
12 * W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
13 * Australia. E-mail billm@vaxc.cc.monash.edu.au
14 * All rights reserved.
16 * This copyright notice covers the redistribution and use of the
17 * FPU emulator developed by W. Metzenthen. It covers only its use
18 * in the 386BSD, FreeBSD and NetBSD operating systems. Any other
19 * use is not permitted under this copyright.
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
24 * 1. Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * 2. Redistributions in binary form must include information specifying
27 * that source code for the emulator is freely available and include
29 * a) an offer to provide the source code for a nominal distribution
31 * b) list at least two alternative methods whereby the source
32 * can be obtained, e.g. a publically accessible bulletin board
33 * and an anonymous ftp site from which the software can be
35 * 3. All advertising materials specifically mentioning features or use of
36 * this emulator must acknowledge that it was developed by W. Metzenthen.
37 * 4. The name of W. Metzenthen may not be used to endorse or promote
38 * products derived from this software without specific prior written
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
42 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
43 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
44 * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
46 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
47 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
48 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
49 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
50 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 * The purpose of this copyright, based upon the Berkeley copyright, is to
54 * ensure that the covered software remains freely available to everyone.
56 * The software (with necessary differences) is also available, but under
57 * the terms of the GNU copyleft, for the Linux operating system and for
58 * the djgpp ms-dos extender.
60 * W. Metzenthen June 1994.
63 * $FreeBSD: src/sys/gnu/i386/fpemul/wm_sqrt.s,v 1.9.2.1 2000/07/07 00:38:42 obrien Exp $
64 * $DragonFly: src/sys/i386/gnu/fpemul/Attic/wm_sqrt.s,v 1.2 2003/06/17 04:28:34 dillon Exp $
69 /*---------------------------------------------------------------------------+
70 | wm_sqrt(FPU_REG *n, unsigned int control_word) |
71 | returns the square root of n in n. |
73 | Use Newton's method to compute the square root of a number, which must |
74 | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
75 | Does not check the sign or tag of the argument. |
76 | Sets the exponent, but not the sign or tag of the result. |
78 | The guess is kept in %esi:%edi |
79 +---------------------------------------------------------------------------*/
81 #include <gnu/i386/fpemul/fpu_asm.h>
98 /* The de-normalised argument:
100 // b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
101 // ^ binary point here */
103 .long 0 /* ms word */
107 .long 0 /* ls word, at most the ms bit is set */
124 /* We use a rough linear estimate for the first guess.. */
126 cmpl EXP_BIAS,EXP(%esi)
129 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
134 /* From here on, n is never accessed directly again until it is
135 // replaced by the answer. */
137 movl %eax,fsqrt_arg_2 /* ms word of n */
138 movl %ecx,fsqrt_arg_1
139 movl %edx,fsqrt_arg_0
141 /* Make a linear first estimate */
143 addl $0x40000000,%eax
144 movl $0xaaaaaaaa,%ecx
146 shll %edx /* max result was 7fff... */
147 testl $0x80000000,%edx /* but min was 3fff... */
148 jnz sqrt_prelim_no_adjust
150 movl $0x80000000,%edx /* round up */
152 sqrt_prelim_no_adjust:
153 movl %edx,%esi /* Our first guess */
155 /* We have now computed (approx) (2 + x) / 3, which forms the basis
156 for a few iterations of Newton's method */
158 movl fsqrt_arg_2,%ecx /* ms word */
160 /* From our initial estimate, three iterations are enough to get us
161 // to 30 bits or so. This will then allow two iterations at better
162 // precision to complete the process.
164 // Compute (g + n/g)/2 at each iteration (g is the guess). */
165 shrl %ecx /* Doing this first will prevent a divide */
166 /* overflow later. */
168 movl %ecx,%edx /* msw of the arg / 2 */
169 divl %esi /* current estimate */
170 shrl %esi /* divide by 2 */
171 addl %eax,%esi /* the new estimate */
183 /* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
184 // we improve it to 60 bits or so.
186 // The strategy from now on is to compute new estimates from
187 // guess := guess + (n - guess^2) / (2 * guess) */
189 /* First, find the square of the guess */
192 /* guess^2 now in %edx:%eax */
194 movl fsqrt_arg_1,%ecx
196 movl fsqrt_arg_2,%ecx /* ms word of normalized n */
198 jnc sqrt_stage_2_positive
199 /* subtraction gives a negative result
200 // negate the result before division */
211 jmp sqrt_stage_2_finish
213 sqrt_stage_2_positive:
226 sarl $1,%ecx /* divide by 2 */
229 /* Form the new estimate in %esi:%edi */
233 jnz sqrt_stage_2_done /* result should be [1..2) */
236 /* It should be possible to get here only if the arg is ffff....ffff*/
237 cmp $0xffffffff,fsqrt_arg_1
238 jnz sqrt_stage_2_error
241 /* The best rounded result.*/
246 movl $0x7fffffff,%eax
247 jmp sqrt_round_result
251 pushl EX_INTERNAL|0x213
257 /* Now the square root has been computed to better than 60 bits */
259 /* Find the square of the guess*/
260 movl %edi,%eax /* ls word of guess*/
281 /* guess^2 now in accum_3:accum_2:accum_1*/
283 movl fsqrt_arg_0,%eax /* get normalized n*/
285 movl fsqrt_arg_1,%eax
287 movl fsqrt_arg_2,%eax /* ms word of normalized n*/
289 jnc sqrt_stage_3_positive
291 /* subtraction gives a negative result*/
292 /* negate the result before division */
300 adcl $0,accum_3 /* This must be zero */
301 jz sqrt_stage_3_no_error
304 pushl EX_INTERNAL|0x207
307 sqrt_stage_3_no_error:
318 sarl $1,%ecx /* divide by 2*/
321 /* prepare to round the result*/
326 jmp sqrt_stage_3_finished
328 sqrt_stage_3_positive:
337 sarl $1,%ecx /* divide by 2*/
340 /* prepare to round the result*/
342 notl %eax /* Negate the correction term*/
345 adcl $0,%ecx /* carry here ==> correction == 0*/
346 adcl $0xffffffff,%esi
351 sqrt_stage_3_finished:
353 /* The result in %esi:%edi:%esi should be good to about 90 bits here,
354 // and the rounding information here does not have sufficient accuracy
355 // in a few rare cases. */
356 cmpl $0xffffffe0,%eax
359 cmpl $0x00000020,%eax
362 cmpl $0x7fffffe0,%eax
365 cmpl $0x80000020,%eax
366 jb sqrt_get_more_precision
369 /* Set up for rounding operations*/
374 movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0)*/
380 /* First, the estimate must be rounded up.*/
385 /* This is an easy case because x^1/2 is monotonic.
386 // We need just find the square of our estimate, compare it
387 // with the argument, and deduce whether our estimate is
388 // above, below, or exact. We use the fact that the estimate
389 // is known to be accurate to about 90 bits. */
390 movl %edi,%eax /* ls word of guess*/
392 movl %edx,%ebx /* 2nd ls word of square*/
393 movl %eax,%ecx /* ls word of square*/
402 jb sqrt_near_exact_ok
405 ja sqrt_near_exact_ok
407 pushl EX_INTERNAL|0x214
414 js sqrt_near_exact_small
416 jnz sqrt_near_exact_large
419 jnz sqrt_near_exact_large
421 /* Our estimate is exactly the right answer*/
423 jmp sqrt_round_result
425 sqrt_near_exact_small:
426 /* Our estimate is too small*/
427 movl $0x000000ff,%eax
428 jmp sqrt_round_result
430 sqrt_near_exact_large:
431 /* Our estimate is too large, we need to decrement it*/
434 movl $0xffffff00,%eax
435 jmp sqrt_round_result
438 sqrt_get_more_precision:
439 /* This case is almost the same as the above, except we start*/
440 /* with an extra bit of precision in the estimate.*/
441 stc /* The extra bit.*/
442 rcll $1,%edi /* Shift the estimate left one bit*/
445 movl %edi,%eax /* ls word of guess*/
447 movl %edx,%ebx /* 2nd ls word of square*/
448 movl %eax,%ecx /* ls word of square*/
455 /* Put our estimate back to its original value*/
457 rcrl $1,%esi /* Shift the estimate left one bit*/
467 pushl EX_INTERNAL|0x215
474 js sqrt_more_prec_small
476 jnz sqrt_more_prec_large
479 jnz sqrt_more_prec_large
481 /* Our estimate is exactly the right answer*/
482 movl $0x80000000,%eax
483 jmp sqrt_round_result
485 sqrt_more_prec_small:
486 /* Our estimate is too small*/
487 movl $0x800000ff,%eax
488 jmp sqrt_round_result
490 sqrt_more_prec_large:
491 /* Our estimate is too large*/
492 movl $0x7fffff00,%eax
493 jmp sqrt_round_result