2 * linux/kernel/math/math_emulate.c
4 * (C) 1991 Linus Torvalds
6 * [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
9 * $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
10 * $DragonFly: src/sys/i386/i386/Attic/math_emulate.c,v 1.3 2003/06/18 06:33:24 dillon Exp $
14 * Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
15 * even for soft-float, unless you use bruce evans' patches. The patches
16 * are great, but they have to be re-applied for every version, and the
17 * library is different for soft-float and 80387. So emulation is more
18 * practical, even though it's slower.
20 * 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
21 * about add/sub/mul/div. Urgel. I should find some good source, but I'll
22 * just fake up something.
24 * 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
25 * test every possible combination.
29 * This file is full of ugly macros etc: one problem was that gcc simply
30 * didn't want to make the structures as they should be: it has to try to
31 * align them. Sickening code, but at least I've hidden the ugly things
32 * in this one file: the other files don't need to know about these things.
34 * The other files also don't care about ST(x) etc - they just get addresses
35 * to 80-bit temporary reals, and do with them as they please. I wanted to
36 * hide most of the 387-specific things here.
39 #include <sys/param.h>
40 #include <sys/systm.h>
42 #include <machine/frame.h>
43 #include <machine/reg.h>
46 #include <sys/kernel.h>
51 #include <vm/vm_map.h>
54 #define __ALIGNED_TEMP_REAL 1
55 #include <i386/i386/math_emu.h>
/*
 * bswapw(x): swap the two bytes of the 16-bit lvalue x in place, by
 * loading it into %ax and exchanging %al with %ah.  The argument is
 * parenthesized inside the cast so that the cast always applies to the
 * whole argument expression.
 */
#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)(x)))

/* ST(x): the object that __st(x) points at, usable as an lvalue. */
#define ST(x) (*__st((x)))

/* PST(x): the pointer returned by __st(x), viewed as a read-only temp_real. */
#define PST(x) ((const temp_real *) __st((x)))
/*
 * math_abort(tfp, signo): give up on the current instruction.  Writes the
 * saved instruction pointer back into the trap frame `tfp` and returns
 * `signo` from the enclosing function.  A variable named `oldeip` must be
 * in scope at the expansion site.
 *
 * The body is wrapped in do { } while (0) so the expansion is one
 * statement: both the assignment and the return stay under an unbraced
 * `if`, and a following `else` still parses.
 */
#define math_abort(tfp, signo) \
	do { (tfp)->tf_eip = oldeip; return (signo); } while (0)
63 * We don't want these inlined - it gets too messy in the machine-code.
65 static void fpop(void);
66 static void fpush(void);
67 static void fxchg(temp_real_unaligned * a, temp_real_unaligned * b);
68 static temp_real_unaligned * __st(int i);
71 get_fs_byte(char *adr)
72 { return(fubyte(adr)); }
75 get_fs_word(unsigned short *adr)
76 { return(fuword(adr)); }
79 get_fs_long(u_int32_t *adr)
80 { return(fuword(adr)); }
83 put_fs_byte(unsigned char val, char *adr)
84 { (void)subyte(adr,val); }
87 put_fs_word(unsigned short val, short *adr)
88 { (void)susword(adr,val); }
91 put_fs_long(u_long val, u_int32_t *adr)
92 { (void)suword(adr,val); }
95 math_emulate(struct trapframe * info)
103 /* YYY NOTE: pcb will be moved out of uarea! */
104 if ((((struct pcb *)curproc->p_addr)->pcb_flags & FP_SOFTFP) == 0) {
105 ((struct pcb *)curproc->p_addr)->pcb_flags |= FP_SOFTFP;
111 if (I387.cwd & I387.swd & 0x3f)
115 oldeip = info->tf_eip;
116 /* 0x001f means user code space */
117 if ((u_short)info->tf_cs != 0x001F) {
118 printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
120 panic("?Math emulation needed in kernel?");
122 /* completely ignore an operand-size prefix */
123 if (get_fs_byte((char *) info->tf_eip) == 0x66)
125 code = get_fs_word((unsigned short *) info->tf_eip);
129 *(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
130 *(1+(unsigned short *) &I387.fcs) = code;
133 case 0x1d0: /* fnop */
135 case 0x1d1: case 0x1d2: case 0x1d3: /* fst to 32-bit mem */
136 case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
137 math_abort(info,SIGILL);
138 case 0x1e0: /* fchs */
139 ST(0).exponent ^= 0x8000;
141 case 0x1e1: /* fabs */
142 ST(0).exponent &= 0x7fff;
144 case 0x1e2: case 0x1e3:
145 math_abort(info,SIGILL);
146 case 0x1e4: /* ftst */
149 case 0x1e5: /* fxam */
150 printf("fxam not implemented\n");
151 math_abort(info,SIGILL);
152 case 0x1e6: case 0x1e7: /* fldenv */
153 math_abort(info,SIGILL);
154 case 0x1e8: /* fld1 */
158 case 0x1e9: /* fld2t */
162 case 0x1ea: /* fld2e */
166 case 0x1eb: /* fldpi */
170 case 0x1ec: /* fldlg2 */
174 case 0x1ed: /* fldln2 */
178 case 0x1ee: /* fldz */
183 math_abort(info,SIGILL);
184 case 0x1f0: /* f2xm1 */
185 case 0x1f1: /* fyl2x */
186 case 0x1f2: /* fptan */
187 case 0x1f3: /* fpatan */
188 case 0x1f4: /* fxtract */
189 case 0x1f5: /* fprem1 */
190 case 0x1f6: /* fdecstp */
191 case 0x1f7: /* fincstp */
192 case 0x1f8: /* fprem */
193 case 0x1f9: /* fyl2xp1 */
194 case 0x1fa: /* fsqrt */
195 case 0x1fb: /* fsincos */
196 case 0x1fe: /* fsin */
197 case 0x1ff: /* fcos */
199 "math_emulate: instruction %04x not implemented\n",
201 math_abort(info,SIGILL);
202 case 0x1fc: /* frndint */
203 frndint(PST(0),&tmp);
204 real_to_real(&tmp,&ST(0));
206 case 0x1fd: /* fscale */
207 /* incomplete and totally inadequate -wfj */
208 Fscale(PST(0), PST(1), &tmp);
209 real_to_real(&tmp,&ST(0));
210 return(0); /* 19 Sep 92*/
211 case 0x2e9: /* ????? */
212 /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9 ATS */
213 fucom(PST(1),PST(0));
216 case 0x3d0: case 0x3d1: /* fist ?? */
218 case 0x3e2: /* fclex */
221 case 0x3e3: /* fninit */
228 case 0x6d9: /* fcompp */
232 case 0x7e0: /* fstsw ax */
233 *(short *) &info->tf_eax = I387.swd;
237 case 0x18: /* fadd */
238 fadd(PST(0),PST(code & 7),&tmp);
239 real_to_real(&tmp,&ST(0));
241 case 0x19: /* fmul */
242 fmul(PST(0),PST(code & 7),&tmp);
243 real_to_real(&tmp,&ST(0));
245 case 0x1a: /* fcom */
246 fcom(PST(code & 7),PST(0));
248 case 0x1b: /* fcomp */
249 fcom(PST(code & 7),PST(0));
252 case 0x1c: /* fsubr */
253 real_to_real(&ST(code & 7),&tmp);
254 tmp.exponent ^= 0x8000;
255 fadd(PST(0),&tmp,&tmp);
256 real_to_real(&tmp,&ST(0));
258 case 0x1d: /* fsub */
259 ST(0).exponent ^= 0x8000;
260 fadd(PST(0),PST(code & 7),&tmp);
261 real_to_real(&tmp,&ST(0));
263 case 0x1e: /* fdivr */
264 fdiv(PST(0),PST(code & 7),&tmp);
265 real_to_real(&tmp,&ST(0));
267 case 0x1f: /* fdiv */
268 fdiv(PST(code & 7),PST(0),&tmp);
269 real_to_real(&tmp,&ST(0));
273 ST(0) = ST((code & 7)+1); /* why plus 1 ????? ATS */
275 case 0x39: /* fxch */
276 fxchg(&ST(0),&ST(code & 7));
278 case 0x3b: /* ??? ??? wrong ???? ATS */
279 ST(code & 7) = ST(0);
282 case 0x98: /* fadd */
283 fadd(PST(0),PST(code & 7),&tmp);
284 real_to_real(&tmp,&ST(code & 7));
286 case 0x99: /* fmul */
287 fmul(PST(0),PST(code & 7),&tmp);
288 real_to_real(&tmp,&ST(code & 7));
290 case 0x9a: /* ???? , my manual don't list a direction bit
291 for fcom , ??? ATS */
292 fcom(PST(code & 7),PST(0));
294 case 0x9b: /* same as above , ATS */
295 fcom(PST(code & 7),PST(0));
298 case 0x9c: /* fsubr */
299 ST(code & 7).exponent ^= 0x8000;
300 fadd(PST(0),PST(code & 7),&tmp);
301 real_to_real(&tmp,&ST(code & 7));
303 case 0x9d: /* fsub */
304 real_to_real(&ST(0),&tmp);
305 tmp.exponent ^= 0x8000;
306 fadd(PST(code & 7),&tmp,&tmp);
307 real_to_real(&tmp,&ST(code & 7));
309 case 0x9e: /* fdivr */
310 fdiv(PST(0),PST(code & 7),&tmp);
311 real_to_real(&tmp,&ST(code & 7));
313 case 0x9f: /* fdiv */
314 fdiv(PST(code & 7),PST(0),&tmp);
315 real_to_real(&tmp,&ST(code & 7));
317 case 0xb8: /* ffree */
318 printf("ffree not implemented\n");
319 math_abort(info,SIGILL);
320 case 0xb9: /* fstp ???? where is the pop ? ATS */
321 fxchg(&ST(0),&ST(code & 7));
324 ST(code & 7) = ST(0);
326 case 0xbb: /* ????? encoding of fstp to mem ? ATS */
327 ST(code & 7) = ST(0);
330 case 0xbc: /* fucom */
331 fucom(PST(code & 7),PST(0));
333 case 0xbd: /* fucomp */
334 fucom(PST(code & 7),PST(0));
337 case 0xd8: /* faddp */
338 fadd(PST(code & 7),PST(0),&tmp);
339 real_to_real(&tmp,&ST(code & 7));
342 case 0xd9: /* fmulp */
343 fmul(PST(code & 7),PST(0),&tmp);
344 real_to_real(&tmp,&ST(code & 7));
347 case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
348 fcom(PST(code & 7),PST(0));
351 case 0xdc: /* fsubrp */
352 ST(code & 7).exponent ^= 0x8000;
353 fadd(PST(0),PST(code & 7),&tmp);
354 real_to_real(&tmp,&ST(code & 7));
357 case 0xdd: /* fsubp */
358 real_to_real(&ST(0),&tmp);
359 tmp.exponent ^= 0x8000;
360 fadd(PST(code & 7),&tmp,&tmp);
361 real_to_real(&tmp,&ST(code & 7));
364 case 0xde: /* fdivrp */
365 fdiv(PST(0),PST(code & 7),&tmp);
366 real_to_real(&tmp,&ST(code & 7));
369 case 0xdf: /* fdivp */
370 fdiv(PST(code & 7),PST(0),&tmp);
371 real_to_real(&tmp,&ST(code & 7));
374 case 0xf8: /* fild 16-bit mem ???? ATS */
375 printf("ffree not implemented\n");
376 math_abort(info,SIGILL);
379 case 0xf9: /* ????? ATS */
380 fxchg(&ST(0),&ST(code & 7));
382 case 0xfa: /* fist 16-bit mem ? ATS */
383 case 0xfb: /* fistp 16-bit mem ? ATS */
384 ST(code & 7) = ST(0);
388 switch ((code>>3) & 0xe7) {
390 put_short_real(PST(0),info,code);
393 put_short_real(PST(0),info,code);
397 address = ea(info,code);
398 for (code = 0 ; code < 7 ; code++) {
399 ((int32_t *) & I387)[code] =
400 get_fs_long((u_int32_t *) address);
405 address = ea(info,code);
406 *(unsigned short *) &I387.cwd =
407 get_fs_word((unsigned short *) address);
410 address = ea(info,code);
411 /*verify_area(address,28);*/
412 for (code = 0 ; code < 7 ; code++) {
413 put_fs_long( ((int32_t *) & I387)[code],
414 (u_int32_t *) address);
419 address = ea(info,code);
420 /*verify_area(address,2);*/
421 put_fs_word(I387.cwd,(short *) address);
424 put_long_int(PST(0),info,code);
427 put_long_int(PST(0),info,code);
432 get_temp_real(&tmp,info,code);
433 real_to_real(&tmp,&ST(0));
436 put_temp_real(PST(0),info,code);
440 put_long_real(PST(0),info,code);
443 put_long_real(PST(0),info,code);
447 address = ea(info,code);
448 for (code = 0 ; code < 27 ; code++) {
449 ((int32_t *) & I387)[code] =
450 get_fs_long((u_int32_t *) address);
455 address = ea(info,code);
456 /*verify_area(address,108);*/
457 for (code = 0 ; code < 27 ; code++) {
458 put_fs_long( ((int32_t *) & I387)[code],
459 (u_int32_t *) address);
467 address = ea(info,code);
468 /*verify_area(address,2);*/
469 put_fs_word(I387.swd,(short *) address);
472 put_short_int(PST(0),info,code);
475 put_short_int(PST(0),info,code);
480 get_BCD(&tmp,info,code);
481 real_to_real(&tmp,&ST(0));
485 get_longlong_int(&tmp,info,code);
486 real_to_real(&tmp,&ST(0));
489 put_BCD(PST(0),info,code);
493 put_longlong_int(PST(0),info,code);
499 get_short_real(&tmp,info,code);
502 get_long_int(&tmp,info,code);
505 get_long_real(&tmp,info,code);
508 get_short_int(&tmp,info,code);
510 switch ((code>>3) & 0x27) {
512 fadd(&tmp,PST(0),&tmp);
513 real_to_real(&tmp,&ST(0));
516 fmul(&tmp,PST(0),&tmp);
517 real_to_real(&tmp,&ST(0));
527 tmp.exponent ^= 0x8000;
528 fadd(&tmp,PST(0),&tmp);
529 real_to_real(&tmp,&ST(0));
532 ST(0).exponent ^= 0x8000;
533 fadd(&tmp,PST(0),&tmp);
534 real_to_real(&tmp,&ST(0));
537 fdiv(PST(0),&tmp,&tmp);
538 real_to_real(&tmp,&ST(0));
541 fdiv(&tmp,PST(0),&tmp);
542 real_to_real(&tmp,&ST(0));
545 if ((code & 0x138) == 0x100) {
547 real_to_real(&tmp,&ST(0));
550 printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
552 math_abort(info,SIGFPE);
560 tmp = I387.swd & 0xffffc7ffUL;
561 I387.swd += 0x00000800;
562 I387.swd &= 0x00003800;
571 tmp = I387.swd & 0xffffc7ffUL;
572 I387.swd += 0x00003800;
573 I387.swd &= 0x00003800;
578 fxchg(temp_real_unaligned * a, temp_real_unaligned * b)
580 temp_real_unaligned c;
587 static temp_real_unaligned *
592 return (temp_real_unaligned *) (i*10 + (char *)(I387.st_space));
596 * linux/kernel/math/ea.c
598 * (C) 1991 Linus Torvalds
602 * Calculate the effective address.
606 static int __regoffset[] = {
607 tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
/*
 * REG(x): lvalue for one saved register of the current process.  x is
 * mapped through __regoffset[] to an index, and that index is applied to
 * curproc->p_md.md_regs viewed as an array of int.
 * NOTE(review): the tEAX..tEDI index constants are not defined in this
 * file; presumably they come from <machine/reg.h> -- confirm.
 */
610 #define REG(x) (((int *)curproc->p_md.md_regs)[__regoffset[(x)]])
613 sib(struct trapframe * info, int mod)
615 unsigned char ss,index,base;
618 base = get_fs_byte((char *) info->tf_eip);
621 index = (base >> 3) & 7;
628 if (mod || base != 5)
631 offset += (signed char) get_fs_byte((char *) info->tf_eip);
633 } else if (mod == 2 || base == 5) {
634 offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
639 return (char *) offset;
643 ea(struct trapframe * info, unsigned short code)
645 unsigned char mod,rm;
649 mod = (code >> 6) & 3;
651 if (rm == 4 && mod != 3)
652 return sib(info,mod);
653 if (rm == 5 && !mod) {
654 offset = get_fs_long((u_int32_t *) info->tf_eip);
658 return (char *) offset;
660 tmp = (int32_t *) &REG(rm);
662 case 0: offset = 0; break;
664 offset = (signed char) get_fs_byte((char *) info->tf_eip);
668 offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
673 math_abort(info,1<<(SIGILL-1));
678 return offset + (char *) *tmp;
681 * linux/kernel/math/get_put.c
683 * (C) 1991 Linus Torvalds
687 * This file handles all accesses to user memory: getting and putting
688 * ints/reals/BCD etc. This is the only part that concerns itself with
689 other than temporary real format. All other calls are strictly temp_real.
693 get_short_real(temp_real * tmp, struct trapframe * info, unsigned short code)
698 addr = ea(info,code);
699 sr = get_fs_long((u_int32_t *) addr);
700 short_to_temp(&sr,tmp);
704 get_long_real(temp_real * tmp, struct trapframe * info, unsigned short code)
709 addr = ea(info,code);
710 lr.a = get_fs_long((u_int32_t *) addr);
711 lr.b = get_fs_long(1 + (u_int32_t *) addr);
712 long_to_temp(&lr,tmp);
716 get_temp_real(temp_real * tmp, struct trapframe * info, unsigned short code)
720 addr = ea(info,code);
721 tmp->a = get_fs_long((u_int32_t *) addr);
722 tmp->b = get_fs_long(1 + (u_int32_t *) addr);
723 tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
727 get_short_int(temp_real * tmp, struct trapframe * info, unsigned short code)
732 addr = ea(info,code);
733 ti.a = (signed short) get_fs_word((unsigned short *) addr);
735 if ((ti.sign = (ti.a < 0)) != 0)
737 int_to_real(&ti,tmp);
741 get_long_int(temp_real * tmp, struct trapframe * info, unsigned short code)
746 addr = ea(info,code);
747 ti.a = get_fs_long((u_int32_t *) addr);
749 if ((ti.sign = (ti.a < 0)) != 0)
751 int_to_real(&ti,tmp);
755 get_longlong_int(temp_real * tmp, struct trapframe * info, unsigned short code)
760 addr = ea(info,code);
761 ti.a = get_fs_long((u_int32_t *) addr);
762 ti.b = get_fs_long(1 + (u_int32_t *) addr);
763 if ((ti.sign = (ti.b < 0)) != 0)
764 __asm__("notl %0 ; notl %1\n\t"
765 "addl $1,%0 ; adcl $0,%1"
766 :"=r" (ti.a),"=r" (ti.b)
767 :"0" (ti.a),"1" (ti.b));
768 int_to_real(&ti,tmp);
/*
 * MUL10(low, high): multiply the 64-bit quantity held in the two 32-bit
 * halves high:low by ten, in place.
 *
 * The value is first doubled (2x) and that result is saved in %ecx (low
 * half) and %ebx (high half).  Two more doublings give 8x, and adding the
 * saved copy back yields 10x.  Each step carries from the low half into
 * the high half with adcl; a carry out of the high half is lost.
 *
 * `low` is pinned to %eax and `high` to %edx by the constraints; %ecx and
 * %ebx are listed as clobbered scratch registers.
 */
771 #define MUL10(low,high) \
772 __asm__("addl %0,%0 ; adcl %1,%1\n\t" \
773 "movl %0,%%ecx ; movl %1,%%ebx\n\t" \
774 "addl %0,%0 ; adcl %1,%1\n\t" \
775 "addl %0,%0 ; adcl %1,%1\n\t" \
776 "addl %%ecx,%0 ; adcl %%ebx,%1" \
777 :"=a" (low),"=d" (high) \
778 :"0" (low),"1" (high):"cx","bx")
/*
 * ADD64(val, low, high): add `val` to the 64-bit quantity high:low, in
 * place.  `val` is cast to u_int32_t before being handed to the asm, the
 * addl adds it to the low half, and the adcl folds the resulting carry
 * (and nothing else) into the high half.
 */
780 #define ADD64(val,low,high) \
781 __asm__("addl %4,%0 ; adcl $0,%1":"=r" (low),"=r" (high) \
782 :"0" (low),"1" (high),"r" ((u_int32_t) (val)))
785 get_BCD(temp_real * tmp, struct trapframe * info, unsigned short code)
792 addr = ea(info,code);
794 i.sign = 0x80 & get_fs_byte(addr--);
796 for (k = 0; k < 9; k++) {
797 c = get_fs_byte(addr--);
799 ADD64((c>>4), i.a, i.b);
801 ADD64((c&0xf), i.a, i.b);
807 put_short_real(const temp_real * tmp,
808 struct trapframe * info, unsigned short code)
813 addr = ea(info,code);
814 /*verify_area(addr,4);*/
815 temp_to_short(tmp,&sr);
816 put_fs_long(sr,(u_int32_t *) addr);
820 put_long_real(const temp_real * tmp,
821 struct trapframe * info, unsigned short code)
826 addr = ea(info,code);
827 /*verify_area(addr,8);*/
828 temp_to_long(tmp,&lr);
829 put_fs_long(lr.a, (u_int32_t *) addr);
830 put_fs_long(lr.b, 1 + (u_int32_t *) addr);
834 put_temp_real(const temp_real * tmp,
835 struct trapframe * info, unsigned short code)
839 addr = ea(info,code);
840 /*verify_area(addr,10);*/
841 put_fs_long(tmp->a, (u_int32_t *) addr);
842 put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
843 put_fs_word(tmp->exponent, 4 + (short *) addr);
847 put_short_int(const temp_real * tmp,
848 struct trapframe * info, unsigned short code)
853 addr = ea(info,code);
854 real_to_int(tmp,&ti);
855 /*verify_area(addr,2);*/
858 put_fs_word(ti.a,(short *) addr);
862 put_long_int(const temp_real * tmp,
863 struct trapframe * info, unsigned short code)
868 addr = ea(info,code);
869 real_to_int(tmp,&ti);
870 /*verify_area(addr,4);*/
873 put_fs_long(ti.a,(u_int32_t *) addr);
877 put_longlong_int(const temp_real * tmp,
878 struct trapframe * info, unsigned short code)
883 addr = ea(info,code);
884 real_to_int(tmp,&ti);
885 /*verify_area(addr,8);*/
887 __asm__("notl %0 ; notl %1\n\t"
888 "addl $1,%0 ; adcl $0,%1"
889 :"=r" (ti.a),"=r" (ti.b)
890 :"0" (ti.a),"1" (ti.b));
891 put_fs_long(ti.a,(u_int32_t *) addr);
892 put_fs_long(ti.b,1 + (u_int32_t *) addr);
/*
 * DIV10(low, high, rem): divide the 64-bit quantity high:low by ten, in
 * place, and leave the remainder in `rem`.
 *
 * This is two-step long division with the divisor 10 held in %ecx (%6):
 *   1. %edx starts at 0 and %eax holds `high`, so the first divl leaves
 *      high/10 in %eax and high%10 in %edx.
 *   2. xchgl swaps %eax with the register holding `low`: the new high
 *      half is parked in that register and the old low half moves into
 *      %eax.
 *   3. The second divl divides (high%10):low by 10, leaving the new low
 *      half in %eax and the final remainder in %edx.
 * The instruction order matters: step 3 relies on the remainder that
 * step 1 left in %edx.
 */
895 #define DIV10(low,high,rem) \
896 __asm__("divl %6 ; xchgl %1,%2 ; divl %6" \
897 :"=d" (rem),"=a" (low),"=r" (high) \
898 :"0" (0),"1" (high),"2" (low),"c" (10))
901 put_BCD(const temp_real * tmp,struct trapframe * info, unsigned short code)
908 addr = ea(info,code);
909 /*verify_area(addr,10);*/
912 put_fs_byte(0x80, addr+9);
914 put_fs_byte(0, addr+9);
915 for (k = 0; k < 9; k++) {
920 put_fs_byte(c,addr++);
925 * linux/kernel/math/mul.c
927 * (C) 1991 Linus Torvalds
931 * temporary real multiplication routine.
938 __asm__("movl (%0),%%eax ; addl %%eax,(%0)\n\t"
939 "movl 4(%0),%%eax ; adcl %%eax,4(%0)\n\t"
940 "movl 8(%0),%%eax ; adcl %%eax,8(%0)\n\t"
941 "movl 12(%0),%%eax ; adcl %%eax,12(%0)"
946 mul64(const temp_real * a, const temp_real * b, int * c)
948 __asm__("movl (%0),%%eax\n\t"
950 "movl %%eax,(%2)\n\t"
951 "movl %%edx,4(%2)\n\t"
952 "movl 4(%0),%%eax\n\t"
954 "movl %%eax,8(%2)\n\t"
955 "movl %%edx,12(%2)\n\t"
956 "movl (%0),%%eax\n\t"
958 "addl %%eax,4(%2)\n\t"
959 "adcl %%edx,8(%2)\n\t"
961 "movl 4(%0),%%eax\n\t"
963 "addl %%eax,4(%2)\n\t"
964 "adcl %%edx,8(%2)\n\t"
966 ::"S" (a),"c" (b),"D" (c)
971 fmul(const temp_real * src1, const temp_real * src2, temp_real * result)
974 int tmp[4] = {0,0,0,0};
976 sign = (src1->exponent ^ src2->exponent) & 0x8000;
977 i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
979 result->exponent = sign;
980 result->a = result->b = 0;
987 mul64(src1,src2,tmp);
988 if (tmp[0] || tmp[1] || tmp[2] || tmp[3])
989 while (i && tmp[3] >= 0) {
995 result->exponent = i | sign;
1001 * linux/kernel/math/div.c
1003 * (C) 1991 Linus Torvalds
1007 * temporary real division routine.
1013 __asm__ __volatile__("movl (%0),%%eax ; addl %%eax,(%0)\n\t"
1014 "movl 4(%0),%%eax ; adcl %%eax,4(%0)\n\t"
1015 "movl 8(%0),%%eax ; adcl %%eax,8(%0)\n\t"
1016 "movl 12(%0),%%eax ; adcl %%eax,12(%0)"
1021 shift_right(int * c)
1023 __asm__("shrl $1,12(%0) ; rcrl $1,8(%0) ; rcrl $1,4(%0) ; rcrl $1,(%0)"
1028 try_sub(int * a, int * b)
1032 __asm__ __volatile__("movl (%1),%%eax ; subl %%eax,(%2)\n\t"
1033 "movl 4(%1),%%eax ; sbbl %%eax,4(%2)\n\t"
1034 "movl 8(%1),%%eax ; sbbl %%eax,8(%2)\n\t"
1035 "movl 12(%1),%%eax ; sbbl %%eax,12(%2)\n\t"
1036 "setae %%al":"=a" (ok):"c" (a),"d" (b));
1041 div64(int * a, int * b, int * c)
1045 unsigned int mask = 0;
1048 for (i = 0 ; i<64 ; i++) {
1049 if (!(mask >>= 1)) {
1051 mask = 0x80000000UL;
1053 tmp[0] = a[0]; tmp[1] = a[1];
1054 tmp[2] = a[2]; tmp[3] = a[3];
1055 if (try_sub(b,tmp)) {
1057 a[0] = tmp[0]; a[1] = tmp[1];
1058 a[2] = tmp[2]; a[3] = tmp[3];
1065 fdiv(const temp_real * src1, const temp_real * src2, temp_real * result)
1068 int a[4],b[4],tmp[4] = {0,0,0,0};
1070 sign = (src1->exponent ^ src2->exponent) & 0x8000;
1071 if (!(src2->a || src2->b)) {
1075 i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
1078 result->exponent = sign;
1079 result->a = result->b = 0;
1093 if (tmp[0] || tmp[1] || tmp[2] || tmp[3]) {
1094 while (i && tmp[3] >= 0) {
1106 if (tmp[0] || tmp[1])
1108 result->exponent = i | sign;
1114 * linux/kernel/math/add.c
1116 * (C) 1991 Linus Torvalds
1120 * temporary real addition routine.
1122 * NOTE! These aren't exact: they are only 62 bits wide, and don't do
1123 * correct rounding. Fast hack. The reason is that we shift right the
1124 * values by two, in order not to have overflow (1 bit), and to be able
1125 * to move the sign into the mantissa (1 bit). Much simpler algorithms,
1126 * and 62 bits (61 really - no rounding) accuracy is usually enough. The
1127 * only time you should notice anything weird is when adding 64-bit
1128 * integers together. When using doubles (52 bits accuracy), the
1129 * 61-bit accuracy never shows at all.
1133 __asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
1134 :"=r" (a->a),"=r" (a->b) \
1135 :"0" (a->a),"1" (a->b))
1137 static void signify(temp_real * a)
1140 __asm__("shrdl $2,%1,%0 ; shrl $2,%1"
1141 :"=r" (a->a),"=r" (a->b)
1142 :"0" (a->a),"1" (a->b));
1143 if (a->exponent < 0)
1145 a->exponent &= 0x7fff;
1148 static void unsignify(temp_real * a)
1150 if (!(a->a || a->b)) {
1154 a->exponent &= 0x7fff;
1157 a->exponent |= 0x8000;
1161 __asm__("addl %0,%0 ; adcl %1,%1"
1162 :"=r" (a->a),"=r" (a->b)
1163 :"0" (a->a),"1" (a->b));
1168 fadd(const temp_real * src1, const temp_real * src2, temp_real * result)
1173 x1 = src1->exponent & 0x7fff;
1174 x2 = src2->exponent & 0x7fff;
1193 __asm__("shrdl %4,%1,%0 ; shrl %4,%1"
1194 :"=r" (b.a),"=r" (b.b)
1195 :"0" (b.a),"1" (b.b),"c" ((char) shift));
1198 __asm__("addl %4,%0 ; adcl %5,%1"
1199 :"=r" (a.a),"=r" (a.b)
1200 :"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
1206 * linux/kernel/math/compare.c
1208 * (C) 1991 Linus Torvalds
1212 * temporary real comparison routines
/*
 * clear_Cx(): clear the bits selected by mask 0x4500 in the emulated
 * status word I387.swd.  In the x87 status-word layout these are the
 * condition-code bits C0 (0x0100), C2 (0x0400) and C3 (0x4000).
 */
1216 #define clear_Cx() (I387.swd &= ~0x4500)
1219 normalize(temp_real * a)
1221 int i = a->exponent & 0x7fff;
1222 int sign = a->exponent & 0x8000;
1224 if (!(a->a || a->b)) {
1228 while (i && a->b >= 0) {
1230 __asm__("addl %0,%0 ; adcl %1,%1"
1231 :"=r" (a->a),"=r" (a->b)
1232 :"0" (a->a),"1" (a->b));
1234 a->exponent = i | sign;
1238 ftst(const temp_real * a)
1245 if (b.a || b.b || b.exponent) {
1253 fcom(const temp_real * src1, const temp_real * src2)
1258 a.exponent ^= 0x8000;
1264 fucom(const temp_real * src1, const temp_real * src2)
1270 * linux/kernel/math/convert.c
1272 * (C) 1991 Linus Torvalds
1277 * NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
1278 * and temp_to_short conversion routines: don't touch them if you don't
1279 * know what's going on. They are the adding of one in the rounding: the
1280 * overflow bit is also used for adding one into the exponent. Thus it
1281 * looks like the overflow would be incorrectly handled, but due to the
1282 * way the IEEE numbers work, things are correct.
1284 * There is no checking for total overflow in the conversions, though (ie
1285 * if the temp-real number simply won't fit in a short- or long-real.)
1289 short_to_temp(const short_real * a, temp_real * b)
1291 if (!(*a & 0x7fffffff)) {
1294 b->exponent = 0x8000;
1299 b->exponent = ((*a>>23) & 0xff)-127+16383;
1301 b->exponent |= 0x8000;
1302 b->b = (*a<<8) | 0x80000000UL;
1307 long_to_temp(const long_real * a, temp_real * b)
1309 if (!a->a && !(a->b & 0x7fffffff)) {
1312 b->exponent = 0x8000;
1317 b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
1319 b->exponent |= 0x8000;
1320 b->b = 0x80000000UL | (a->b<<11) | (((u_int32_t)a->a)>>21);
1325 temp_to_short(const temp_real * a, short_real * b)
1327 if (!(a->exponent & 0x7fff)) {
1328 *b = (a->exponent)?0x80000000UL:0;
1331 *b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
1332 if (a->exponent < 0)
1334 *b |= (a->b >> 8) & 0x007fffff;
1335 switch ((int)ROUNDING) {
1337 if ((a->b & 0xff) > 0x80)
1341 if ((a->exponent & 0x8000) && (a->b & 0xff))
1345 if (!(a->exponent & 0x8000) && (a->b & 0xff))
1352 temp_to_long(const temp_real * a, long_real * b)
1354 if (!(a->exponent & 0x7fff)) {
1356 b->b = (a->exponent)?0x80000000UL:0;
1359 b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
1361 if (a->exponent < 0)
1362 b->b |= 0x80000000UL;
1363 b->b |= (a->b >> 11) & 0x000fffff;
1365 b->a |= (a->a >> 11) & 0x001fffff;
1366 switch ((int)ROUNDING) {
1368 if ((a->a & 0x7ff) > 0x400)
1369 __asm__("addl $1,%0 ; adcl $0,%1"
1370 :"=r" (b->a),"=r" (b->b)
1371 :"0" (b->a),"1" (b->b));
1374 if ((a->exponent & 0x8000) && (a->b & 0xff))
1375 __asm__("addl $1,%0 ; adcl $0,%1"
1376 :"=r" (b->a),"=r" (b->b)
1377 :"0" (b->a),"1" (b->b));
1380 if (!(a->exponent & 0x8000) && (a->b & 0xff))
1381 __asm__("addl $1,%0 ; adcl $0,%1"
1382 :"=r" (b->a),"=r" (b->b)
1383 :"0" (b->a),"1" (b->b));
1389 frndint(const temp_real * a, temp_real * b)
1391 int shift = 16383 + 63 - (a->exponent & 0x7fff);
1392 u_int32_t underflow;
1394 if ((shift < 0) || (shift == 16383+63)) {
1398 b->a = b->b = underflow = 0;
1399 b->exponent = a->exponent;
1401 b->b = a->b; b->a = a->a;
1402 } else if (shift < 64) {
1403 b->a = a->b; underflow = a->a;
1406 } else if (shift < 96) {
1414 b->exponent += shift;
1415 __asm__("shrdl %2,%1,%0"
1416 :"=r" (underflow),"=r" (b->a)
1417 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1418 __asm__("shrdl %2,%1,%0"
1419 :"=r" (b->a),"=r" (b->b)
1420 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1421 __asm__("shrl %1,%0"
1423 :"c" ((char) shift),"0" (b->b));
1424 switch ((int)ROUNDING) {
1426 __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1427 :"=r" (b->a),"=r" (b->b)
1428 :"0" (b->a),"1" (b->b)
1429 ,"r" (0x7fffffff + (b->a & 1))
1430 ,"m" (*&underflow));
1433 if ((b->exponent >= 0) && underflow)
1434 __asm__("addl $1,%0 ; adcl $0,%1"
1435 :"=r" (b->a),"=r" (b->b)
1436 :"0" (b->a),"1" (b->b));
1439 if ((b->exponent < 0) && underflow)
1440 __asm__("addl $1,%0 ; adcl $0,%1"
1441 :"=r" (b->a),"=r" (b->b)
1442 :"0" (b->a),"1" (b->b));
1448 __asm__("addl %0,%0 ; adcl %1,%1"
1449 :"=r" (b->a),"=r" (b->b)
1450 :"0" (b->a),"1" (b->b));
1457 Fscale(const temp_real *a, const temp_real *b, temp_real *c)
1462 if(!c->a && !c->b) { /* 19 Sep 92*/
1466 real_to_int(b, &ti);
1468 c->exponent -= ti.a;
1470 c->exponent += ti.a;
1474 real_to_int(const temp_real * a, temp_int * b)
1476 int shift = 16383 + 63 - (a->exponent & 0x7fff);
1477 u_int32_t underflow;
1479 b->a = b->b = underflow = 0;
1480 b->sign = (a->exponent < 0);
1486 b->b = a->b; b->a = a->a;
1487 } else if (shift < 64) {
1488 b->a = a->b; underflow = a->a;
1490 } else if (shift < 96) {
1497 __asm__("shrdl %2,%1,%0"
1498 :"=r" (underflow),"=r" (b->a)
1499 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1500 __asm__("shrdl %2,%1,%0"
1501 :"=r" (b->a),"=r" (b->b)
1502 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1503 __asm__("shrl %1,%0"
1505 :"c" ((char) shift),"0" (b->b));
1506 switch ((int)ROUNDING) {
1508 __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1509 :"=r" (b->a),"=r" (b->b)
1510 :"0" (b->a),"1" (b->b)
1511 ,"r" (0x7fffffff + (b->a & 1))
1512 ,"m" (*&underflow));
1515 if (!b->sign && underflow)
1516 __asm__("addl $1,%0 ; adcl $0,%1"
1517 :"=r" (b->a),"=r" (b->b)
1518 :"0" (b->a),"1" (b->b));
1521 if (b->sign && underflow)
1522 __asm__("addl $1,%0 ; adcl $0,%1"
1523 :"=r" (b->a),"=r" (b->b)
1524 :"0" (b->a),"1" (b->b));
1530 int_to_real(const temp_int * a, temp_real * b)
1535 b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
1542 __asm__("addl %0,%0 ; adcl %1,%1"
1543 :"=r" (b->a),"=r" (b->b)
1544 :"0" (b->a),"1" (b->b));
1549 fpu_modevent(module_t mod, int type, void *unused)
1553 if (pmath_emulate) {
1554 printf("Another Math emulator already present\n");
1557 pmath_emulate = math_emulate;
1559 printf("Math emulator present\n");
1562 if (pmath_emulate != math_emulate) {
1563 printf("Cannot unload another math emulator\n");
1568 printf("Math emulator unloaded\n");
1575 static moduledata_t fpumod = {
1580 DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);