Initial import from FreeBSD RELENG_4:
[dragonfly.git] / sys / i386 / i386 / math_emulate.c
1 /*
2  * linux/kernel/math/math_emulate.c
3  *
4  * (C) 1991 Linus Torvalds
5  *
6  * [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
7  *
8  *      from: 386BSD 0.1
9  * $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
10  */
11
12 /*
13  * Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
14  * even for soft-float, unless you use bruce evans' patches. The patches
15  * are great, but they have to be re-applied for every version, and the
16  * library is different for soft-float and 80387. So emulation is more
17  * practical, even though it's slower.
18  *
19  * 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
20  * about add/sub/mul/div. Urgel. I should find some good source, but I'll
21  * just fake up something.
22  *
23  * 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
24  * test every possible combination.
25  */
26
27 /*
28  * This file is full of ugly macros etc: one problem was that gcc simply
29  * didn't want to make the structures as they should be: it has to try to
30  * align them. Sickening code, but at least I've hidden the ugly things
31  * in this one file: the other files don't need to know about these things.
32  *
33  * The other files also don't care about ST(x) etc - they just get addresses
34  * to 80-bit temporary reals, and do with them as they please. I wanted to
35  * hide most of the 387-specific things here.
36  */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40
41 #include <machine/frame.h>
42 #include <machine/reg.h>
43
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46
47 #include <vm/vm.h>
48 #include <sys/lock.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_map.h>
51 #include <sys/user.h>
52
53 #define __ALIGNED_TEMP_REAL 1
54 #include <i386/i386/math_emu.h>
55
/* Swap the two bytes of the 16-bit lvalue x in place (x86 inline asm). */
#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)(x)))
/* ST(x): emulated stack register x, as an lvalue; PST(x): read-only pointer. */
#define ST(x) (*__st((x)))
#define PST(x) ((const temp_real *) __st((x)))
/*
 * Give up on the current instruction: put the saved instruction pointer
 * back into the trap frame and return signo from the enclosing function.
 * Requires a local variable named `oldeip` at the point of use.  Wrapped
 * in do/while(0) so that it behaves as one statement (e.g. under an
 * unbraced if/else).
 */
#define math_abort(tfp, signo) \
	do { (tfp)->tf_eip = oldeip; return (signo); } while (0)
60
61 /*
62  * We don't want these inlined - it gets too messy in the machine-code.
63  */
64 static void fpop(void);
65 static void fpush(void);
66 static void fxchg(temp_real_unaligned * a, temp_real_unaligned * b);
67 static temp_real_unaligned * __st(int i);
68
/*
 * Fetch one byte from user address adr with fubyte().  The value is
 * narrowed to unsigned char, so whatever status fubyte() may report in
 * its wider return value is not visible to the caller.
 */
static unsigned char
get_fs_byte(char *adr)
{
	return (fubyte(adr));
}
72
/*
 * Fetch a 16-bit word from user address adr.
 *
 * Uses the 16-bit fetch primitive fusword(), the counterpart of the
 * susword() used by put_fs_word(), instead of the 32-bit fuword(): a
 * 32-bit fetch touches two bytes beyond the requested word.
 * The value is narrowed to unsigned short, so any status reported in the
 * wider return value of fusword() is not visible to the caller.
 */
static unsigned short
get_fs_word(unsigned short *adr)
{
	return (fusword(adr));
}
76
/*
 * Fetch a 32-bit value from user address adr with fuword().  The result
 * is returned as u_int32_t; no separate error indication is passed on.
 */
static u_int32_t
get_fs_long(u_int32_t *adr)
{
	return (fuword(adr));
}
80
/*
 * Store the byte val at user address adr with subyte().  The result of
 * subyte() is deliberately discarded.
 */
static void
put_fs_byte(unsigned char val, char *adr)
{
	(void)subyte(adr, val);
}
84
/*
 * Store the 16-bit value val at user address adr with susword().  The
 * result of susword() is deliberately discarded.
 */
static void
put_fs_word(unsigned short val, short *adr)
{
	(void)susword(adr, val);
}
88
89 static void 
90 put_fs_long(u_long val, u_int32_t *adr)
91         { (void)suword(adr,val); }
92
93 static int
94 math_emulate(struct trapframe * info)
95 {
96         unsigned short code;
97         temp_real tmp;
98         char * address;
99         u_int32_t oldeip;
100
101         /* ever used fp? */
102         if ((((struct pcb *)curproc->p_addr)->pcb_flags & FP_SOFTFP) == 0) {
103                 ((struct pcb *)curproc->p_addr)->pcb_flags |= FP_SOFTFP;
104                 I387.cwd = 0x037f;
105                 I387.swd = 0x0000;
106                 I387.twd = 0x0000;
107         }
108
109         if (I387.cwd & I387.swd & 0x3f)
110                 I387.swd |= 0x8000;
111         else
112                 I387.swd &= 0x7fff;
113         oldeip = info->tf_eip;
114 /* 0x001f means user code space */
115         if ((u_short)info->tf_cs != 0x001F) {
116                 printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
117                         (u_long)oldeip);
118                 panic("?Math emulation needed in kernel?");
119         }
120         /* completely ignore an operand-size prefix */
121         if (get_fs_byte((char *) info->tf_eip) == 0x66)
122                 info->tf_eip++;
123         code = get_fs_word((unsigned short *) info->tf_eip);
124         bswapw(code);
125         code &= 0x7ff;
126         I387.fip = oldeip;
127         *(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
128         *(1+(unsigned short *) &I387.fcs) = code;
129         info->tf_eip += 2;
130         switch (code) {
131                 case 0x1d0: /* fnop */
132                         return(0);
133                 case 0x1d1: case 0x1d2: case 0x1d3:  /* fst to 32-bit mem */
134                 case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
135                         math_abort(info,SIGILL);
136                 case 0x1e0: /* fchs */
137                         ST(0).exponent ^= 0x8000;
138                         return(0);
139                 case 0x1e1: /* fabs */
140                         ST(0).exponent &= 0x7fff;
141                         return(0);
142                 case 0x1e2: case 0x1e3:
143                         math_abort(info,SIGILL);
144                 case 0x1e4: /* ftst */
145                         ftst(PST(0));
146                         return(0);
147                 case 0x1e5: /* fxam */
148                         printf("fxam not implemented\n");
149                         math_abort(info,SIGILL);
150                 case 0x1e6: case 0x1e7: /* fldenv */
151                         math_abort(info,SIGILL);
152                 case 0x1e8: /* fld1 */
153                         fpush();
154                         ST(0) = CONST1;
155                         return(0);
156                 case 0x1e9: /* fld2t */
157                         fpush();
158                         ST(0) = CONSTL2T;
159                         return(0);
160                 case 0x1ea: /* fld2e */
161                         fpush();
162                         ST(0) = CONSTL2E;
163                         return(0);
164                 case 0x1eb: /* fldpi */
165                         fpush();
166                         ST(0) = CONSTPI;
167                         return(0);
168                 case 0x1ec: /* fldlg2 */
169                         fpush();
170                         ST(0) = CONSTLG2;
171                         return(0);
172                 case 0x1ed: /* fldln2 */
173                         fpush();
174                         ST(0) = CONSTLN2;
175                         return(0);
176                 case 0x1ee: /* fldz */
177                         fpush();
178                         ST(0) = CONSTZ;
179                         return(0);
180                 case 0x1ef:
181                         math_abort(info,SIGILL);
182                 case 0x1f0: /* f2xm1 */
183                 case 0x1f1: /* fyl2x */
184                 case 0x1f2: /* fptan */
185                 case 0x1f3: /* fpatan */
186                 case 0x1f4: /* fxtract */
187                 case 0x1f5: /* fprem1 */
188                 case 0x1f6: /* fdecstp */
189                 case 0x1f7: /* fincstp */
190                 case 0x1f8: /* fprem */
191                 case 0x1f9: /* fyl2xp1 */
192                 case 0x1fa: /* fsqrt */
193                 case 0x1fb: /* fsincos */
194                 case 0x1fe: /* fsin */
195                 case 0x1ff: /* fcos */
196                         uprintf(
197                          "math_emulate: instruction %04x not implemented\n",
198                           code + 0xd800);
199                         math_abort(info,SIGILL);
200                 case 0x1fc: /* frndint */
201                         frndint(PST(0),&tmp);
202                         real_to_real(&tmp,&ST(0));
203                         return(0);
204                 case 0x1fd: /* fscale */
205                         /* incomplete and totally inadequate -wfj */
206                         Fscale(PST(0), PST(1), &tmp);
207                         real_to_real(&tmp,&ST(0));
208                         return(0);                      /* 19 Sep 92*/
209                 case 0x2e9: /* ????? */
210 /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9  ATS */
211                         fucom(PST(1),PST(0));
212                         fpop(); fpop();
213                         return(0);
214                 case 0x3d0: case 0x3d1: /* fist ?? */
215                         return(0);
216                 case 0x3e2: /* fclex */
217                         I387.swd &= 0x7f00;
218                         return(0);
219                 case 0x3e3: /* fninit */
220                         I387.cwd = 0x037f;
221                         I387.swd = 0x0000;
222                         I387.twd = 0x0000;
223                         return(0);
224                 case 0x3e4:
225                         return(0);
226                 case 0x6d9: /* fcompp */
227                         fcom(PST(1),PST(0));
228                         fpop(); fpop();
229                         return(0);
230                 case 0x7e0: /* fstsw ax */
231                         *(short *) &info->tf_eax = I387.swd;
232                         return(0);
233         }
234         switch (code >> 3) {
235                 case 0x18: /* fadd */
236                         fadd(PST(0),PST(code & 7),&tmp);
237                         real_to_real(&tmp,&ST(0));
238                         return(0);
239                 case 0x19: /* fmul */
240                         fmul(PST(0),PST(code & 7),&tmp);
241                         real_to_real(&tmp,&ST(0));
242                         return(0);
243                 case 0x1a: /* fcom */
244                         fcom(PST(code & 7),PST(0));
245                         return(0);
246                 case 0x1b: /* fcomp */
247                         fcom(PST(code & 7),PST(0));
248                         fpop();
249                         return(0);
250                 case 0x1c: /* fsubr */
251                         real_to_real(&ST(code & 7),&tmp);
252                         tmp.exponent ^= 0x8000;
253                         fadd(PST(0),&tmp,&tmp);
254                         real_to_real(&tmp,&ST(0));
255                         return(0);
256                 case 0x1d: /* fsub */
257                         ST(0).exponent ^= 0x8000;
258                         fadd(PST(0),PST(code & 7),&tmp);
259                         real_to_real(&tmp,&ST(0));
260                         return(0);
261                 case 0x1e: /* fdivr */
262                         fdiv(PST(0),PST(code & 7),&tmp);
263                         real_to_real(&tmp,&ST(0));
264                         return(0);
265                 case 0x1f: /* fdiv */
266                         fdiv(PST(code & 7),PST(0),&tmp);
267                         real_to_real(&tmp,&ST(0));
268                         return(0);
269                 case 0x38: /* fld */
270                         fpush();
271                         ST(0) = ST((code & 7)+1);  /* why plus 1 ????? ATS */
272                         return(0);
273                 case 0x39: /* fxch */
274                         fxchg(&ST(0),&ST(code & 7));
275                         return(0);
276                 case 0x3b: /*  ??? ??? wrong ???? ATS */
277                         ST(code & 7) = ST(0);
278                         fpop();
279                         return(0);
280                 case 0x98: /* fadd */
281                         fadd(PST(0),PST(code & 7),&tmp);
282                         real_to_real(&tmp,&ST(code & 7));
283                         return(0);
284                 case 0x99: /* fmul */
285                         fmul(PST(0),PST(code & 7),&tmp);
286                         real_to_real(&tmp,&ST(code & 7));
287                         return(0);
288                 case 0x9a: /* ???? , my manual don't list a direction bit
289 for fcom , ??? ATS */
290                         fcom(PST(code & 7),PST(0));
291                         return(0);
292                 case 0x9b: /* same as above , ATS */
293                         fcom(PST(code & 7),PST(0));
294                         fpop();
295                         return(0);
296                 case 0x9c: /* fsubr */
297                         ST(code & 7).exponent ^= 0x8000;
298                         fadd(PST(0),PST(code & 7),&tmp);
299                         real_to_real(&tmp,&ST(code & 7));
300                         return(0);
301                 case 0x9d: /* fsub */
302                         real_to_real(&ST(0),&tmp);
303                         tmp.exponent ^= 0x8000;
304                         fadd(PST(code & 7),&tmp,&tmp);
305                         real_to_real(&tmp,&ST(code & 7));
306                         return(0);
307                 case 0x9e: /* fdivr */
308                         fdiv(PST(0),PST(code & 7),&tmp);
309                         real_to_real(&tmp,&ST(code & 7));
310                         return(0);
311                 case 0x9f: /* fdiv */
312                         fdiv(PST(code & 7),PST(0),&tmp);
313                         real_to_real(&tmp,&ST(code & 7));
314                         return(0);
315                 case 0xb8: /* ffree */
316                         printf("ffree not implemented\n");
317                         math_abort(info,SIGILL);
318                 case 0xb9: /* fstp ???? where is the pop ? ATS */
319                         fxchg(&ST(0),&ST(code & 7));
320                         return(0);
321                 case 0xba: /* fst */
322                         ST(code & 7) = ST(0);
323                         return(0);
324                 case 0xbb: /* ????? encoding of fstp to mem ? ATS */
325                         ST(code & 7) = ST(0);
326                         fpop();
327                         return(0);
328                 case 0xbc: /* fucom */
329                         fucom(PST(code & 7),PST(0));
330                         return(0);
331                 case 0xbd: /* fucomp */
332                         fucom(PST(code & 7),PST(0));
333                         fpop();
334                         return(0);
335                 case 0xd8: /* faddp */
336                         fadd(PST(code & 7),PST(0),&tmp);
337                         real_to_real(&tmp,&ST(code & 7));
338                         fpop();
339                         return(0);
340                 case 0xd9: /* fmulp */
341                         fmul(PST(code & 7),PST(0),&tmp);
342                         real_to_real(&tmp,&ST(code & 7));
343                         fpop();
344                         return(0);
345                 case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
346                         fcom(PST(code & 7),PST(0));
347                         fpop();
348                         return(0);
349                 case 0xdc: /* fsubrp */
350                         ST(code & 7).exponent ^= 0x8000;
351                         fadd(PST(0),PST(code & 7),&tmp);
352                         real_to_real(&tmp,&ST(code & 7));
353                         fpop();
354                         return(0);
355                 case 0xdd: /* fsubp */
356                         real_to_real(&ST(0),&tmp);
357                         tmp.exponent ^= 0x8000;
358                         fadd(PST(code & 7),&tmp,&tmp);
359                         real_to_real(&tmp,&ST(code & 7));
360                         fpop();
361                         return(0);
362                 case 0xde: /* fdivrp */
363                         fdiv(PST(0),PST(code & 7),&tmp);
364                         real_to_real(&tmp,&ST(code & 7));
365                         fpop();
366                         return(0);
367                 case 0xdf: /* fdivp */
368                         fdiv(PST(code & 7),PST(0),&tmp);
369                         real_to_real(&tmp,&ST(code & 7));
370                         fpop();
371                         return(0);
372                 case 0xf8: /* fild 16-bit mem ???? ATS */
373                         printf("ffree not implemented\n");
374                         math_abort(info,SIGILL);
375                         fpop();
376                         return(0);
377                 case 0xf9: /*  ????? ATS */
378                         fxchg(&ST(0),&ST(code & 7));
379                         return(0);
380                 case 0xfa: /* fist 16-bit mem ? ATS */
381                 case 0xfb: /* fistp 16-bit mem ? ATS */
382                         ST(code & 7) = ST(0);
383                         fpop();
384                         return(0);
385         }
386         switch ((code>>3) & 0xe7) {
387                 case 0x22:
388                         put_short_real(PST(0),info,code);
389                         return(0);
390                 case 0x23:
391                         put_short_real(PST(0),info,code);
392                         fpop();
393                         return(0);
394                 case 0x24:
395                         address = ea(info,code);
396                         for (code = 0 ; code < 7 ; code++) {
397                                 ((int32_t *) & I387)[code] =
398                                    get_fs_long((u_int32_t *) address);
399                                 address += 4;
400                         }
401                         return(0);
402                 case 0x25:
403                         address = ea(info,code);
404                         *(unsigned short *) &I387.cwd =
405                                 get_fs_word((unsigned short *) address);
406                         return(0);
407                 case 0x26:
408                         address = ea(info,code);
409                         /*verify_area(address,28);*/
410                         for (code = 0 ; code < 7 ; code++) {
411                                 put_fs_long( ((int32_t *) & I387)[code],
412                                         (u_int32_t *) address);
413                                 address += 4;
414                         }
415                         return(0);
416                 case 0x27:
417                         address = ea(info,code);
418                         /*verify_area(address,2);*/
419                         put_fs_word(I387.cwd,(short *) address);
420                         return(0);
421                 case 0x62:
422                         put_long_int(PST(0),info,code);
423                         return(0);
424                 case 0x63:
425                         put_long_int(PST(0),info,code);
426                         fpop();
427                         return(0);
428                 case 0x65:
429                         fpush();
430                         get_temp_real(&tmp,info,code);
431                         real_to_real(&tmp,&ST(0));
432                         return(0);
433                 case 0x67:
434                         put_temp_real(PST(0),info,code);
435                         fpop();
436                         return(0);
437                 case 0xa2:
438                         put_long_real(PST(0),info,code);
439                         return(0);
440                 case 0xa3:
441                         put_long_real(PST(0),info,code);
442                         fpop();
443                         return(0);
444                 case 0xa4:
445                         address = ea(info,code);
446                         for (code = 0 ; code < 27 ; code++) {
447                                 ((int32_t *) & I387)[code] =
448                                    get_fs_long((u_int32_t *) address);
449                                 address += 4;
450                         }
451                         return(0);
452                 case 0xa6:
453                         address = ea(info,code);
454                         /*verify_area(address,108);*/
455                         for (code = 0 ; code < 27 ; code++) {
456                                 put_fs_long( ((int32_t *) & I387)[code],
457                                         (u_int32_t *) address);
458                                 address += 4;
459                         }
460                         I387.cwd = 0x037f;
461                         I387.swd = 0x0000;
462                         I387.twd = 0x0000;
463                         return(0);
464                 case 0xa7:
465                         address = ea(info,code);
466                         /*verify_area(address,2);*/
467                         put_fs_word(I387.swd,(short *) address);
468                         return(0);
469                 case 0xe2:
470                         put_short_int(PST(0),info,code);
471                         return(0);
472                 case 0xe3:
473                         put_short_int(PST(0),info,code);
474                         fpop();
475                         return(0);
476                 case 0xe4:
477                         fpush();
478                         get_BCD(&tmp,info,code);
479                         real_to_real(&tmp,&ST(0));
480                         return(0);
481                 case 0xe5:
482                         fpush();
483                         get_longlong_int(&tmp,info,code);
484                         real_to_real(&tmp,&ST(0));
485                         return(0);
486                 case 0xe6:
487                         put_BCD(PST(0),info,code);
488                         fpop();
489                         return(0);
490                 case 0xe7:
491                         put_longlong_int(PST(0),info,code);
492                         fpop();
493                         return(0);
494         }
495         switch (code >> 9) {
496                 case 0:
497                         get_short_real(&tmp,info,code);
498                         break;
499                 case 1:
500                         get_long_int(&tmp,info,code);
501                         break;
502                 case 2:
503                         get_long_real(&tmp,info,code);
504                         break;
505                 case 4:
506                         get_short_int(&tmp,info,code);
507         }
508         switch ((code>>3) & 0x27) {
509                 case 0:
510                         fadd(&tmp,PST(0),&tmp);
511                         real_to_real(&tmp,&ST(0));
512                         return(0);
513                 case 1:
514                         fmul(&tmp,PST(0),&tmp);
515                         real_to_real(&tmp,&ST(0));
516                         return(0);
517                 case 2:
518                         fcom(&tmp,PST(0));
519                         return(0);
520                 case 3:
521                         fcom(&tmp,PST(0));
522                         fpop();
523                         return(0);
524                 case 4:
525                         tmp.exponent ^= 0x8000;
526                         fadd(&tmp,PST(0),&tmp);
527                         real_to_real(&tmp,&ST(0));
528                         return(0);
529                 case 5:
530                         ST(0).exponent ^= 0x8000;
531                         fadd(&tmp,PST(0),&tmp);
532                         real_to_real(&tmp,&ST(0));
533                         return(0);
534                 case 6:
535                         fdiv(PST(0),&tmp,&tmp);
536                         real_to_real(&tmp,&ST(0));
537                         return(0);
538                 case 7:
539                         fdiv(&tmp,PST(0),&tmp);
540                         real_to_real(&tmp,&ST(0));
541                         return(0);
542         }
543         if ((code & 0x138) == 0x100) {
544                         fpush();
545                         real_to_real(&tmp,&ST(0));
546                         return(0);
547         }
548         printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
549                 info->tf_eip,code);
550         math_abort(info,SIGFPE);
551 }
552
553 static void
554 fpop(void)
555 {
556         u_int32_t tmp;
557
558         tmp = I387.swd & 0xffffc7ffUL;
559         I387.swd += 0x00000800;
560         I387.swd &= 0x00003800;
561         I387.swd |= tmp;
562 }
563
564 static void
565 fpush(void)
566 {
567         u_int32_t tmp;
568
569         tmp = I387.swd & 0xffffc7ffUL;
570         I387.swd += 0x00003800;
571         I387.swd &= 0x00003800;
572         I387.swd |= tmp;
573 }
574
575 static void 
576 fxchg(temp_real_unaligned * a, temp_real_unaligned * b)
577 {
578         temp_real_unaligned c;
579
580         c = *a;
581         *a = *b;
582         *b = c;
583 }
584
585 static temp_real_unaligned *
586 __st(int i)
587 {
588         i += I387.swd >> 11;
589         i &= 7;
590         return (temp_real_unaligned *) (i*10 + (char *)(I387.st_space));
591 }
592
593 /*
594  * linux/kernel/math/ea.c
595  *
596  * (C) 1991 Linus Torvalds
597  */
598
599 /*
600  * Calculate the effective address.
601  */
602
603
604 static int __regoffset[] = {
605         tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
606 };
607
608 #define REG(x) (((int *)curproc->p_md.md_regs)[__regoffset[(x)]])
609
610 static char *
611 sib(struct trapframe * info, int mod)
612 {
613         unsigned char ss,index,base;
614         int32_t offset = 0;
615
616         base = get_fs_byte((char *) info->tf_eip);
617         info->tf_eip++;
618         ss = base >> 6;
619         index = (base >> 3) & 7;
620         base &= 7;
621         if (index == 4)
622                 offset = 0;
623         else
624                 offset = REG(index);
625         offset <<= ss;
626         if (mod || base != 5)
627                 offset += REG(base);
628         if (mod == 1) {
629                 offset += (signed char) get_fs_byte((char *) info->tf_eip);
630                 info->tf_eip++;
631         } else if (mod == 2 || base == 5) {
632                 offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
633                 info->tf_eip += 4;
634         }
635         I387.foo = offset;
636         I387.fos = 0x17;
637         return (char *) offset;
638 }
639
640 static char *
641 ea(struct trapframe * info, unsigned short code)
642 {
643         unsigned char mod,rm;
644         int32_t * tmp;
645         int offset = 0;
646
647         mod = (code >> 6) & 3;
648         rm = code & 7;
649         if (rm == 4 && mod != 3)
650                 return sib(info,mod);
651         if (rm == 5 && !mod) {
652                 offset = get_fs_long((u_int32_t *) info->tf_eip);
653                 info->tf_eip += 4;
654                 I387.foo = offset;
655                 I387.fos = 0x17;
656                 return (char *) offset;
657         }
658         tmp = (int32_t *) &REG(rm);
659         switch (mod) {
660                 case 0: offset = 0; break;
661                 case 1:
662                         offset = (signed char) get_fs_byte((char *) info->tf_eip);
663                         info->tf_eip++;
664                         break;
665                 case 2:
666                         offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
667                         info->tf_eip += 4;
668                         break;
669 #ifdef notyet
670                 case 3:
671                         math_abort(info,1<<(SIGILL-1));
672 #endif
673         }
674         I387.foo = offset;
675         I387.fos = 0x17;
676         return offset + (char *) *tmp;
677 }
678 /*
679  * linux/kernel/math/get_put.c
680  *
681  * (C) 1991 Linus Torvalds
682  */
683
684 /*
685  * This file handles all accesses to user memory: getting and putting
686  * ints/reals/BCD etc. This is the only part that concerns itself with
687  * other than temporary real format. All other calculations are strictly temp_real.
688  */
689
690 static void 
691 get_short_real(temp_real * tmp, struct trapframe * info, unsigned short code)
692 {
693         char * addr;
694         short_real sr;
695
696         addr = ea(info,code);
697         sr = get_fs_long((u_int32_t *) addr);
698         short_to_temp(&sr,tmp);
699 }
700
701 static void
702 get_long_real(temp_real * tmp, struct trapframe * info, unsigned short code)
703 {
704         char * addr;
705         long_real lr;
706
707         addr = ea(info,code);
708         lr.a = get_fs_long((u_int32_t *) addr);
709         lr.b = get_fs_long(1 + (u_int32_t *) addr);
710         long_to_temp(&lr,tmp);
711 }
712
713 static void
714 get_temp_real(temp_real * tmp, struct trapframe * info, unsigned short code)
715 {
716         char * addr;
717
718         addr = ea(info,code);
719         tmp->a = get_fs_long((u_int32_t *) addr);
720         tmp->b = get_fs_long(1 + (u_int32_t *) addr);
721         tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
722 }
723
724 static void
725 get_short_int(temp_real * tmp, struct trapframe * info, unsigned short code)
726 {
727         char * addr;
728         temp_int ti;
729
730         addr = ea(info,code);
731         ti.a = (signed short) get_fs_word((unsigned short *) addr);
732         ti.b = 0;
733         if ((ti.sign = (ti.a < 0)) != 0)
734                 ti.a = - ti.a;
735         int_to_real(&ti,tmp);
736 }
737
738 static void
739 get_long_int(temp_real * tmp, struct trapframe * info, unsigned short code)
740 {
741         char * addr;
742         temp_int ti;
743
744         addr = ea(info,code);
745         ti.a = get_fs_long((u_int32_t *) addr);
746         ti.b = 0;
747         if ((ti.sign = (ti.a < 0)) != 0)
748                 ti.a = - ti.a;
749         int_to_real(&ti,tmp);
750 }
751
752 static void 
753 get_longlong_int(temp_real * tmp, struct trapframe * info, unsigned short code)
754 {
755         char * addr;
756         temp_int ti;
757
758         addr = ea(info,code);
759         ti.a = get_fs_long((u_int32_t *) addr);
760         ti.b = get_fs_long(1 + (u_int32_t *) addr);
761         if ((ti.sign = (ti.b < 0)) != 0)
762                 __asm__("notl %0 ; notl %1\n\t"
763                         "addl $1,%0 ; adcl $0,%1"
764                         :"=r" (ti.a),"=r" (ti.b)
765                         :"0" (ti.a),"1" (ti.b));
766         int_to_real(&ti,tmp);
767 }
768
/*
 * MUL10(lo, hi): multiply the 64-bit quantity hi:lo by ten in place.
 * The value is doubled, the doubled copy is parked in %ecx/%ebx, the
 * value is doubled twice more (x8) and the parked x2 is added back.
 * lo is held in %eax and hi in %edx; %ecx and %ebx are clobbered.
 */
#define MUL10(lo,hi) \
__asm__("addl %0,%0 ; adcl %1,%1\n\t" \
"movl %0,%%ecx ; movl %1,%%ebx\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %%ecx,%0 ; adcl %%ebx,%1" \
:"=a" (lo),"=d" (hi) \
:"0" (lo),"1" (hi):"cx","bx")
777
/*
 * ADD64(addend, lo, hi): add the 32-bit value addend to the 64-bit
 * quantity hi:lo in place, propagating the carry into hi.
 */
#define ADD64(addend,lo,hi) \
__asm__("addl %4,%0 ; adcl $0,%1" \
	:"=r" (lo),"=r" (hi) \
	:"0" (lo),"1" (hi),"r" ((u_int32_t) (addend)))
781
782 static void
783 get_BCD(temp_real * tmp, struct trapframe * info, unsigned short code)
784 {
785         int k;
786         char * addr;
787         temp_int i;
788         unsigned char c;
789
790         addr = ea(info,code);
791         addr += 9;
792         i.sign = 0x80 & get_fs_byte(addr--);
793         i.a = i.b = 0;
794         for (k = 0; k < 9; k++) {
795                 c = get_fs_byte(addr--);
796                 MUL10(i.a, i.b);
797                 ADD64((c>>4), i.a, i.b);
798                 MUL10(i.a, i.b);
799                 ADD64((c&0xf), i.a, i.b);
800         }
801         int_to_real(&i,tmp);
802 }
803
804 static void 
805 put_short_real(const temp_real * tmp,
806         struct trapframe * info, unsigned short code)
807 {
808         char * addr;
809         short_real sr;
810
811         addr = ea(info,code);
812         /*verify_area(addr,4);*/
813         temp_to_short(tmp,&sr);
814         put_fs_long(sr,(u_int32_t *) addr);
815 }
816
817 static void
818 put_long_real(const temp_real * tmp,
819         struct trapframe * info, unsigned short code)
820 {
821         char * addr;
822         long_real lr;
823
824         addr = ea(info,code);
825         /*verify_area(addr,8);*/
826         temp_to_long(tmp,&lr);
827         put_fs_long(lr.a, (u_int32_t *) addr);
828         put_fs_long(lr.b, 1 + (u_int32_t *) addr);
829 }
830
831 static void
832 put_temp_real(const temp_real * tmp,
833         struct trapframe * info, unsigned short code)
834 {
835         char * addr;
836
837         addr = ea(info,code);
838         /*verify_area(addr,10);*/
839         put_fs_long(tmp->a, (u_int32_t *) addr);
840         put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
841         put_fs_word(tmp->exponent, 4 + (short *) addr);
842 }
843
844 static void
845 put_short_int(const temp_real * tmp,
846         struct trapframe * info, unsigned short code)
847 {
848         char * addr;
849         temp_int ti;
850
851         addr = ea(info,code);
852         real_to_int(tmp,&ti);
853         /*verify_area(addr,2);*/
854         if (ti.sign)
855                 ti.a = -ti.a;
856         put_fs_word(ti.a,(short *) addr);
857 }
858
859 static void
860 put_long_int(const temp_real * tmp,
861         struct trapframe * info, unsigned short code)
862 {
863         char * addr;
864         temp_int ti;
865
866         addr = ea(info,code);
867         real_to_int(tmp,&ti);
868         /*verify_area(addr,4);*/
869         if (ti.sign)
870                 ti.a = -ti.a;
871         put_fs_long(ti.a,(u_int32_t *) addr);
872 }
873
874 static void
875 put_longlong_int(const temp_real * tmp,
876         struct trapframe * info, unsigned short code)
877 {
878         char * addr;
879         temp_int ti;
880
881         addr = ea(info,code);
882         real_to_int(tmp,&ti);
883         /*verify_area(addr,8);*/
884         if (ti.sign)
885                 __asm__("notl %0 ; notl %1\n\t"
886                         "addl $1,%0 ; adcl $0,%1"
887                         :"=r" (ti.a),"=r" (ti.b)
888                         :"0" (ti.a),"1" (ti.b));
889         put_fs_long(ti.a,(u_int32_t *) addr);
890         put_fs_long(ti.b,1 + (u_int32_t *) addr);
891 }
892
/*
 * DIV10(low, high, rem): divide the unsigned 64-bit quantity high:low
 * by ten in place and leave the remainder in rem.
 *
 * Two chained 32-bit divides: the first divides 0:high, leaving the new
 * high word in %eax and its remainder in %edx; the xchgl parks that
 * quotient in the "high" register and brings "low" into %eax; the
 * second divide then processes remainder:low.
 */
#define DIV10(low,high,rem) \
__asm__("divl %6\n\t" \
        "xchgl %1,%2\n\t" \
        "divl %6" \
        :"=d" (rem),"=a" (low),"=r" (high) \
        :"0" (0),"1" (high),"2" (low),"c" (10))
897
898 static void
899 put_BCD(const temp_real * tmp,struct trapframe * info, unsigned short code)
900 {
901         int k,rem;
902         char * addr;
903         temp_int i;
904         unsigned char c;
905
906         addr = ea(info,code);
907         /*verify_area(addr,10);*/
908         real_to_int(tmp,&i);
909         if (i.sign)
910                 put_fs_byte(0x80, addr+9);
911         else
912                 put_fs_byte(0, addr+9);
913         for (k = 0; k < 9; k++) {
914                 DIV10(i.a,i.b,rem);
915                 c = rem;
916                 DIV10(i.a,i.b,rem);
917                 c += rem<<4;
918                 put_fs_byte(c,addr++);
919         }
920 }
921
922 /*
923  * linux/kernel/math/mul.c
924  *
925  * (C) 1991 Linus Torvalds
926  */
927
928 /*
929  * temporary real multiplication routine.
930  */
931
932
/*
 * Shift the 128-bit value stored in c[0..3] (least significant word
 * first) left by one bit; the bit shifted out of c[3] is dropped.
 *
 * Written in C rather than inline assembly: the previous asm modified
 * the array through a pointer input without declaring an output or a
 * "memory" clobber, so the compiler was entitled to assume c[] was
 * unchanged across the call.  Assumes 32-bit int, as the old code did.
 */
static void
shift(int * c)
{
        unsigned int * w = (unsigned int *) c;
        unsigned int carry = 0;
        unsigned int out;
        int k;

        for (k = 0; k < 4; k++) {
                out = w[k] >> 31;
                w[k] = (w[k] << 1) | carry;
                carry = out;
        }
}
942
943 static void
944 mul64(const temp_real * a, const temp_real * b, int * c)
945 {
946         __asm__("movl (%0),%%eax\n\t"
947                 "mull (%1)\n\t"
948                 "movl %%eax,(%2)\n\t"
949                 "movl %%edx,4(%2)\n\t"
950                 "movl 4(%0),%%eax\n\t"
951                 "mull 4(%1)\n\t"
952                 "movl %%eax,8(%2)\n\t"
953                 "movl %%edx,12(%2)\n\t"
954                 "movl (%0),%%eax\n\t"
955                 "mull 4(%1)\n\t"
956                 "addl %%eax,4(%2)\n\t"
957                 "adcl %%edx,8(%2)\n\t"
958                 "adcl $0,12(%2)\n\t"
959                 "movl 4(%0),%%eax\n\t"
960                 "mull (%1)\n\t"
961                 "addl %%eax,4(%2)\n\t"
962                 "adcl %%edx,8(%2)\n\t"
963                 "adcl $0,12(%2)"
964                 ::"S" (a),"c" (b),"D" (c)
965                 :"ax","dx");
966 }
967
968 static void
969 fmul(const temp_real * src1, const temp_real * src2, temp_real * result)
970 {
971         int i,sign;
972         int tmp[4] = {0,0,0,0};
973
974         sign = (src1->exponent ^ src2->exponent) & 0x8000;
975         i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
976         if (i<0) {
977                 result->exponent = sign;
978                 result->a = result->b = 0;
979                 return;
980         }
981         if (i>0x7fff) {
982                 set_OE();
983                 return;
984         }
985         mul64(src1,src2,tmp);
986         if (tmp[0] || tmp[1] || tmp[2] || tmp[3])
987                 while (i && tmp[3] >= 0) {
988                         i--;
989                         shift(tmp);
990                 }
991         else
992                 i = 0;
993         result->exponent = i | sign;
994         result->a = tmp[2];
995         result->b = tmp[3];
996 }
997
998 /*
999  * linux/kernel/math/div.c
1000  *
1001  * (C) 1991 Linus Torvalds
1002  */
1003
1004 /*
1005  * temporary real division routine.
1006  */
1007
/*
 * Shift the 128-bit value in c[0..3] (least significant word first)
 * left by one bit, discarding the bit shifted out of c[3].
 *
 * Written in C: the former inline asm stored through the pointer
 * without a "memory" clobber or output operand, which hides the update
 * of c[] from the compiler.  Assumes 32-bit int, as the old code did.
 */
static void
shift_left(int * c)
{
        unsigned int * w = (unsigned int *) c;

        /* work downwards so each step still sees the old lower word */
        w[3] = (w[3] << 1) | (w[2] >> 31);
        w[2] = (w[2] << 1) | (w[1] >> 31);
        w[1] = (w[1] << 1) | (w[0] >> 31);
        w[0] <<= 1;
}
1017
/*
 * Logical right shift by one bit of the 128-bit value in c[0..3]
 * (least significant word first); a zero enters at the top and the
 * bit shifted out of c[0] is discarded.
 *
 * Written in C: the former inline asm modified c[] through a pointer
 * input without a "memory" clobber, hiding the store from the compiler.
 * Assumes 32-bit int, as the old code did.
 */
static void
shift_right(int * c)
{
        unsigned int * w = (unsigned int *) c;

        /* work upwards so each step still sees the old higher word */
        w[0] = (w[0] >> 1) | (w[1] << 31);
        w[1] = (w[1] >> 1) | (w[2] << 31);
        w[2] = (w[2] >> 1) | (w[3] << 31);
        w[3] >>= 1;
}
1024
/*
 * 128-bit subtract: b[0..3] -= a[0..3] (least significant word first).
 *
 * Returns 1 when no borrow came out of the top word (b was >= a as
 * unsigned values) and 0 otherwise.  b is overwritten with the
 * wrapped difference in both cases, so callers that want a conditional
 * subtract must work on a copy.
 *
 * Written in C: the former inline asm updated b[] through a pointer
 * input with no "memory" clobber, hiding the store from the compiler.
 * Assumes 32-bit int, as the old code did.
 */
static int
try_sub(int * a, int * b)
{
        const unsigned int * sub = (const unsigned int *) a;
        unsigned int * acc = (unsigned int *) b;
        unsigned int borrow = 0;
        unsigned int s,m,d;
        int k;

        for (k = 0; k < 4; k++) {
                s = sub[k];
                m = acc[k];
                d = m - s;
                acc[k] = d - borrow;
                /* borrow out if m < s, or if taking the old borrow wraps */
                borrow = (m < s) | (d < borrow);
        }
        return (borrow == 0);
}
1037
/*
 * Restoring division producing 64 quotient bits.
 *
 * For each of 64 steps, a copy of a[] has b[] subtracted from it with
 * try_sub(); when that succeeds the copy replaces a[] and the current
 * quotient bit is set.  b[] is then shifted right one bit.  Quotient
 * bits are OR-ed into c[3] (bit 31 downwards) and then c[2]; c[0] and
 * c[1] are not touched.  Both a[] and b[] are modified.
 */
static void
div64(int * a, int * b, int * c)
{
        int trial[4];
        int word,k;
        unsigned int bit;

        for (word = 3; word >= 2; word--) {
                for (bit = 0x80000000UL; bit != 0; bit >>= 1) {
                        for (k = 0; k < 4; k++)
                                trial[k] = a[k];
                        if (try_sub(b,trial)) {
                                c[word] |= bit;
                                for (k = 0; k < 4; k++)
                                        a[k] = trial[k];
                        }
                        shift_right(b);
                }
        }
}
1061
1062 static void
1063 fdiv(const temp_real * src1, const temp_real * src2, temp_real * result)
1064 {
1065         int i,sign;
1066         int a[4],b[4],tmp[4] = {0,0,0,0};
1067
1068         sign = (src1->exponent ^ src2->exponent) & 0x8000;
1069         if (!(src2->a || src2->b)) {
1070                 set_ZE();
1071                 return;
1072         }
1073         i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
1074         if (i<0) {
1075                 set_UE();
1076                 result->exponent = sign;
1077                 result->a = result->b = 0;
1078                 return;
1079         }
1080         a[0] = a[1] = 0;
1081         a[2] = src1->a;
1082         a[3] = src1->b;
1083         b[0] = b[1] = 0;
1084         b[2] = src2->a;
1085         b[3] = src2->b;
1086         while (b[3] >= 0) {
1087                 i++;
1088                 shift_left(b);
1089         }
1090         div64(a,b,tmp);
1091         if (tmp[0] || tmp[1] || tmp[2] || tmp[3]) {
1092                 while (i && tmp[3] >= 0) {
1093                         i--;
1094                         shift_left(tmp);
1095                 }
1096                 if (tmp[3] >= 0)
1097                         set_DE();
1098         } else
1099                 i = 0;
1100         if (i>0x7fff) {
1101                 set_OE();
1102                 return;
1103         }
1104         if (tmp[0] || tmp[1])
1105                 set_PE();
1106         result->exponent = i | sign;
1107         result->a = tmp[2];
1108         result->b = tmp[3];
1109 }
1110
1111 /*
1112  * linux/kernel/math/add.c
1113  *
1114  * (C) 1991 Linus Torvalds
1115  */
1116
1117 /*
1118  * temporary real addition routine.
1119  *
1120  * NOTE! These aren't exact: they are only 62 bits wide, and don't do
1121  * correct rounding. Fast hack. The reason is that we shift right the
1122  * values by two, in order not to have overflow (1 bit), and to be able
1123  * to move the sign into the mantissa (1 bit). Much simpler algorithms,
1124  * and 62 bits (61 really - no rounding) accuracy is usually enough. The
1125  * only time you should notice anything weird is when adding 64-bit
1126  * integers together. When using doubles (52 bits accuracy), the
1127  * 61-bit accuracy never shows at all.
1128  */
1129
1130 #define NEGINT(a) \
1131 __asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
1132         :"=r" (a->a),"=r" (a->b) \
1133         :"0" (a->a),"1" (a->b))
1134
1135 static void signify(temp_real * a)
1136 {
1137         a->exponent += 2;
1138         __asm__("shrdl $2,%1,%0 ; shrl $2,%1"
1139                 :"=r" (a->a),"=r" (a->b)
1140                 :"0" (a->a),"1" (a->b));
1141         if (a->exponent < 0)
1142                 NEGINT(a);
1143         a->exponent &= 0x7fff;
1144 }
1145
1146 static void unsignify(temp_real * a)
1147 {
1148         if (!(a->a || a->b)) {
1149                 a->exponent = 0;
1150                 return;
1151         }
1152         a->exponent &= 0x7fff;
1153         if (a->b < 0) {
1154                 NEGINT(a);
1155                 a->exponent |= 0x8000;
1156         }
1157         while (a->b >= 0) {
1158                 a->exponent--;
1159                 __asm__("addl %0,%0 ; adcl %1,%1"
1160                         :"=r" (a->a),"=r" (a->b)
1161                         :"0" (a->a),"1" (a->b));
1162         }
1163 }
1164
1165 static void
1166 fadd(const temp_real * src1, const temp_real * src2, temp_real * result)
1167 {
1168         temp_real a,b;
1169         int x1,x2,shift;
1170
1171         x1 = src1->exponent & 0x7fff;
1172         x2 = src2->exponent & 0x7fff;
1173         if (x1 > x2) {
1174                 a = *src1;
1175                 b = *src2;
1176                 shift = x1-x2;
1177         } else {
1178                 a = *src2;
1179                 b = *src1;
1180                 shift = x2-x1;
1181         }
1182         if (shift >= 64) {
1183                 *result = a;
1184                 return;
1185         }
1186         if (shift >= 32) {
1187                 b.a = b.b;
1188                 b.b = 0;
1189                 shift -= 32;
1190         }
1191         __asm__("shrdl %4,%1,%0 ; shrl %4,%1"
1192                 :"=r" (b.a),"=r" (b.b)
1193                 :"0" (b.a),"1" (b.b),"c" ((char) shift));
1194         signify(&a);
1195         signify(&b);
1196         __asm__("addl %4,%0 ; adcl %5,%1"
1197                 :"=r" (a.a),"=r" (a.b)
1198                 :"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
1199         unsignify(&a);
1200         *result = a;
1201 }
1202
1203 /*
1204  * linux/kernel/math/compare.c
1205  *
1206  * (C) 1991 Linus Torvalds
1207  */
1208
1209 /*
1210  * temporary real comparison routines
1211  */
1212
1213
/* Clear bits 14, 10 and 8 (mask 0x4500) of the emulated status word. */
#define clear_Cx() (I387.swd &= ~(0x4000 | 0x0400 | 0x0100))
1215
1216 static void 
1217 normalize(temp_real * a)
1218 {
1219         int i = a->exponent & 0x7fff;
1220         int sign = a->exponent & 0x8000;
1221
1222         if (!(a->a || a->b)) {
1223                 a->exponent = 0;
1224                 return;
1225         }
1226         while (i && a->b >= 0) {
1227                 i--;
1228                 __asm__("addl %0,%0 ; adcl %1,%1"
1229                         :"=r" (a->a),"=r" (a->b)
1230                         :"0" (a->a),"1" (a->b));
1231         }
1232         a->exponent = i | sign;
1233 }
1234
1235 static void
1236 ftst(const temp_real * a)
1237 {
1238         temp_real b;
1239
1240         clear_Cx();
1241         b = *a;
1242         normalize(&b);
1243         if (b.a || b.b || b.exponent) {
1244                 if (b.exponent < 0)
1245                         set_C0();
1246         } else
1247                 set_C3();
1248 }
1249
1250 static void
1251 fcom(const temp_real * src1, const temp_real * src2)
1252 {
1253         temp_real a;
1254
1255         a = *src1;
1256         a.exponent ^= 0x8000;
1257         fadd(&a,src2,&a);
1258         ftst(&a);
1259 }
1260
1261 static void
1262 fucom(const temp_real * src1, const temp_real * src2)
1263 {
1264         fcom(src1,src2);
1265 }
1266
1267 /*
1268  * linux/kernel/math/convert.c
1269  *
1270  * (C) 1991 Linus Torvalds
1271  */
1272
1273
/*
 * NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
 * and temp_to_short conversion routines: don't touch them if you don't
 * know what's going on. They are the adding of one in the rounding: the
 * overflow bit is also used for adding one into the exponent. Thus it
 * looks like the overflow would be incorrectly handled, but due to the
 * way the IEEE numbers work, things are correct.
 *
 * There is no checking for total overflow in the conversions, though (ie
 * if the temp-real number simply won't fit in a short- or long-real.)
 */
1285
1286 static void
1287 short_to_temp(const short_real * a, temp_real * b)
1288 {
1289         if (!(*a & 0x7fffffff)) {
1290                 b->a = b->b = 0;
1291                 if (*a)
1292                         b->exponent = 0x8000;
1293                 else
1294                         b->exponent = 0;
1295                 return;
1296         }
1297         b->exponent = ((*a>>23) & 0xff)-127+16383;
1298         if (*a<0)
1299                 b->exponent |= 0x8000;
1300         b->b = (*a<<8) | 0x80000000UL;
1301         b->a = 0;
1302 }
1303
1304 static void
1305 long_to_temp(const long_real * a, temp_real * b)
1306 {
1307         if (!a->a && !(a->b & 0x7fffffff)) {
1308                 b->a = b->b = 0;
1309                 if (a->b)
1310                         b->exponent = 0x8000;
1311                 else
1312                         b->exponent = 0;
1313                 return;
1314         }
1315         b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
1316         if (a->b<0)
1317                 b->exponent |= 0x8000;
1318         b->b = 0x80000000UL | (a->b<<11) | (((u_int32_t)a->a)>>21);
1319         b->a = a->a<<11;
1320 }
1321
1322 static void 
1323 temp_to_short(const temp_real * a, short_real * b)
1324 {
1325         if (!(a->exponent & 0x7fff)) {
1326                 *b = (a->exponent)?0x80000000UL:0;
1327                 return;
1328         }
1329         *b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
1330         if (a->exponent < 0)
1331                 *b |= 0x80000000UL;
1332         *b |= (a->b >> 8) & 0x007fffff;
1333         switch ((int)ROUNDING) {
1334                 case ROUND_NEAREST:
1335                         if ((a->b & 0xff) > 0x80)
1336                                 ++*b;
1337                         break;
1338                 case ROUND_DOWN:
1339                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1340                                 ++*b;
1341                         break;
1342                 case ROUND_UP:
1343                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1344                                 ++*b;
1345                         break;
1346         }
1347 }
1348
1349 static void
1350 temp_to_long(const temp_real * a, long_real * b)
1351 {
1352         if (!(a->exponent & 0x7fff)) {
1353                 b->a = 0;
1354                 b->b = (a->exponent)?0x80000000UL:0;
1355                 return;
1356         }
1357         b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
1358             0x7ff00000;
1359         if (a->exponent < 0)
1360                 b->b |= 0x80000000UL;
1361         b->b |= (a->b >> 11) & 0x000fffff;
1362         b->a = a->b << 21;
1363         b->a |= (a->a >> 11) & 0x001fffff;
1364         switch ((int)ROUNDING) {
1365                 case ROUND_NEAREST:
1366                         if ((a->a & 0x7ff) > 0x400)
1367                                 __asm__("addl $1,%0 ; adcl $0,%1"
1368                                         :"=r" (b->a),"=r" (b->b)
1369                                         :"0" (b->a),"1" (b->b));
1370                         break;
1371                 case ROUND_DOWN:
1372                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1373                                 __asm__("addl $1,%0 ; adcl $0,%1"
1374                                         :"=r" (b->a),"=r" (b->b)
1375                                         :"0" (b->a),"1" (b->b));
1376                         break;
1377                 case ROUND_UP:
1378                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1379                                 __asm__("addl $1,%0 ; adcl $0,%1"
1380                                         :"=r" (b->a),"=r" (b->b)
1381                                         :"0" (b->a),"1" (b->b));
1382                         break;
1383         }
1384 }
1385
1386 static void 
1387 frndint(const temp_real * a, temp_real * b)
1388 {
1389         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1390         u_int32_t underflow;
1391
1392         if ((shift < 0) || (shift == 16383+63)) {
1393                 *b = *a;
1394                 return;
1395         }
1396         b->a = b->b = underflow = 0;
1397         b->exponent = a->exponent;
1398         if (shift < 32) {
1399                 b->b = a->b; b->a = a->a;
1400         } else if (shift < 64) {
1401                 b->a = a->b; underflow = a->a;
1402                 shift -= 32;
1403                 b->exponent += 32;
1404         } else if (shift < 96) {
1405                 underflow = a->b;
1406                 shift -= 64;
1407                 b->exponent += 64;
1408         } else {
1409                 underflow = 1;
1410                 shift = 0;
1411         }
1412         b->exponent += shift;
1413         __asm__("shrdl %2,%1,%0"
1414                 :"=r" (underflow),"=r" (b->a)
1415                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1416         __asm__("shrdl %2,%1,%0"
1417                 :"=r" (b->a),"=r" (b->b)
1418                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1419         __asm__("shrl %1,%0"
1420                 :"=r" (b->b)
1421                 :"c" ((char) shift),"0" (b->b));
1422         switch ((int)ROUNDING) {
1423                 case ROUND_NEAREST:
1424                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1425                                 :"=r" (b->a),"=r" (b->b)
1426                                 :"0" (b->a),"1" (b->b)
1427                                 ,"r" (0x7fffffff + (b->a & 1))
1428                                 ,"m" (*&underflow));
1429                         break;
1430                 case ROUND_UP:
1431                         if ((b->exponent >= 0) && underflow)
1432                                 __asm__("addl $1,%0 ; adcl $0,%1"
1433                                         :"=r" (b->a),"=r" (b->b)
1434                                         :"0" (b->a),"1" (b->b));
1435                         break;
1436                 case ROUND_DOWN:
1437                         if ((b->exponent < 0) && underflow)
1438                                 __asm__("addl $1,%0 ; adcl $0,%1"
1439                                         :"=r" (b->a),"=r" (b->b)
1440                                         :"0" (b->a),"1" (b->b));
1441                         break;
1442         }
1443         if (b->a || b->b)
1444                 while (b->b >= 0) {
1445                         b->exponent--;
1446                         __asm__("addl %0,%0 ; adcl %1,%1"
1447                                 :"=r" (b->a),"=r" (b->b)
1448                                 :"0" (b->a),"1" (b->b));
1449                 }
1450         else
1451                 b->exponent = 0;
1452 }
1453
1454 static void
1455 Fscale(const temp_real *a, const temp_real *b, temp_real *c)
1456 {
1457         temp_int ti;
1458
1459         *c = *a;
1460         if(!c->a && !c->b) {                            /* 19 Sep 92*/
1461                 c->exponent = 0;
1462                 return;
1463         }
1464         real_to_int(b, &ti);
1465         if(ti.sign)
1466                 c->exponent -= ti.a;
1467         else
1468                 c->exponent += ti.a;
1469 }
1470
1471 static void
1472 real_to_int(const temp_real * a, temp_int * b)
1473 {
1474         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1475         u_int32_t underflow;
1476
1477         b->a = b->b = underflow = 0;
1478         b->sign = (a->exponent < 0);
1479         if (shift < 0) {
1480                 set_OE();
1481                 return;
1482         }
1483         if (shift < 32) {
1484                 b->b = a->b; b->a = a->a;
1485         } else if (shift < 64) {
1486                 b->a = a->b; underflow = a->a;
1487                 shift -= 32;
1488         } else if (shift < 96) {
1489                 underflow = a->b;
1490                 shift -= 64;
1491         } else {
1492                 underflow = 1;
1493                 shift = 0;
1494         }
1495         __asm__("shrdl %2,%1,%0"
1496                 :"=r" (underflow),"=r" (b->a)
1497                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1498         __asm__("shrdl %2,%1,%0"
1499                 :"=r" (b->a),"=r" (b->b)
1500                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1501         __asm__("shrl %1,%0"
1502                 :"=r" (b->b)
1503                 :"c" ((char) shift),"0" (b->b));
1504         switch ((int)ROUNDING) {
1505                 case ROUND_NEAREST:
1506                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1507                                 :"=r" (b->a),"=r" (b->b)
1508                                 :"0" (b->a),"1" (b->b)
1509                                 ,"r" (0x7fffffff + (b->a & 1))
1510                                 ,"m" (*&underflow));
1511                         break;
1512                 case ROUND_UP:
1513                         if (!b->sign && underflow)
1514                                 __asm__("addl $1,%0 ; adcl $0,%1"
1515                                         :"=r" (b->a),"=r" (b->b)
1516                                         :"0" (b->a),"1" (b->b));
1517                         break;
1518                 case ROUND_DOWN:
1519                         if (b->sign && underflow)
1520                                 __asm__("addl $1,%0 ; adcl $0,%1"
1521                                         :"=r" (b->a),"=r" (b->b)
1522                                         :"0" (b->a),"1" (b->b));
1523                         break;
1524         }
1525 }
1526
1527 static void
1528 int_to_real(const temp_int * a, temp_real * b)
1529 {
1530         b->a = a->a;
1531         b->b = a->b;
1532         if (b->a || b->b)
1533                 b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
1534         else {
1535                 b->exponent = 0;
1536                 return;
1537         }
1538         while (b->b >= 0) {
1539                 b->exponent--;
1540                 __asm__("addl %0,%0 ; adcl %1,%1"
1541                         :"=r" (b->a),"=r" (b->b)
1542                         :"0" (b->a),"1" (b->b));
1543         }
1544 }
1545
1546 static int
1547 fpu_modevent(module_t mod, int type, void *unused)
1548 {
1549         switch (type) {
1550         case MOD_LOAD:
1551                 if (pmath_emulate) {
1552                         printf("Another Math emulator already present\n");
1553                         return EBUSY;
1554                 }
1555                 pmath_emulate = math_emulate;
1556                 if (bootverbose)
1557                         printf("Math emulator present\n");
1558                 break;
1559         case MOD_UNLOAD:
1560                 if (pmath_emulate != math_emulate) {
1561                         printf("Cannot unload another math emulator\n");
1562                         return EACCES;
1563                 }
1564                 pmath_emulate = 0;
1565                 if (bootverbose)
1566                         printf("Math emulator unloaded\n");
1567                 break;
1568         default:
1569                 break;
1570         }
1571         return 0;
1572 }
1573 static moduledata_t fpumod = {
1574         "fpu",
1575         fpu_modevent,
1576         0
1577 };
1578 DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);