thread stage 1: convert curproc to curthread, embed struct thread in proc.
[dragonfly.git] / sys / i386 / i386 / math_emulate.c
1 /*
2  * linux/kernel/math/math_emulate.c
3  *
4  * (C) 1991 Linus Torvalds
5  *
6  * [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
7  *
8  *      from: 386BSD 0.1
9  * $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
10  * $DragonFly: src/sys/i386/i386/Attic/math_emulate.c,v 1.3 2003/06/18 06:33:24 dillon Exp $
11  */
12
13 /*
14  * Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
15  * even for soft-float, unless you use bruce evans' patches. The patches
16  * are great, but they have to be re-applied for every version, and the
17  * library is different for soft-float and 80387. So emulation is more
18  * practical, even though it's slower.
19  *
20  * 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
21  * about add/sub/mul/div. Urgel. I should find some good source, but I'll
22  * just fake up something.
23  *
24  * 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
25  * test every possible combination.
26  */
27
28 /*
29  * This file is full of ugly macros etc: one problem was that gcc simply
30  * didn't want to make the structures as they should be: it has to try to
31  * align them. Sickening code, but at least I've hidden the ugly things
32  * in this one file: the other files don't need to know about these things.
33  *
34  * The other files also don't care about ST(x) etc - they just get addresses
35  * to 80-bit temporary reals, and do with them as they please. I wanted to
36  * hide most of the 387-specific things here.
37  */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41
42 #include <machine/frame.h>
43 #include <machine/reg.h>
44
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47
48 #include <vm/vm.h>
49 #include <sys/lock.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <sys/user.h>
53
54 #define __ALIGNED_TEMP_REAL 1
55 #include <i386/i386/math_emu.h>
56
/* Swap the two bytes of the 16-bit lvalue x in place. */
#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)(x)))
/* Emulated stack register x, as an lvalue located by __st(). */
#define ST(x) (*__st((x)))
/* Same register as ST(x), as a read-only temp_real pointer. */
#define PST(x) ((const temp_real *) __st((x)))
/*
 * Give up on the current instruction: restore tfp->tf_eip from the
 * caller's local `oldeip' and return signo from the enclosing function.
 * Wrapped in do/while(0) so that it behaves as a single statement, e.g.
 * as the body of an unbraced if/else.
 */
#define math_abort(tfp, signo) \
	do { \
		(tfp)->tf_eip = oldeip; \
		return (signo); \
	} while (0)
61
62 /*
63  * We don't want these inlined - it gets too messy in the machine-code.
64  */
65 static void fpop(void);
66 static void fpush(void);
67 static void fxchg(temp_real_unaligned * a, temp_real_unaligned * b);
68 static temp_real_unaligned * __st(int i);
69
/*
 * Fetch one byte from adr with fubyte() and return it narrowed to
 * unsigned char.  The fubyte() result is not checked here.
 */
static unsigned char
get_fs_byte(char *adr)
{
	return ((unsigned char)fubyte(adr));
}
73
/*
 * Fetch a 16-bit value from adr.  The fetch goes through fuword() and the
 * result is narrowed to unsigned short.
 * NOTE(review): get_fs_long() uses the same fuword() primitive, so this
 * presumably reads more than two bytes - confirm whether a 16-bit fetch
 * primitive should be used instead.
 */
static unsigned short
get_fs_word(unsigned short *adr)
{
	return ((unsigned short)fuword(adr));
}
77
78 static u_int32_t
79 get_fs_long(u_int32_t *adr)
80         { return(fuword(adr)); }
81
/*
 * Store the byte val at adr with subyte().  The result of subyte() is
 * deliberately discarded.
 */
static void
put_fs_byte(unsigned char val, char *adr)
{
	(void)subyte(adr, val);
}
85
/*
 * Store the 16-bit value val at adr with susword().  The result of
 * susword() is deliberately discarded.
 */
static void
put_fs_word(unsigned short val, short *adr)
{
	(void)susword(adr, val);
}
89
90 static void 
91 put_fs_long(u_long val, u_int32_t *adr)
92         { (void)suword(adr,val); }
93
94 static int
95 math_emulate(struct trapframe * info)
96 {
97         unsigned short code;
98         temp_real tmp;
99         char * address;
100         u_int32_t oldeip;
101
102         /* ever used fp? */
103         /* YYY NOTE: pcb will be moved out of uarea! */
104         if ((((struct pcb *)curproc->p_addr)->pcb_flags & FP_SOFTFP) == 0) {
105                 ((struct pcb *)curproc->p_addr)->pcb_flags |= FP_SOFTFP;
106                 I387.cwd = 0x037f;
107                 I387.swd = 0x0000;
108                 I387.twd = 0x0000;
109         }
110
111         if (I387.cwd & I387.swd & 0x3f)
112                 I387.swd |= 0x8000;
113         else
114                 I387.swd &= 0x7fff;
115         oldeip = info->tf_eip;
116 /* 0x001f means user code space */
117         if ((u_short)info->tf_cs != 0x001F) {
118                 printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
119                         (u_long)oldeip);
120                 panic("?Math emulation needed in kernel?");
121         }
122         /* completely ignore an operand-size prefix */
123         if (get_fs_byte((char *) info->tf_eip) == 0x66)
124                 info->tf_eip++;
125         code = get_fs_word((unsigned short *) info->tf_eip);
126         bswapw(code);
127         code &= 0x7ff;
128         I387.fip = oldeip;
129         *(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
130         *(1+(unsigned short *) &I387.fcs) = code;
131         info->tf_eip += 2;
132         switch (code) {
133                 case 0x1d0: /* fnop */
134                         return(0);
135                 case 0x1d1: case 0x1d2: case 0x1d3:  /* fst to 32-bit mem */
136                 case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
137                         math_abort(info,SIGILL);
138                 case 0x1e0: /* fchs */
139                         ST(0).exponent ^= 0x8000;
140                         return(0);
141                 case 0x1e1: /* fabs */
142                         ST(0).exponent &= 0x7fff;
143                         return(0);
144                 case 0x1e2: case 0x1e3:
145                         math_abort(info,SIGILL);
146                 case 0x1e4: /* ftst */
147                         ftst(PST(0));
148                         return(0);
149                 case 0x1e5: /* fxam */
150                         printf("fxam not implemented\n");
151                         math_abort(info,SIGILL);
152                 case 0x1e6: case 0x1e7: /* fldenv */
153                         math_abort(info,SIGILL);
154                 case 0x1e8: /* fld1 */
155                         fpush();
156                         ST(0) = CONST1;
157                         return(0);
158                 case 0x1e9: /* fld2t */
159                         fpush();
160                         ST(0) = CONSTL2T;
161                         return(0);
162                 case 0x1ea: /* fld2e */
163                         fpush();
164                         ST(0) = CONSTL2E;
165                         return(0);
166                 case 0x1eb: /* fldpi */
167                         fpush();
168                         ST(0) = CONSTPI;
169                         return(0);
170                 case 0x1ec: /* fldlg2 */
171                         fpush();
172                         ST(0) = CONSTLG2;
173                         return(0);
174                 case 0x1ed: /* fldln2 */
175                         fpush();
176                         ST(0) = CONSTLN2;
177                         return(0);
178                 case 0x1ee: /* fldz */
179                         fpush();
180                         ST(0) = CONSTZ;
181                         return(0);
182                 case 0x1ef:
183                         math_abort(info,SIGILL);
184                 case 0x1f0: /* f2xm1 */
185                 case 0x1f1: /* fyl2x */
186                 case 0x1f2: /* fptan */
187                 case 0x1f3: /* fpatan */
188                 case 0x1f4: /* fxtract */
189                 case 0x1f5: /* fprem1 */
190                 case 0x1f6: /* fdecstp */
191                 case 0x1f7: /* fincstp */
192                 case 0x1f8: /* fprem */
193                 case 0x1f9: /* fyl2xp1 */
194                 case 0x1fa: /* fsqrt */
195                 case 0x1fb: /* fsincos */
196                 case 0x1fe: /* fsin */
197                 case 0x1ff: /* fcos */
198                         uprintf(
199                          "math_emulate: instruction %04x not implemented\n",
200                           code + 0xd800);
201                         math_abort(info,SIGILL);
202                 case 0x1fc: /* frndint */
203                         frndint(PST(0),&tmp);
204                         real_to_real(&tmp,&ST(0));
205                         return(0);
206                 case 0x1fd: /* fscale */
207                         /* incomplete and totally inadequate -wfj */
208                         Fscale(PST(0), PST(1), &tmp);
209                         real_to_real(&tmp,&ST(0));
210                         return(0);                      /* 19 Sep 92*/
211                 case 0x2e9: /* ????? */
212 /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9  ATS */
213                         fucom(PST(1),PST(0));
214                         fpop(); fpop();
215                         return(0);
216                 case 0x3d0: case 0x3d1: /* fist ?? */
217                         return(0);
218                 case 0x3e2: /* fclex */
219                         I387.swd &= 0x7f00;
220                         return(0);
221                 case 0x3e3: /* fninit */
222                         I387.cwd = 0x037f;
223                         I387.swd = 0x0000;
224                         I387.twd = 0x0000;
225                         return(0);
226                 case 0x3e4:
227                         return(0);
228                 case 0x6d9: /* fcompp */
229                         fcom(PST(1),PST(0));
230                         fpop(); fpop();
231                         return(0);
232                 case 0x7e0: /* fstsw ax */
233                         *(short *) &info->tf_eax = I387.swd;
234                         return(0);
235         }
236         switch (code >> 3) {
237                 case 0x18: /* fadd */
238                         fadd(PST(0),PST(code & 7),&tmp);
239                         real_to_real(&tmp,&ST(0));
240                         return(0);
241                 case 0x19: /* fmul */
242                         fmul(PST(0),PST(code & 7),&tmp);
243                         real_to_real(&tmp,&ST(0));
244                         return(0);
245                 case 0x1a: /* fcom */
246                         fcom(PST(code & 7),PST(0));
247                         return(0);
248                 case 0x1b: /* fcomp */
249                         fcom(PST(code & 7),PST(0));
250                         fpop();
251                         return(0);
252                 case 0x1c: /* fsubr */
253                         real_to_real(&ST(code & 7),&tmp);
254                         tmp.exponent ^= 0x8000;
255                         fadd(PST(0),&tmp,&tmp);
256                         real_to_real(&tmp,&ST(0));
257                         return(0);
258                 case 0x1d: /* fsub */
259                         ST(0).exponent ^= 0x8000;
260                         fadd(PST(0),PST(code & 7),&tmp);
261                         real_to_real(&tmp,&ST(0));
262                         return(0);
263                 case 0x1e: /* fdivr */
264                         fdiv(PST(0),PST(code & 7),&tmp);
265                         real_to_real(&tmp,&ST(0));
266                         return(0);
267                 case 0x1f: /* fdiv */
268                         fdiv(PST(code & 7),PST(0),&tmp);
269                         real_to_real(&tmp,&ST(0));
270                         return(0);
271                 case 0x38: /* fld */
272                         fpush();
273                         ST(0) = ST((code & 7)+1);  /* why plus 1 ????? ATS */
274                         return(0);
275                 case 0x39: /* fxch */
276                         fxchg(&ST(0),&ST(code & 7));
277                         return(0);
278                 case 0x3b: /*  ??? ??? wrong ???? ATS */
279                         ST(code & 7) = ST(0);
280                         fpop();
281                         return(0);
282                 case 0x98: /* fadd */
283                         fadd(PST(0),PST(code & 7),&tmp);
284                         real_to_real(&tmp,&ST(code & 7));
285                         return(0);
286                 case 0x99: /* fmul */
287                         fmul(PST(0),PST(code & 7),&tmp);
288                         real_to_real(&tmp,&ST(code & 7));
289                         return(0);
290                 case 0x9a: /* ???? , my manual don't list a direction bit
291 for fcom , ??? ATS */
292                         fcom(PST(code & 7),PST(0));
293                         return(0);
294                 case 0x9b: /* same as above , ATS */
295                         fcom(PST(code & 7),PST(0));
296                         fpop();
297                         return(0);
298                 case 0x9c: /* fsubr */
299                         ST(code & 7).exponent ^= 0x8000;
300                         fadd(PST(0),PST(code & 7),&tmp);
301                         real_to_real(&tmp,&ST(code & 7));
302                         return(0);
303                 case 0x9d: /* fsub */
304                         real_to_real(&ST(0),&tmp);
305                         tmp.exponent ^= 0x8000;
306                         fadd(PST(code & 7),&tmp,&tmp);
307                         real_to_real(&tmp,&ST(code & 7));
308                         return(0);
309                 case 0x9e: /* fdivr */
310                         fdiv(PST(0),PST(code & 7),&tmp);
311                         real_to_real(&tmp,&ST(code & 7));
312                         return(0);
313                 case 0x9f: /* fdiv */
314                         fdiv(PST(code & 7),PST(0),&tmp);
315                         real_to_real(&tmp,&ST(code & 7));
316                         return(0);
317                 case 0xb8: /* ffree */
318                         printf("ffree not implemented\n");
319                         math_abort(info,SIGILL);
320                 case 0xb9: /* fstp ???? where is the pop ? ATS */
321                         fxchg(&ST(0),&ST(code & 7));
322                         return(0);
323                 case 0xba: /* fst */
324                         ST(code & 7) = ST(0);
325                         return(0);
326                 case 0xbb: /* ????? encoding of fstp to mem ? ATS */
327                         ST(code & 7) = ST(0);
328                         fpop();
329                         return(0);
330                 case 0xbc: /* fucom */
331                         fucom(PST(code & 7),PST(0));
332                         return(0);
333                 case 0xbd: /* fucomp */
334                         fucom(PST(code & 7),PST(0));
335                         fpop();
336                         return(0);
337                 case 0xd8: /* faddp */
338                         fadd(PST(code & 7),PST(0),&tmp);
339                         real_to_real(&tmp,&ST(code & 7));
340                         fpop();
341                         return(0);
342                 case 0xd9: /* fmulp */
343                         fmul(PST(code & 7),PST(0),&tmp);
344                         real_to_real(&tmp,&ST(code & 7));
345                         fpop();
346                         return(0);
347                 case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
348                         fcom(PST(code & 7),PST(0));
349                         fpop();
350                         return(0);
351                 case 0xdc: /* fsubrp */
352                         ST(code & 7).exponent ^= 0x8000;
353                         fadd(PST(0),PST(code & 7),&tmp);
354                         real_to_real(&tmp,&ST(code & 7));
355                         fpop();
356                         return(0);
357                 case 0xdd: /* fsubp */
358                         real_to_real(&ST(0),&tmp);
359                         tmp.exponent ^= 0x8000;
360                         fadd(PST(code & 7),&tmp,&tmp);
361                         real_to_real(&tmp,&ST(code & 7));
362                         fpop();
363                         return(0);
364                 case 0xde: /* fdivrp */
365                         fdiv(PST(0),PST(code & 7),&tmp);
366                         real_to_real(&tmp,&ST(code & 7));
367                         fpop();
368                         return(0);
369                 case 0xdf: /* fdivp */
370                         fdiv(PST(code & 7),PST(0),&tmp);
371                         real_to_real(&tmp,&ST(code & 7));
372                         fpop();
373                         return(0);
374                 case 0xf8: /* fild 16-bit mem ???? ATS */
375                         printf("ffree not implemented\n");
376                         math_abort(info,SIGILL);
377                         fpop();
378                         return(0);
379                 case 0xf9: /*  ????? ATS */
380                         fxchg(&ST(0),&ST(code & 7));
381                         return(0);
382                 case 0xfa: /* fist 16-bit mem ? ATS */
383                 case 0xfb: /* fistp 16-bit mem ? ATS */
384                         ST(code & 7) = ST(0);
385                         fpop();
386                         return(0);
387         }
388         switch ((code>>3) & 0xe7) {
389                 case 0x22:
390                         put_short_real(PST(0),info,code);
391                         return(0);
392                 case 0x23:
393                         put_short_real(PST(0),info,code);
394                         fpop();
395                         return(0);
396                 case 0x24:
397                         address = ea(info,code);
398                         for (code = 0 ; code < 7 ; code++) {
399                                 ((int32_t *) & I387)[code] =
400                                    get_fs_long((u_int32_t *) address);
401                                 address += 4;
402                         }
403                         return(0);
404                 case 0x25:
405                         address = ea(info,code);
406                         *(unsigned short *) &I387.cwd =
407                                 get_fs_word((unsigned short *) address);
408                         return(0);
409                 case 0x26:
410                         address = ea(info,code);
411                         /*verify_area(address,28);*/
412                         for (code = 0 ; code < 7 ; code++) {
413                                 put_fs_long( ((int32_t *) & I387)[code],
414                                         (u_int32_t *) address);
415                                 address += 4;
416                         }
417                         return(0);
418                 case 0x27:
419                         address = ea(info,code);
420                         /*verify_area(address,2);*/
421                         put_fs_word(I387.cwd,(short *) address);
422                         return(0);
423                 case 0x62:
424                         put_long_int(PST(0),info,code);
425                         return(0);
426                 case 0x63:
427                         put_long_int(PST(0),info,code);
428                         fpop();
429                         return(0);
430                 case 0x65:
431                         fpush();
432                         get_temp_real(&tmp,info,code);
433                         real_to_real(&tmp,&ST(0));
434                         return(0);
435                 case 0x67:
436                         put_temp_real(PST(0),info,code);
437                         fpop();
438                         return(0);
439                 case 0xa2:
440                         put_long_real(PST(0),info,code);
441                         return(0);
442                 case 0xa3:
443                         put_long_real(PST(0),info,code);
444                         fpop();
445                         return(0);
446                 case 0xa4:
447                         address = ea(info,code);
448                         for (code = 0 ; code < 27 ; code++) {
449                                 ((int32_t *) & I387)[code] =
450                                    get_fs_long((u_int32_t *) address);
451                                 address += 4;
452                         }
453                         return(0);
454                 case 0xa6:
455                         address = ea(info,code);
456                         /*verify_area(address,108);*/
457                         for (code = 0 ; code < 27 ; code++) {
458                                 put_fs_long( ((int32_t *) & I387)[code],
459                                         (u_int32_t *) address);
460                                 address += 4;
461                         }
462                         I387.cwd = 0x037f;
463                         I387.swd = 0x0000;
464                         I387.twd = 0x0000;
465                         return(0);
466                 case 0xa7:
467                         address = ea(info,code);
468                         /*verify_area(address,2);*/
469                         put_fs_word(I387.swd,(short *) address);
470                         return(0);
471                 case 0xe2:
472                         put_short_int(PST(0),info,code);
473                         return(0);
474                 case 0xe3:
475                         put_short_int(PST(0),info,code);
476                         fpop();
477                         return(0);
478                 case 0xe4:
479                         fpush();
480                         get_BCD(&tmp,info,code);
481                         real_to_real(&tmp,&ST(0));
482                         return(0);
483                 case 0xe5:
484                         fpush();
485                         get_longlong_int(&tmp,info,code);
486                         real_to_real(&tmp,&ST(0));
487                         return(0);
488                 case 0xe6:
489                         put_BCD(PST(0),info,code);
490                         fpop();
491                         return(0);
492                 case 0xe7:
493                         put_longlong_int(PST(0),info,code);
494                         fpop();
495                         return(0);
496         }
497         switch (code >> 9) {
498                 case 0:
499                         get_short_real(&tmp,info,code);
500                         break;
501                 case 1:
502                         get_long_int(&tmp,info,code);
503                         break;
504                 case 2:
505                         get_long_real(&tmp,info,code);
506                         break;
507                 case 4:
508                         get_short_int(&tmp,info,code);
509         }
510         switch ((code>>3) & 0x27) {
511                 case 0:
512                         fadd(&tmp,PST(0),&tmp);
513                         real_to_real(&tmp,&ST(0));
514                         return(0);
515                 case 1:
516                         fmul(&tmp,PST(0),&tmp);
517                         real_to_real(&tmp,&ST(0));
518                         return(0);
519                 case 2:
520                         fcom(&tmp,PST(0));
521                         return(0);
522                 case 3:
523                         fcom(&tmp,PST(0));
524                         fpop();
525                         return(0);
526                 case 4:
527                         tmp.exponent ^= 0x8000;
528                         fadd(&tmp,PST(0),&tmp);
529                         real_to_real(&tmp,&ST(0));
530                         return(0);
531                 case 5:
532                         ST(0).exponent ^= 0x8000;
533                         fadd(&tmp,PST(0),&tmp);
534                         real_to_real(&tmp,&ST(0));
535                         return(0);
536                 case 6:
537                         fdiv(PST(0),&tmp,&tmp);
538                         real_to_real(&tmp,&ST(0));
539                         return(0);
540                 case 7:
541                         fdiv(&tmp,PST(0),&tmp);
542                         real_to_real(&tmp,&ST(0));
543                         return(0);
544         }
545         if ((code & 0x138) == 0x100) {
546                         fpush();
547                         real_to_real(&tmp,&ST(0));
548                         return(0);
549         }
550         printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
551                 info->tf_eip,code);
552         math_abort(info,SIGFPE);
553 }
554
555 static void
556 fpop(void)
557 {
558         u_int32_t tmp;
559
560         tmp = I387.swd & 0xffffc7ffUL;
561         I387.swd += 0x00000800;
562         I387.swd &= 0x00003800;
563         I387.swd |= tmp;
564 }
565
566 static void
567 fpush(void)
568 {
569         u_int32_t tmp;
570
571         tmp = I387.swd & 0xffffc7ffUL;
572         I387.swd += 0x00003800;
573         I387.swd &= 0x00003800;
574         I387.swd |= tmp;
575 }
576
577 static void 
578 fxchg(temp_real_unaligned * a, temp_real_unaligned * b)
579 {
580         temp_real_unaligned c;
581
582         c = *a;
583         *a = *b;
584         *b = c;
585 }
586
587 static temp_real_unaligned *
588 __st(int i)
589 {
590         i += I387.swd >> 11;
591         i &= 7;
592         return (temp_real_unaligned *) (i*10 + (char *)(I387.st_space));
593 }
594
595 /*
596  * linux/kernel/math/ea.c
597  *
598  * (C) 1991 Linus Torvalds
599  */
600
601 /*
602  * Calculate the effective address.
603  */
604
605
606 static int __regoffset[] = {
607         tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
608 };
609
610 #define REG(x) (((int *)curproc->p_md.md_regs)[__regoffset[(x)]])
611
612 static char *
613 sib(struct trapframe * info, int mod)
614 {
615         unsigned char ss,index,base;
616         int32_t offset = 0;
617
618         base = get_fs_byte((char *) info->tf_eip);
619         info->tf_eip++;
620         ss = base >> 6;
621         index = (base >> 3) & 7;
622         base &= 7;
623         if (index == 4)
624                 offset = 0;
625         else
626                 offset = REG(index);
627         offset <<= ss;
628         if (mod || base != 5)
629                 offset += REG(base);
630         if (mod == 1) {
631                 offset += (signed char) get_fs_byte((char *) info->tf_eip);
632                 info->tf_eip++;
633         } else if (mod == 2 || base == 5) {
634                 offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
635                 info->tf_eip += 4;
636         }
637         I387.foo = offset;
638         I387.fos = 0x17;
639         return (char *) offset;
640 }
641
642 static char *
643 ea(struct trapframe * info, unsigned short code)
644 {
645         unsigned char mod,rm;
646         int32_t * tmp;
647         int offset = 0;
648
649         mod = (code >> 6) & 3;
650         rm = code & 7;
651         if (rm == 4 && mod != 3)
652                 return sib(info,mod);
653         if (rm == 5 && !mod) {
654                 offset = get_fs_long((u_int32_t *) info->tf_eip);
655                 info->tf_eip += 4;
656                 I387.foo = offset;
657                 I387.fos = 0x17;
658                 return (char *) offset;
659         }
660         tmp = (int32_t *) &REG(rm);
661         switch (mod) {
662                 case 0: offset = 0; break;
663                 case 1:
664                         offset = (signed char) get_fs_byte((char *) info->tf_eip);
665                         info->tf_eip++;
666                         break;
667                 case 2:
668                         offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
669                         info->tf_eip += 4;
670                         break;
671 #ifdef notyet
672                 case 3:
673                         math_abort(info,1<<(SIGILL-1));
674 #endif
675         }
676         I387.foo = offset;
677         I387.fos = 0x17;
678         return offset + (char *) *tmp;
679 }
680 /*
681  * linux/kernel/math/get_put.c
682  *
683  * (C) 1991 Linus Torvalds
684  */
685
686 /*
687  * This file handles all accesses to user memory: getting and putting
688  * ints/reals/BCD etc. This is the only part that concerns itself with
689  * other than temporary real format. All other calculations are strictly temp_real.
690  */
691
692 static void 
693 get_short_real(temp_real * tmp, struct trapframe * info, unsigned short code)
694 {
695         char * addr;
696         short_real sr;
697
698         addr = ea(info,code);
699         sr = get_fs_long((u_int32_t *) addr);
700         short_to_temp(&sr,tmp);
701 }
702
703 static void
704 get_long_real(temp_real * tmp, struct trapframe * info, unsigned short code)
705 {
706         char * addr;
707         long_real lr;
708
709         addr = ea(info,code);
710         lr.a = get_fs_long((u_int32_t *) addr);
711         lr.b = get_fs_long(1 + (u_int32_t *) addr);
712         long_to_temp(&lr,tmp);
713 }
714
715 static void
716 get_temp_real(temp_real * tmp, struct trapframe * info, unsigned short code)
717 {
718         char * addr;
719
720         addr = ea(info,code);
721         tmp->a = get_fs_long((u_int32_t *) addr);
722         tmp->b = get_fs_long(1 + (u_int32_t *) addr);
723         tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
724 }
725
726 static void
727 get_short_int(temp_real * tmp, struct trapframe * info, unsigned short code)
728 {
729         char * addr;
730         temp_int ti;
731
732         addr = ea(info,code);
733         ti.a = (signed short) get_fs_word((unsigned short *) addr);
734         ti.b = 0;
735         if ((ti.sign = (ti.a < 0)) != 0)
736                 ti.a = - ti.a;
737         int_to_real(&ti,tmp);
738 }
739
740 static void
741 get_long_int(temp_real * tmp, struct trapframe * info, unsigned short code)
742 {
743         char * addr;
744         temp_int ti;
745
746         addr = ea(info,code);
747         ti.a = get_fs_long((u_int32_t *) addr);
748         ti.b = 0;
749         if ((ti.sign = (ti.a < 0)) != 0)
750                 ti.a = - ti.a;
751         int_to_real(&ti,tmp);
752 }
753
754 static void 
755 get_longlong_int(temp_real * tmp, struct trapframe * info, unsigned short code)
756 {
757         char * addr;
758         temp_int ti;
759
760         addr = ea(info,code);
761         ti.a = get_fs_long((u_int32_t *) addr);
762         ti.b = get_fs_long(1 + (u_int32_t *) addr);
763         if ((ti.sign = (ti.b < 0)) != 0)
764                 __asm__("notl %0 ; notl %1\n\t"
765                         "addl $1,%0 ; adcl $0,%1"
766                         :"=r" (ti.a),"=r" (ti.b)
767                         :"0" (ti.a),"1" (ti.b));
768         int_to_real(&ti,tmp);
769 }
770
/*
 * Multiply the 64-bit quantity hi:lo by ten in place: double it, save
 * that copy in %ecx/%ebx, double twice more (giving x8) and add the
 * saved x2 back in.  Every step propagates the carry from lo into hi.
 */
#define MUL10(lo,hi) \
	__asm__("addl %0,%0 ; adcl %1,%1\n\t" \
		"movl %0,%%ecx ; movl %1,%%ebx\n\t" \
		"addl %0,%0 ; adcl %1,%1\n\t" \
		"addl %0,%0 ; adcl %1,%1\n\t" \
		"addl %%ecx,%0 ; adcl %%ebx,%1" \
		:"=a" (lo),"=d" (hi) \
		:"0" (lo),"1" (hi):"cx","bx")

/*
 * Add the 32-bit value v to the 64-bit quantity hi:lo in place, carrying
 * from lo into hi.
 */
#define ADD64(v,lo,hi) \
	__asm__("addl %4,%0 ; adcl $0,%1":"=r" (lo),"=r" (hi) \
		:"0" (lo),"1" (hi),"r" ((u_int32_t) (v)))
783
784 static void
785 get_BCD(temp_real * tmp, struct trapframe * info, unsigned short code)
786 {
787         int k;
788         char * addr;
789         temp_int i;
790         unsigned char c;
791
792         addr = ea(info,code);
793         addr += 9;
794         i.sign = 0x80 & get_fs_byte(addr--);
795         i.a = i.b = 0;
796         for (k = 0; k < 9; k++) {
797                 c = get_fs_byte(addr--);
798                 MUL10(i.a, i.b);
799                 ADD64((c>>4), i.a, i.b);
800                 MUL10(i.a, i.b);
801                 ADD64((c&0xf), i.a, i.b);
802         }
803         int_to_real(&i,tmp);
804 }
805
806 static void 
807 put_short_real(const temp_real * tmp,
808         struct trapframe * info, unsigned short code)
809 {
810         char * addr;
811         short_real sr;
812
813         addr = ea(info,code);
814         /*verify_area(addr,4);*/
815         temp_to_short(tmp,&sr);
816         put_fs_long(sr,(u_int32_t *) addr);
817 }
818
819 static void
820 put_long_real(const temp_real * tmp,
821         struct trapframe * info, unsigned short code)
822 {
823         char * addr;
824         long_real lr;
825
826         addr = ea(info,code);
827         /*verify_area(addr,8);*/
828         temp_to_long(tmp,&lr);
829         put_fs_long(lr.a, (u_int32_t *) addr);
830         put_fs_long(lr.b, 1 + (u_int32_t *) addr);
831 }
832
833 static void
834 put_temp_real(const temp_real * tmp,
835         struct trapframe * info, unsigned short code)
836 {
837         char * addr;
838
839         addr = ea(info,code);
840         /*verify_area(addr,10);*/
841         put_fs_long(tmp->a, (u_int32_t *) addr);
842         put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
843         put_fs_word(tmp->exponent, 4 + (short *) addr);
844 }
845
846 static void
847 put_short_int(const temp_real * tmp,
848         struct trapframe * info, unsigned short code)
849 {
850         char * addr;
851         temp_int ti;
852
853         addr = ea(info,code);
854         real_to_int(tmp,&ti);
855         /*verify_area(addr,2);*/
856         if (ti.sign)
857                 ti.a = -ti.a;
858         put_fs_word(ti.a,(short *) addr);
859 }
860
861 static void
862 put_long_int(const temp_real * tmp,
863         struct trapframe * info, unsigned short code)
864 {
865         char * addr;
866         temp_int ti;
867
868         addr = ea(info,code);
869         real_to_int(tmp,&ti);
870         /*verify_area(addr,4);*/
871         if (ti.sign)
872                 ti.a = -ti.a;
873         put_fs_long(ti.a,(u_int32_t *) addr);
874 }
875
876 static void
877 put_longlong_int(const temp_real * tmp,
878         struct trapframe * info, unsigned short code)
879 {
880         char * addr;
881         temp_int ti;
882
883         addr = ea(info,code);
884         real_to_int(tmp,&ti);
885         /*verify_area(addr,8);*/
886         if (ti.sign)
887                 __asm__("notl %0 ; notl %1\n\t"
888                         "addl $1,%0 ; adcl $0,%1"
889                         :"=r" (ti.a),"=r" (ti.b)
890                         :"0" (ti.a),"1" (ti.b));
891         put_fs_long(ti.a,(u_int32_t *) addr);
892         put_fs_long(ti.b,1 + (u_int32_t *) addr);
893 }
894
/*
 * DIV10(low,high,rem): divide the 64-bit quantity high:low by ten, in
 * place, leaving the quotient in high:low and the remainder in rem.
 *
 * Done as two chained 32-bit divl steps with the divisor 10 in %ecx:
 * first 0:high / 10 (quotient becomes the new high word), then, after the
 * xchgl brings low into %eax, remainder:low / 10 (quotient becomes the
 * new low word, %edx ends up holding the final remainder).
 */
895 #define DIV10(low,high,rem) \
896 __asm__("divl %6 ; xchgl %1,%2 ; divl %6" \
897         :"=d" (rem),"=a" (low),"=r" (high) \
898         :"0" (0),"1" (high),"2" (low),"c" (10))
899
900 static void
901 put_BCD(const temp_real * tmp,struct trapframe * info, unsigned short code)
902 {
903         int k,rem;
904         char * addr;
905         temp_int i;
906         unsigned char c;
907
908         addr = ea(info,code);
909         /*verify_area(addr,10);*/
910         real_to_int(tmp,&i);
911         if (i.sign)
912                 put_fs_byte(0x80, addr+9);
913         else
914                 put_fs_byte(0, addr+9);
915         for (k = 0; k < 9; k++) {
916                 DIV10(i.a,i.b,rem);
917                 c = rem;
918                 DIV10(i.a,i.b,rem);
919                 c += rem<<4;
920                 put_fs_byte(c,addr++);
921         }
922 }
923
924 /*
925  * linux/kernel/math/mul.c
926  *
927  * (C) 1991 Linus Torvalds
928  */
929
930 /*
931  * temporary real multiplication routine.
932  */
933
934
/*
 * Shift the 128-bit value held in c[0..3] (least significant word first)
 * left by one bit, in place.  The bit shifted out of c[3] is discarded.
 *
 * Written in plain C rather than inline asm: the former asm stored into
 * c[] without declaring a memory clobber, so an optimizing compiler was
 * free to keep using stale copies of the array in the caller.
 * Assumes 32-bit int, as the rest of this file does.
 */
static void
shift(int * c)
{
        unsigned int *w = (unsigned int *) c;
        unsigned int carry = 0;
        unsigned int next;
        int k;

        for (k = 0; k < 4; k++) {
                next = w[k] >> 31;              /* bit leaving this word */
                w[k] = (w[k] << 1) | carry;
                carry = next;
        }
}
944
945 static void
946 mul64(const temp_real * a, const temp_real * b, int * c)
947 {
948         __asm__("movl (%0),%%eax\n\t"
949                 "mull (%1)\n\t"
950                 "movl %%eax,(%2)\n\t"
951                 "movl %%edx,4(%2)\n\t"
952                 "movl 4(%0),%%eax\n\t"
953                 "mull 4(%1)\n\t"
954                 "movl %%eax,8(%2)\n\t"
955                 "movl %%edx,12(%2)\n\t"
956                 "movl (%0),%%eax\n\t"
957                 "mull 4(%1)\n\t"
958                 "addl %%eax,4(%2)\n\t"
959                 "adcl %%edx,8(%2)\n\t"
960                 "adcl $0,12(%2)\n\t"
961                 "movl 4(%0),%%eax\n\t"
962                 "mull (%1)\n\t"
963                 "addl %%eax,4(%2)\n\t"
964                 "adcl %%edx,8(%2)\n\t"
965                 "adcl $0,12(%2)"
966                 ::"S" (a),"c" (b),"D" (c)
967                 :"ax","dx");
968 }
969
970 static void
971 fmul(const temp_real * src1, const temp_real * src2, temp_real * result)
972 {
973         int i,sign;
974         int tmp[4] = {0,0,0,0};
975
976         sign = (src1->exponent ^ src2->exponent) & 0x8000;
977         i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
978         if (i<0) {
979                 result->exponent = sign;
980                 result->a = result->b = 0;
981                 return;
982         }
983         if (i>0x7fff) {
984                 set_OE();
985                 return;
986         }
987         mul64(src1,src2,tmp);
988         if (tmp[0] || tmp[1] || tmp[2] || tmp[3])
989                 while (i && tmp[3] >= 0) {
990                         i--;
991                         shift(tmp);
992                 }
993         else
994                 i = 0;
995         result->exponent = i | sign;
996         result->a = tmp[2];
997         result->b = tmp[3];
998 }
999
1000 /*
1001  * linux/kernel/math/div.c
1002  *
1003  * (C) 1991 Linus Torvalds
1004  */
1005
1006 /*
1007  * temporary real division routine.
1008  */
1009
/*
 * Shift the 128-bit value held in c[0..3] (least significant word first)
 * left by one bit, in place.  The bit shifted out of c[3] is discarded.
 *
 * Written in plain C rather than inline asm: the former asm stored into
 * c[] without declaring a memory clobber, so callers that test c[3] in a
 * loop could be compiled against a stale copy of the array.
 * Assumes 32-bit int, as the rest of this file does.
 */
static void
shift_left(int * c)
{
        unsigned int *w = (unsigned int *) c;
        unsigned int carry = 0;
        unsigned int next;
        int k;

        for (k = 0; k < 4; k++) {
                next = w[k] >> 31;              /* bit leaving this word */
                w[k] = (w[k] << 1) | carry;
                carry = next;
        }
}
1019
/*
 * Logically shift the 128-bit value held in c[0..3] (least significant
 * word first) right by one bit, in place.  A zero enters at the top of
 * c[3]; the bit shifted out of c[0] is discarded.
 *
 * Written in plain C rather than inline asm: the former asm modified c[]
 * without any output operand or memory clobber, so the compiler was not
 * told the array changed.  Assumes 32-bit int, as the rest of this file
 * does.
 */
static void
shift_right(int * c)
{
        unsigned int *w = (unsigned int *) c;
        unsigned int carry = 0;
        unsigned int next;
        int k;

        for (k = 3; k >= 0; k--) {
                next = w[k] & 1;                /* bit leaving this word */
                w[k] = (w[k] >> 1) | (carry << 31);
                carry = next;
        }
}
1026
/*
 * Subtract the 128-bit value a[0..3] from b[0..3] in place (both least
 * significant word first) and report whether it fitted.
 *
 * Returns 1 when no borrow came out of the top word (b was >= a as an
 * unsigned 128-bit number) and 0 otherwise.  b[] is overwritten with the
 * wrapped difference in either case, so callers that want to keep the
 * old value on failure must pass a scratch copy.
 *
 * Written in plain C rather than inline asm: the former asm modified b[]
 * without declaring a memory clobber, so the compiler could assume the
 * caller's scratch copy was unchanged.
 */
static int
try_sub(int * a, int * b)
{
        const unsigned int *sub = (const unsigned int *) a;
        unsigned int *acc = (unsigned int *) b;
        unsigned int borrow = 0;
        unsigned int minuend,subtrahend;
        int k;

        for (k = 0; k < 4; k++) {
                minuend = acc[k];
                subtrahend = sub[k];
                acc[k] = minuend - subtrahend - borrow;
                /* Borrow out iff minuend < subtrahend + borrow-in. */
                borrow = (minuend < subtrahend) ||
                    (borrow && minuend == subtrahend);
        }
        return !borrow;
}
1039
/*
 * Restoring shift-and-subtract division on 128-bit values.
 *
 * Runs 64 steps.  In each step b is tentatively subtracted from a copy of
 * a; when that succeeds (try_sub() reports no borrow) the copy replaces a
 * and the current quotient bit is OR-ed into c.  b is shifted right one
 * bit after every step.  Quotient bits are produced from the top bit of
 * c[3] downwards, so the 64 steps fill c[3] and then c[2]; c[0] and c[1]
 * are not written.  a and b are both modified.
 */
static void
div64(int * a, int * b, int * c)
{
        int trial[4];
        unsigned int bit = 0;
        int step,w;

        c += 4;
        for (step = 0; step < 64; step++) {
                bit >>= 1;
                if (bit == 0) {
                        /* Current word exhausted: move to the next lower one. */
                        c--;
                        bit = 0x80000000UL;
                }
                for (w = 0; w < 4; w++)
                        trial[w] = a[w];
                if (try_sub(b,trial)) {
                        *c |= bit;
                        for (w = 0; w < 4; w++)
                                a[w] = trial[w];
                }
                shift_right(b);
        }
}
1063
1064 static void
1065 fdiv(const temp_real * src1, const temp_real * src2, temp_real * result)
1066 {
1067         int i,sign;
1068         int a[4],b[4],tmp[4] = {0,0,0,0};
1069
1070         sign = (src1->exponent ^ src2->exponent) & 0x8000;
1071         if (!(src2->a || src2->b)) {
1072                 set_ZE();
1073                 return;
1074         }
1075         i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
1076         if (i<0) {
1077                 set_UE();
1078                 result->exponent = sign;
1079                 result->a = result->b = 0;
1080                 return;
1081         }
1082         a[0] = a[1] = 0;
1083         a[2] = src1->a;
1084         a[3] = src1->b;
1085         b[0] = b[1] = 0;
1086         b[2] = src2->a;
1087         b[3] = src2->b;
1088         while (b[3] >= 0) {
1089                 i++;
1090                 shift_left(b);
1091         }
1092         div64(a,b,tmp);
1093         if (tmp[0] || tmp[1] || tmp[2] || tmp[3]) {
1094                 while (i && tmp[3] >= 0) {
1095                         i--;
1096                         shift_left(tmp);
1097                 }
1098                 if (tmp[3] >= 0)
1099                         set_DE();
1100         } else
1101                 i = 0;
1102         if (i>0x7fff) {
1103                 set_OE();
1104                 return;
1105         }
1106         if (tmp[0] || tmp[1])
1107                 set_PE();
1108         result->exponent = i | sign;
1109         result->a = tmp[2];
1110         result->b = tmp[3];
1111 }
1112
1113 /*
1114  * linux/kernel/math/add.c
1115  *
1116  * (C) 1991 Linus Torvalds
1117  */
1118
1119 /*
1120  * temporary real addition routine.
1121  *
1122  * NOTE! These aren't exact: they are only 62 bits wide, and don't do
1123  * correct rounding. Fast hack. The reason is that we shift right the
1124  * values by two, in order not to have overflow (1 bit), and to be able
1125  * to move the sign into the mantissa (1 bit). Much simpler algorithms,
1126  * and 62 bits (61 really - no rounding) accuracy is usually enough. The
1127  * only time you should notice anything weird is when adding 64-bit
1128  * integers together. When using doubles (52 bits accuracy), the
1129  * 61-bit accuracy never shows at all.
1130  */
1131
/*
 * NEGINT(a): two's-complement negate the 64-bit value a->b:a->a in place
 * (invert both words, add one to the low word, carry into the high word).
 * The argument is pasted unparenthesised in front of "->", so pass a
 * plain pointer name.
 */
1132 #define NEGINT(a) \
1133 __asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
1134         :"=r" (a->a),"=r" (a->b) \
1135         :"0" (a->a),"1" (a->b))
1136
1137 static void signify(temp_real * a)
1138 {
1139         a->exponent += 2;
1140         __asm__("shrdl $2,%1,%0 ; shrl $2,%1"
1141                 :"=r" (a->a),"=r" (a->b)
1142                 :"0" (a->a),"1" (a->b));
1143         if (a->exponent < 0)
1144                 NEGINT(a);
1145         a->exponent &= 0x7fff;
1146 }
1147
1148 static void unsignify(temp_real * a)
1149 {
1150         if (!(a->a || a->b)) {
1151                 a->exponent = 0;
1152                 return;
1153         }
1154         a->exponent &= 0x7fff;
1155         if (a->b < 0) {
1156                 NEGINT(a);
1157                 a->exponent |= 0x8000;
1158         }
1159         while (a->b >= 0) {
1160                 a->exponent--;
1161                 __asm__("addl %0,%0 ; adcl %1,%1"
1162                         :"=r" (a->a),"=r" (a->b)
1163                         :"0" (a->a),"1" (a->b));
1164         }
1165 }
1166
1167 static void
1168 fadd(const temp_real * src1, const temp_real * src2, temp_real * result)
1169 {
1170         temp_real a,b;
1171         int x1,x2,shift;
1172
1173         x1 = src1->exponent & 0x7fff;
1174         x2 = src2->exponent & 0x7fff;
1175         if (x1 > x2) {
1176                 a = *src1;
1177                 b = *src2;
1178                 shift = x1-x2;
1179         } else {
1180                 a = *src2;
1181                 b = *src1;
1182                 shift = x2-x1;
1183         }
1184         if (shift >= 64) {
1185                 *result = a;
1186                 return;
1187         }
1188         if (shift >= 32) {
1189                 b.a = b.b;
1190                 b.b = 0;
1191                 shift -= 32;
1192         }
1193         __asm__("shrdl %4,%1,%0 ; shrl %4,%1"
1194                 :"=r" (b.a),"=r" (b.b)
1195                 :"0" (b.a),"1" (b.b),"c" ((char) shift));
1196         signify(&a);
1197         signify(&b);
1198         __asm__("addl %4,%0 ; adcl %5,%1"
1199                 :"=r" (a.a),"=r" (a.b)
1200                 :"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
1201         unsignify(&a);
1202         *result = a;
1203 }
1204
1205 /*
1206  * linux/kernel/math/compare.c
1207  *
1208  * (C) 1991 Linus Torvalds
1209  */
1210
1211 /*
1212  * temporary real comparison routines
1213  */
1214
1215
/*
 * Clear the bits covered by mask 0x4500 in the status word I387.swd
 * (on the x87 status word these are the C0, C2 and C3 condition codes).
 */
1216 #define clear_Cx() (I387.swd &= ~0x4500)
1217
1218 static void 
1219 normalize(temp_real * a)
1220 {
1221         int i = a->exponent & 0x7fff;
1222         int sign = a->exponent & 0x8000;
1223
1224         if (!(a->a || a->b)) {
1225                 a->exponent = 0;
1226                 return;
1227         }
1228         while (i && a->b >= 0) {
1229                 i--;
1230                 __asm__("addl %0,%0 ; adcl %1,%1"
1231                         :"=r" (a->a),"=r" (a->b)
1232                         :"0" (a->a),"1" (a->b));
1233         }
1234         a->exponent = i | sign;
1235 }
1236
1237 static void
1238 ftst(const temp_real * a)
1239 {
1240         temp_real b;
1241
1242         clear_Cx();
1243         b = *a;
1244         normalize(&b);
1245         if (b.a || b.b || b.exponent) {
1246                 if (b.exponent < 0)
1247                         set_C0();
1248         } else
1249                 set_C3();
1250 }
1251
1252 static void
1253 fcom(const temp_real * src1, const temp_real * src2)
1254 {
1255         temp_real a;
1256
1257         a = *src1;
1258         a.exponent ^= 0x8000;
1259         fadd(&a,src2,&a);
1260         ftst(&a);
1261 }
1262
/*
 * Unordered compare.  This emulator makes no distinction from the
 * ordered form: both operands are simply forwarded to fcom().
 */
1263 static void
1264 fucom(const temp_real * src1, const temp_real * src2)
1265 {
1266         fcom(src1,src2);
1267 }
1268
1269 /*
1270  * linux/kernel/math/convert.c
1271  *
1272  * (C) 1991 Linus Torvalds
1273  */
1274
1275
1276 /*
1277  * NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
1278  * and temp_to_short conversion routines: don't touch them if you don't
1279  * know what's going on. They are the adding of one in the rounding: the
1280  * overflow bit is also used for adding one into the exponent. Thus it
1281  * looks like the overflow would be incorrectly handled, but due to the
1282  * way the IEEE numbers work, things are correct.
1283  *
1284  * There is no checking for total overflow in the conversions, though (ie
1285  * if the temp-real number simply won't fit in a short- or long-real.)
1286  */
1287
1288 static void
1289 short_to_temp(const short_real * a, temp_real * b)
1290 {
1291         if (!(*a & 0x7fffffff)) {
1292                 b->a = b->b = 0;
1293                 if (*a)
1294                         b->exponent = 0x8000;
1295                 else
1296                         b->exponent = 0;
1297                 return;
1298         }
1299         b->exponent = ((*a>>23) & 0xff)-127+16383;
1300         if (*a<0)
1301                 b->exponent |= 0x8000;
1302         b->b = (*a<<8) | 0x80000000UL;
1303         b->a = 0;
1304 }
1305
1306 static void
1307 long_to_temp(const long_real * a, temp_real * b)
1308 {
1309         if (!a->a && !(a->b & 0x7fffffff)) {
1310                 b->a = b->b = 0;
1311                 if (a->b)
1312                         b->exponent = 0x8000;
1313                 else
1314                         b->exponent = 0;
1315                 return;
1316         }
1317         b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
1318         if (a->b<0)
1319                 b->exponent |= 0x8000;
1320         b->b = 0x80000000UL | (a->b<<11) | (((u_int32_t)a->a)>>21);
1321         b->a = a->a<<11;
1322 }
1323
1324 static void 
1325 temp_to_short(const temp_real * a, short_real * b)
1326 {
1327         if (!(a->exponent & 0x7fff)) {
1328                 *b = (a->exponent)?0x80000000UL:0;
1329                 return;
1330         }
1331         *b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
1332         if (a->exponent < 0)
1333                 *b |= 0x80000000UL;
1334         *b |= (a->b >> 8) & 0x007fffff;
1335         switch ((int)ROUNDING) {
1336                 case ROUND_NEAREST:
1337                         if ((a->b & 0xff) > 0x80)
1338                                 ++*b;
1339                         break;
1340                 case ROUND_DOWN:
1341                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1342                                 ++*b;
1343                         break;
1344                 case ROUND_UP:
1345                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1346                                 ++*b;
1347                         break;
1348         }
1349 }
1350
1351 static void
1352 temp_to_long(const temp_real * a, long_real * b)
1353 {
1354         if (!(a->exponent & 0x7fff)) {
1355                 b->a = 0;
1356                 b->b = (a->exponent)?0x80000000UL:0;
1357                 return;
1358         }
1359         b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
1360             0x7ff00000;
1361         if (a->exponent < 0)
1362                 b->b |= 0x80000000UL;
1363         b->b |= (a->b >> 11) & 0x000fffff;
1364         b->a = a->b << 21;
1365         b->a |= (a->a >> 11) & 0x001fffff;
1366         switch ((int)ROUNDING) {
1367                 case ROUND_NEAREST:
1368                         if ((a->a & 0x7ff) > 0x400)
1369                                 __asm__("addl $1,%0 ; adcl $0,%1"
1370                                         :"=r" (b->a),"=r" (b->b)
1371                                         :"0" (b->a),"1" (b->b));
1372                         break;
1373                 case ROUND_DOWN:
1374                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1375                                 __asm__("addl $1,%0 ; adcl $0,%1"
1376                                         :"=r" (b->a),"=r" (b->b)
1377                                         :"0" (b->a),"1" (b->b));
1378                         break;
1379                 case ROUND_UP:
1380                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1381                                 __asm__("addl $1,%0 ; adcl $0,%1"
1382                                         :"=r" (b->a),"=r" (b->b)
1383                                         :"0" (b->a),"1" (b->b));
1384                         break;
1385         }
1386 }
1387
1388 static void 
1389 frndint(const temp_real * a, temp_real * b)
1390 {
1391         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1392         u_int32_t underflow;
1393
1394         if ((shift < 0) || (shift == 16383+63)) {
1395                 *b = *a;
1396                 return;
1397         }
1398         b->a = b->b = underflow = 0;
1399         b->exponent = a->exponent;
1400         if (shift < 32) {
1401                 b->b = a->b; b->a = a->a;
1402         } else if (shift < 64) {
1403                 b->a = a->b; underflow = a->a;
1404                 shift -= 32;
1405                 b->exponent += 32;
1406         } else if (shift < 96) {
1407                 underflow = a->b;
1408                 shift -= 64;
1409                 b->exponent += 64;
1410         } else {
1411                 underflow = 1;
1412                 shift = 0;
1413         }
1414         b->exponent += shift;
1415         __asm__("shrdl %2,%1,%0"
1416                 :"=r" (underflow),"=r" (b->a)
1417                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1418         __asm__("shrdl %2,%1,%0"
1419                 :"=r" (b->a),"=r" (b->b)
1420                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1421         __asm__("shrl %1,%0"
1422                 :"=r" (b->b)
1423                 :"c" ((char) shift),"0" (b->b));
1424         switch ((int)ROUNDING) {
1425                 case ROUND_NEAREST:
1426                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1427                                 :"=r" (b->a),"=r" (b->b)
1428                                 :"0" (b->a),"1" (b->b)
1429                                 ,"r" (0x7fffffff + (b->a & 1))
1430                                 ,"m" (*&underflow));
1431                         break;
1432                 case ROUND_UP:
1433                         if ((b->exponent >= 0) && underflow)
1434                                 __asm__("addl $1,%0 ; adcl $0,%1"
1435                                         :"=r" (b->a),"=r" (b->b)
1436                                         :"0" (b->a),"1" (b->b));
1437                         break;
1438                 case ROUND_DOWN:
1439                         if ((b->exponent < 0) && underflow)
1440                                 __asm__("addl $1,%0 ; adcl $0,%1"
1441                                         :"=r" (b->a),"=r" (b->b)
1442                                         :"0" (b->a),"1" (b->b));
1443                         break;
1444         }
1445         if (b->a || b->b)
1446                 while (b->b >= 0) {
1447                         b->exponent--;
1448                         __asm__("addl %0,%0 ; adcl %1,%1"
1449                                 :"=r" (b->a),"=r" (b->b)
1450                                 :"0" (b->a),"1" (b->b));
1451                 }
1452         else
1453                 b->exponent = 0;
1454 }
1455
1456 static void
1457 Fscale(const temp_real *a, const temp_real *b, temp_real *c)
1458 {
1459         temp_int ti;
1460
1461         *c = *a;
1462         if(!c->a && !c->b) {                            /* 19 Sep 92*/
1463                 c->exponent = 0;
1464                 return;
1465         }
1466         real_to_int(b, &ti);
1467         if(ti.sign)
1468                 c->exponent -= ti.a;
1469         else
1470                 c->exponent += ti.a;
1471 }
1472
/*
 * Convert the temporary real *a to a sign/magnitude 64-bit integer in *b,
 * rounding according to ROUNDING.
 *
 * b->sign is set when the exponent word of *a is negative (sign bit
 * set); b->b:b->a receive the magnitude.  When the value has no fraction
 * bits to spare (exponent field above 16383+63) set_OE() is called and
 * the magnitude is left zero.
 */
1473 static void
1474 real_to_int(const temp_real * a, temp_int * b)
1475 {
        /* Number of mantissa bits that lie below the binary point. */
1476         int shift =  16383 + 63 - (a->exponent & 0x7fff);
        /* Collects the most recently discarded fraction bits. */
1477         u_int32_t underflow;
1478
1479         b->a = b->b = underflow = 0;
1480         b->sign = (a->exponent < 0);
1481         if (shift < 0) {
1482                 set_OE();
1483                 return;
1484         }
        /*
         * Pre-shift by whole 32-bit words so the asm below only has to
         * handle a count of 0..31.  With 96 or more fraction bits nothing
         * of the integer part remains and underflow is just a sticky 1.
         */
1485         if (shift < 32) {
1486                 b->b = a->b; b->a = a->a;
1487         } else if (shift < 64) {
1488                 b->a = a->b; underflow = a->a;
1489                 shift -= 32;
1490         } else if (shift < 96) {
1491                 underflow = a->b;
1492                 shift -= 64;
1493         } else {
1494                 underflow = 1;
1495                 shift = 0;
1496         }
        /* Shift the 96-bit chain b->b : b->a : underflow right by shift. */
1497         __asm__("shrdl %2,%1,%0"
1498                 :"=r" (underflow),"=r" (b->a)
1499                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1500         __asm__("shrdl %2,%1,%0"
1501                 :"=r" (b->a),"=r" (b->b)
1502                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1503         __asm__("shrl %1,%0"
1504                 :"=r" (b->b)
1505                 :"c" ((char) shift),"0" (b->b));
1506         switch ((int)ROUNDING) {
1507                 case ROUND_NEAREST:
                        /*
                         * Adding 0x7fffffff plus the current low bit to
                         * the discarded word carries out exactly when the
                         * fraction is above one half, or equal to one half
                         * with an odd integer part; the carry increments
                         * the 64-bit result.
                         * NOTE(review): the asm writes to underflow, which
                         * is declared only as an "m" input operand.
                         */
1508                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1509                                 :"=r" (b->a),"=r" (b->b)
1510                                 :"0" (b->a),"1" (b->b)
1511                                 ,"r" (0x7fffffff + (b->a & 1))
1512                                 ,"m" (*&underflow));
1513                         break;
1514                 case ROUND_UP:
                        /* Grow the magnitude only for positive values. */
1515                         if (!b->sign && underflow)
1516                                 __asm__("addl $1,%0 ; adcl $0,%1"
1517                                         :"=r" (b->a),"=r" (b->b)
1518                                         :"0" (b->a),"1" (b->b));
1519                         break;
1520                 case ROUND_DOWN:
                        /* Grow the magnitude only for negative values. */
1521                         if (b->sign && underflow)
1522                                 __asm__("addl $1,%0 ; adcl $0,%1"
1523                                         :"=r" (b->a),"=r" (b->b)
1524                                         :"0" (b->a),"1" (b->b));
1525                         break;
1526         }
1527 }
1528
1529 static void
1530 int_to_real(const temp_int * a, temp_real * b)
1531 {
1532         b->a = a->a;
1533         b->b = a->b;
1534         if (b->a || b->b)
1535                 b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
1536         else {
1537                 b->exponent = 0;
1538                 return;
1539         }
1540         while (b->b >= 0) {
1541                 b->exponent--;
1542                 __asm__("addl %0,%0 ; adcl %1,%1"
1543                         :"=r" (b->a),"=r" (b->b)
1544                         :"0" (b->a),"1" (b->b));
1545         }
1546 }
1547
1548 static int
1549 fpu_modevent(module_t mod, int type, void *unused)
1550 {
1551         switch (type) {
1552         case MOD_LOAD:
1553                 if (pmath_emulate) {
1554                         printf("Another Math emulator already present\n");
1555                         return EBUSY;
1556                 }
1557                 pmath_emulate = math_emulate;
1558                 if (bootverbose)
1559                         printf("Math emulator present\n");
1560                 break;
1561         case MOD_UNLOAD:
1562                 if (pmath_emulate != math_emulate) {
1563                         printf("Cannot unload another math emulator\n");
1564                         return EACCES;
1565                 }
1566                 pmath_emulate = 0;
1567                 if (bootverbose)
1568                         printf("Math emulator unloaded\n");
1569                 break;
1570         default:
1571                 break;
1572         }
1573         return 0;
1574 }
/*
 * Module description: name "fpu", event handler fpu_modevent, and a
 * zero third member.  DECLARE_MODULE registers it under SI_SUB_DRIVERS
 * with SI_ORDER_ANY.
 */
1575 static moduledata_t fpumod = {
1576         "fpu",
1577         fpu_modevent,
1578         0
1579 };
1580 DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);