Fix typos.
[dragonfly.git] / sys / i386 / i386 / math_emulate.c
1 /*
2  * linux/kernel/math/math_emulate.c
3  *
4  * (C) 1991 Linus Torvalds
5  *
6  * [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
7  *
8  *      from: 386BSD 0.1
9  * $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
10  * $DragonFly: src/sys/i386/i386/Attic/math_emulate.c,v 1.5 2006/09/13 18:45:12 swildner Exp $
11  */
12
13 /*
14  * Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
15  * even for soft-float, unless you use bruce evans' patches. The patches
16  * are great, but they have to be re-applied for every version, and the
17  * library is different for soft-float and 80387. So emulation is more
18  * practical, even though it's slower.
19  *
20  * 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
21  * about add/sub/mul/div. Urgel. I should find some good source, but I'll
22  * just fake up something.
23  *
24  * 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
25  * test every possible combination.
26  */
27
28 /*
29  * This file is full of ugly macros etc: one problem was that gcc simply
30  * didn't want to make the structures as they should be: it has to try to
31  * align them. Sickening code, but at least I've hidden the ugly things
32  * in this one file: the other files don't need to know about these things.
33  *
34  * The other files also don't care about ST(x) etc - they just get addresses
35  * to 80-bit temporary reals, and do with them as they please. I wanted to
36  * hide most of the 387-specific things here.
37  */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41
42 #include <machine/frame.h>
43 #include <machine/reg.h>
44
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47
48 #include <vm/vm.h>
49 #include <sys/lock.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <sys/user.h>
53
54 #define __ALIGNED_TEMP_REAL 1
55 #include <i386/i386/math_emu.h>
56
/*
 * bswapw(x): swap the two bytes of a 16-bit value in place, using
 * xchgb on %al/%ah.
 */
#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)(x)))
/*
 * ST(x) is an lvalue for the register __st(x) points at; PST(x) is the
 * same location as a read-only temp_real pointer.
 */
#define ST(x) (*__st((x)))
#define PST(x) ((const temp_real *) __st((x)))
/*
 * math_abort(tfp, signo): put the saved instruction pointer back into
 * the trap frame and return signo from the enclosing function.  The
 * caller must have a local named oldeip in scope.  The do/while(0)
 * wrapper makes the expansion a single statement, so the return stays
 * under the control of an unbraced if/else.
 */
#define math_abort(tfp, signo) \
	do { (tfp)->tf_eip = oldeip; return (signo); } while (0)
61
62 /*
63  * We don't want these inlined - it gets too messy in the machine-code.
64  */
/* Advance the top-of-stack field (bits 11-13 of I387.swd) by one. */
static void fpop(void);
/* Move the top-of-stack field (bits 11-13 of I387.swd) back by one. */
static void fpush(void);
/* Exchange the contents of the two registers a and b point at. */
static void fxchg(temp_real_unaligned *a, temp_real_unaligned *b);
/* Return the address of stack register i, counted from the current top. */
static temp_real_unaligned *__st(int i);
69
/*
 * Read one byte from the user address adr with fubyte() and return it
 * truncated to unsigned char.  The fubyte() result is not checked.
 */
static unsigned char
get_fs_byte(char *adr)
{
	return ((unsigned char)fubyte(adr));
}
73
/*
 * Read a 16-bit value from the user address adr.
 *
 * Uses fusword(), the 16-bit fetch that pairs with the susword() store
 * in put_fs_word(), rather than the wider fuword(): only the two bytes
 * being asked for are touched, so an operand that sits in the last two
 * bytes of a mapping can still be read.  The fetch result is not
 * checked; it is simply truncated to unsigned short.
 */
static unsigned short
get_fs_word(unsigned short *adr)
{
	return ((unsigned short)fusword(adr));
}
77
/*
 * Read a 32-bit value from the user address adr with fuword().
 * The fuword() result is not checked.
 */
static u_int32_t
get_fs_long(u_int32_t *adr)
{
	return ((u_int32_t)fuword(adr));
}
81
/*
 * Store the byte val at the user address adr with subyte().
 * The subyte() result is deliberately discarded.
 */
static void
put_fs_byte(unsigned char val, char *adr)
{
	(void)subyte(adr, val);
}
85
/*
 * Store the 16-bit value val at the user address adr with susword().
 * The susword() result is deliberately discarded.
 */
static void
put_fs_word(unsigned short val, short *adr)
{
	(void)susword(adr, val);
}
89
90 static void 
91 put_fs_long(u_long val, u_int32_t *adr)
92         { (void)suword(adr,val); }
93
94 static int
95 math_emulate(struct trapframe *info)
96 {
97         unsigned short code;
98         temp_real tmp;
99         char *address;
100         u_int32_t oldeip;
101
102         /* ever used fp? */
103         if ((curthread->td_pcb->pcb_flags & FP_SOFTFP) == 0) {
104                 curthread->td_pcb->pcb_flags |= FP_SOFTFP;
105                 I387.cwd = 0x037f;
106                 I387.swd = 0x0000;
107                 I387.twd = 0x0000;
108         }
109
110         if (I387.cwd & I387.swd & 0x3f)
111                 I387.swd |= 0x8000;
112         else
113                 I387.swd &= 0x7fff;
114         oldeip = info->tf_eip;
115 /* 0x001f means user code space */
116         if ((u_short)info->tf_cs != 0x001F) {
117                 printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
118                         (u_long)oldeip);
119                 panic("?Math emulation needed in kernel?");
120         }
121         /* completely ignore an operand-size prefix */
122         if (get_fs_byte((char *) info->tf_eip) == 0x66)
123                 info->tf_eip++;
124         code = get_fs_word((unsigned short *) info->tf_eip);
125         bswapw(code);
126         code &= 0x7ff;
127         I387.fip = oldeip;
128         *(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
129         *(1+(unsigned short *) &I387.fcs) = code;
130         info->tf_eip += 2;
131         switch (code) {
132                 case 0x1d0: /* fnop */
133                         return(0);
134                 case 0x1d1: case 0x1d2: case 0x1d3:  /* fst to 32-bit mem */
135                 case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
136                         math_abort(info,SIGILL);
137                 case 0x1e0: /* fchs */
138                         ST(0).exponent ^= 0x8000;
139                         return(0);
140                 case 0x1e1: /* fabs */
141                         ST(0).exponent &= 0x7fff;
142                         return(0);
143                 case 0x1e2: case 0x1e3:
144                         math_abort(info,SIGILL);
145                 case 0x1e4: /* ftst */
146                         ftst(PST(0));
147                         return(0);
148                 case 0x1e5: /* fxam */
149                         printf("fxam not implemented\n");
150                         math_abort(info,SIGILL);
151                 case 0x1e6: case 0x1e7: /* fldenv */
152                         math_abort(info,SIGILL);
153                 case 0x1e8: /* fld1 */
154                         fpush();
155                         ST(0) = CONST1;
156                         return(0);
157                 case 0x1e9: /* fld2t */
158                         fpush();
159                         ST(0) = CONSTL2T;
160                         return(0);
161                 case 0x1ea: /* fld2e */
162                         fpush();
163                         ST(0) = CONSTL2E;
164                         return(0);
165                 case 0x1eb: /* fldpi */
166                         fpush();
167                         ST(0) = CONSTPI;
168                         return(0);
169                 case 0x1ec: /* fldlg2 */
170                         fpush();
171                         ST(0) = CONSTLG2;
172                         return(0);
173                 case 0x1ed: /* fldln2 */
174                         fpush();
175                         ST(0) = CONSTLN2;
176                         return(0);
177                 case 0x1ee: /* fldz */
178                         fpush();
179                         ST(0) = CONSTZ;
180                         return(0);
181                 case 0x1ef:
182                         math_abort(info,SIGILL);
183                 case 0x1f0: /* f2xm1 */
184                 case 0x1f1: /* fyl2x */
185                 case 0x1f2: /* fptan */
186                 case 0x1f3: /* fpatan */
187                 case 0x1f4: /* fxtract */
188                 case 0x1f5: /* fprem1 */
189                 case 0x1f6: /* fdecstp */
190                 case 0x1f7: /* fincstp */
191                 case 0x1f8: /* fprem */
192                 case 0x1f9: /* fyl2xp1 */
193                 case 0x1fa: /* fsqrt */
194                 case 0x1fb: /* fsincos */
195                 case 0x1fe: /* fsin */
196                 case 0x1ff: /* fcos */
197                         uprintf(
198                          "math_emulate: instruction %04x not implemented\n",
199                           code + 0xd800);
200                         math_abort(info,SIGILL);
201                 case 0x1fc: /* frndint */
202                         frndint(PST(0),&tmp);
203                         real_to_real(&tmp,&ST(0));
204                         return(0);
205                 case 0x1fd: /* fscale */
206                         /* incomplete and totally inadequate -wfj */
207                         Fscale(PST(0), PST(1), &tmp);
208                         real_to_real(&tmp,&ST(0));
209                         return(0);                      /* 19 Sep 92*/
210                 case 0x2e9: /* ????? */
211 /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9  ATS */
212                         fucom(PST(1),PST(0));
213                         fpop(); fpop();
214                         return(0);
215                 case 0x3d0: case 0x3d1: /* fist ?? */
216                         return(0);
217                 case 0x3e2: /* fclex */
218                         I387.swd &= 0x7f00;
219                         return(0);
220                 case 0x3e3: /* fninit */
221                         I387.cwd = 0x037f;
222                         I387.swd = 0x0000;
223                         I387.twd = 0x0000;
224                         return(0);
225                 case 0x3e4:
226                         return(0);
227                 case 0x6d9: /* fcompp */
228                         fcom(PST(1),PST(0));
229                         fpop(); fpop();
230                         return(0);
231                 case 0x7e0: /* fstsw ax */
232                         *(short *) &info->tf_eax = I387.swd;
233                         return(0);
234         }
235         switch (code >> 3) {
236                 case 0x18: /* fadd */
237                         fadd(PST(0),PST(code & 7),&tmp);
238                         real_to_real(&tmp,&ST(0));
239                         return(0);
240                 case 0x19: /* fmul */
241                         fmul(PST(0),PST(code & 7),&tmp);
242                         real_to_real(&tmp,&ST(0));
243                         return(0);
244                 case 0x1a: /* fcom */
245                         fcom(PST(code & 7),PST(0));
246                         return(0);
247                 case 0x1b: /* fcomp */
248                         fcom(PST(code & 7),PST(0));
249                         fpop();
250                         return(0);
251                 case 0x1c: /* fsubr */
252                         real_to_real(&ST(code & 7),&tmp);
253                         tmp.exponent ^= 0x8000;
254                         fadd(PST(0),&tmp,&tmp);
255                         real_to_real(&tmp,&ST(0));
256                         return(0);
257                 case 0x1d: /* fsub */
258                         ST(0).exponent ^= 0x8000;
259                         fadd(PST(0),PST(code & 7),&tmp);
260                         real_to_real(&tmp,&ST(0));
261                         return(0);
262                 case 0x1e: /* fdivr */
263                         fdiv(PST(0),PST(code & 7),&tmp);
264                         real_to_real(&tmp,&ST(0));
265                         return(0);
266                 case 0x1f: /* fdiv */
267                         fdiv(PST(code & 7),PST(0),&tmp);
268                         real_to_real(&tmp,&ST(0));
269                         return(0);
270                 case 0x38: /* fld */
271                         fpush();
272                         ST(0) = ST((code & 7)+1);  /* why plus 1 ????? ATS */
273                         return(0);
274                 case 0x39: /* fxch */
275                         fxchg(&ST(0),&ST(code & 7));
276                         return(0);
277                 case 0x3b: /*  ??? ??? wrong ???? ATS */
278                         ST(code & 7) = ST(0);
279                         fpop();
280                         return(0);
281                 case 0x98: /* fadd */
282                         fadd(PST(0),PST(code & 7),&tmp);
283                         real_to_real(&tmp,&ST(code & 7));
284                         return(0);
285                 case 0x99: /* fmul */
286                         fmul(PST(0),PST(code & 7),&tmp);
287                         real_to_real(&tmp,&ST(code & 7));
288                         return(0);
289                 case 0x9a: /* ???? , my manual don't list a direction bit
290 for fcom , ??? ATS */
291                         fcom(PST(code & 7),PST(0));
292                         return(0);
293                 case 0x9b: /* same as above , ATS */
294                         fcom(PST(code & 7),PST(0));
295                         fpop();
296                         return(0);
297                 case 0x9c: /* fsubr */
298                         ST(code & 7).exponent ^= 0x8000;
299                         fadd(PST(0),PST(code & 7),&tmp);
300                         real_to_real(&tmp,&ST(code & 7));
301                         return(0);
302                 case 0x9d: /* fsub */
303                         real_to_real(&ST(0),&tmp);
304                         tmp.exponent ^= 0x8000;
305                         fadd(PST(code & 7),&tmp,&tmp);
306                         real_to_real(&tmp,&ST(code & 7));
307                         return(0);
308                 case 0x9e: /* fdivr */
309                         fdiv(PST(0),PST(code & 7),&tmp);
310                         real_to_real(&tmp,&ST(code & 7));
311                         return(0);
312                 case 0x9f: /* fdiv */
313                         fdiv(PST(code & 7),PST(0),&tmp);
314                         real_to_real(&tmp,&ST(code & 7));
315                         return(0);
316                 case 0xb8: /* ffree */
317                         printf("ffree not implemented\n");
318                         math_abort(info,SIGILL);
319                 case 0xb9: /* fstp ???? where is the pop ? ATS */
320                         fxchg(&ST(0),&ST(code & 7));
321                         return(0);
322                 case 0xba: /* fst */
323                         ST(code & 7) = ST(0);
324                         return(0);
325                 case 0xbb: /* ????? encoding of fstp to mem ? ATS */
326                         ST(code & 7) = ST(0);
327                         fpop();
328                         return(0);
329                 case 0xbc: /* fucom */
330                         fucom(PST(code & 7),PST(0));
331                         return(0);
332                 case 0xbd: /* fucomp */
333                         fucom(PST(code & 7),PST(0));
334                         fpop();
335                         return(0);
336                 case 0xd8: /* faddp */
337                         fadd(PST(code & 7),PST(0),&tmp);
338                         real_to_real(&tmp,&ST(code & 7));
339                         fpop();
340                         return(0);
341                 case 0xd9: /* fmulp */
342                         fmul(PST(code & 7),PST(0),&tmp);
343                         real_to_real(&tmp,&ST(code & 7));
344                         fpop();
345                         return(0);
346                 case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
347                         fcom(PST(code & 7),PST(0));
348                         fpop();
349                         return(0);
350                 case 0xdc: /* fsubrp */
351                         ST(code & 7).exponent ^= 0x8000;
352                         fadd(PST(0),PST(code & 7),&tmp);
353                         real_to_real(&tmp,&ST(code & 7));
354                         fpop();
355                         return(0);
356                 case 0xdd: /* fsubp */
357                         real_to_real(&ST(0),&tmp);
358                         tmp.exponent ^= 0x8000;
359                         fadd(PST(code & 7),&tmp,&tmp);
360                         real_to_real(&tmp,&ST(code & 7));
361                         fpop();
362                         return(0);
363                 case 0xde: /* fdivrp */
364                         fdiv(PST(0),PST(code & 7),&tmp);
365                         real_to_real(&tmp,&ST(code & 7));
366                         fpop();
367                         return(0);
368                 case 0xdf: /* fdivp */
369                         fdiv(PST(code & 7),PST(0),&tmp);
370                         real_to_real(&tmp,&ST(code & 7));
371                         fpop();
372                         return(0);
373                 case 0xf8: /* fild 16-bit mem ???? ATS */
374                         printf("ffree not implemented\n");
375                         math_abort(info,SIGILL);
376                         fpop();
377                         return(0);
378                 case 0xf9: /*  ????? ATS */
379                         fxchg(&ST(0),&ST(code & 7));
380                         return(0);
381                 case 0xfa: /* fist 16-bit mem ? ATS */
382                 case 0xfb: /* fistp 16-bit mem ? ATS */
383                         ST(code & 7) = ST(0);
384                         fpop();
385                         return(0);
386         }
387         switch ((code>>3) & 0xe7) {
388                 case 0x22:
389                         put_short_real(PST(0),info,code);
390                         return(0);
391                 case 0x23:
392                         put_short_real(PST(0),info,code);
393                         fpop();
394                         return(0);
395                 case 0x24:
396                         address = ea(info,code);
397                         for (code = 0 ; code < 7 ; code++) {
398                                 ((int32_t *) & I387)[code] =
399                                    get_fs_long((u_int32_t *) address);
400                                 address += 4;
401                         }
402                         return(0);
403                 case 0x25:
404                         address = ea(info,code);
405                         *(unsigned short *) &I387.cwd =
406                                 get_fs_word((unsigned short *) address);
407                         return(0);
408                 case 0x26:
409                         address = ea(info,code);
410                         /*verify_area(address,28);*/
411                         for (code = 0 ; code < 7 ; code++) {
412                                 put_fs_long( ((int32_t *) & I387)[code],
413                                         (u_int32_t *) address);
414                                 address += 4;
415                         }
416                         return(0);
417                 case 0x27:
418                         address = ea(info,code);
419                         /*verify_area(address,2);*/
420                         put_fs_word(I387.cwd,(short *) address);
421                         return(0);
422                 case 0x62:
423                         put_long_int(PST(0),info,code);
424                         return(0);
425                 case 0x63:
426                         put_long_int(PST(0),info,code);
427                         fpop();
428                         return(0);
429                 case 0x65:
430                         fpush();
431                         get_temp_real(&tmp,info,code);
432                         real_to_real(&tmp,&ST(0));
433                         return(0);
434                 case 0x67:
435                         put_temp_real(PST(0),info,code);
436                         fpop();
437                         return(0);
438                 case 0xa2:
439                         put_long_real(PST(0),info,code);
440                         return(0);
441                 case 0xa3:
442                         put_long_real(PST(0),info,code);
443                         fpop();
444                         return(0);
445                 case 0xa4:
446                         address = ea(info,code);
447                         for (code = 0 ; code < 27 ; code++) {
448                                 ((int32_t *) & I387)[code] =
449                                    get_fs_long((u_int32_t *) address);
450                                 address += 4;
451                         }
452                         return(0);
453                 case 0xa6:
454                         address = ea(info,code);
455                         /*verify_area(address,108);*/
456                         for (code = 0 ; code < 27 ; code++) {
457                                 put_fs_long( ((int32_t *) & I387)[code],
458                                         (u_int32_t *) address);
459                                 address += 4;
460                         }
461                         I387.cwd = 0x037f;
462                         I387.swd = 0x0000;
463                         I387.twd = 0x0000;
464                         return(0);
465                 case 0xa7:
466                         address = ea(info,code);
467                         /*verify_area(address,2);*/
468                         put_fs_word(I387.swd,(short *) address);
469                         return(0);
470                 case 0xe2:
471                         put_short_int(PST(0),info,code);
472                         return(0);
473                 case 0xe3:
474                         put_short_int(PST(0),info,code);
475                         fpop();
476                         return(0);
477                 case 0xe4:
478                         fpush();
479                         get_BCD(&tmp,info,code);
480                         real_to_real(&tmp,&ST(0));
481                         return(0);
482                 case 0xe5:
483                         fpush();
484                         get_longlong_int(&tmp,info,code);
485                         real_to_real(&tmp,&ST(0));
486                         return(0);
487                 case 0xe6:
488                         put_BCD(PST(0),info,code);
489                         fpop();
490                         return(0);
491                 case 0xe7:
492                         put_longlong_int(PST(0),info,code);
493                         fpop();
494                         return(0);
495         }
496         switch (code >> 9) {
497                 case 0:
498                         get_short_real(&tmp,info,code);
499                         break;
500                 case 1:
501                         get_long_int(&tmp,info,code);
502                         break;
503                 case 2:
504                         get_long_real(&tmp,info,code);
505                         break;
506                 case 4:
507                         get_short_int(&tmp,info,code);
508         }
509         switch ((code>>3) & 0x27) {
510                 case 0:
511                         fadd(&tmp,PST(0),&tmp);
512                         real_to_real(&tmp,&ST(0));
513                         return(0);
514                 case 1:
515                         fmul(&tmp,PST(0),&tmp);
516                         real_to_real(&tmp,&ST(0));
517                         return(0);
518                 case 2:
519                         fcom(&tmp,PST(0));
520                         return(0);
521                 case 3:
522                         fcom(&tmp,PST(0));
523                         fpop();
524                         return(0);
525                 case 4:
526                         tmp.exponent ^= 0x8000;
527                         fadd(&tmp,PST(0),&tmp);
528                         real_to_real(&tmp,&ST(0));
529                         return(0);
530                 case 5:
531                         ST(0).exponent ^= 0x8000;
532                         fadd(&tmp,PST(0),&tmp);
533                         real_to_real(&tmp,&ST(0));
534                         return(0);
535                 case 6:
536                         fdiv(PST(0),&tmp,&tmp);
537                         real_to_real(&tmp,&ST(0));
538                         return(0);
539                 case 7:
540                         fdiv(&tmp,PST(0),&tmp);
541                         real_to_real(&tmp,&ST(0));
542                         return(0);
543         }
544         if ((code & 0x138) == 0x100) {
545                         fpush();
546                         real_to_real(&tmp,&ST(0));
547                         return(0);
548         }
549         printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
550                 info->tf_eip,code);
551         math_abort(info,SIGFPE);
552 }
553
554 static void
555 fpop(void)
556 {
557         u_int32_t tmp;
558
559         tmp = I387.swd & 0xffffc7ffUL;
560         I387.swd += 0x00000800;
561         I387.swd &= 0x00003800;
562         I387.swd |= tmp;
563 }
564
565 static void
566 fpush(void)
567 {
568         u_int32_t tmp;
569
570         tmp = I387.swd & 0xffffc7ffUL;
571         I387.swd += 0x00003800;
572         I387.swd &= 0x00003800;
573         I387.swd |= tmp;
574 }
575
576 static void 
577 fxchg(temp_real_unaligned *a, temp_real_unaligned *b)
578 {
579         temp_real_unaligned c;
580
581         c = *a;
582         *a = *b;
583         *b = c;
584 }
585
586 static temp_real_unaligned *
587 __st(int i)
588 {
589         i += I387.swd >> 11;
590         i &= 7;
591         return (temp_real_unaligned *) (i*10 + (char *)(I387.st_space));
592 }
593
594 /*
595  * linux/kernel/math/ea.c
596  *
597  * (C) 1991 Linus Torvalds
598  */
599
600 /*
601  * Calculate the effective address.
602  */
603
604
/*
 * Index table used by REG(): entry n gives the position, within the
 * saved register array, of the register selected by the 3-bit number n
 * taken from a ModRM or SIB byte (see sib() and ea()).
 */
static int __regoffset[] = {
	tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
};

/*
 * REG(x): lvalue for register number x, looked up in the current
 * process's md_regs treated as an array of int.
 */
#define REG(x) (((int *)curproc->p_md.md_regs)[__regoffset[(x)]])
610
611 static char *
612 sib(struct trapframe *info, int mod)
613 {
614         unsigned char ss,index,base;
615         int32_t offset = 0;
616
617         base = get_fs_byte((char *) info->tf_eip);
618         info->tf_eip++;
619         ss = base >> 6;
620         index = (base >> 3) & 7;
621         base &= 7;
622         if (index == 4)
623                 offset = 0;
624         else
625                 offset = REG(index);
626         offset <<= ss;
627         if (mod || base != 5)
628                 offset += REG(base);
629         if (mod == 1) {
630                 offset += (signed char) get_fs_byte((char *) info->tf_eip);
631                 info->tf_eip++;
632         } else if (mod == 2 || base == 5) {
633                 offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
634                 info->tf_eip += 4;
635         }
636         I387.foo = offset;
637         I387.fos = 0x17;
638         return (char *) offset;
639 }
640
641 static char *
642 ea(struct trapframe *info, unsigned short code)
643 {
644         unsigned char mod,rm;
645         int32_t *tmp;
646         int offset = 0;
647
648         mod = (code >> 6) & 3;
649         rm = code & 7;
650         if (rm == 4 && mod != 3)
651                 return sib(info,mod);
652         if (rm == 5 && !mod) {
653                 offset = get_fs_long((u_int32_t *) info->tf_eip);
654                 info->tf_eip += 4;
655                 I387.foo = offset;
656                 I387.fos = 0x17;
657                 return (char *) offset;
658         }
659         tmp = (int32_t *) &REG(rm);
660         switch (mod) {
661                 case 0: offset = 0; break;
662                 case 1:
663                         offset = (signed char) get_fs_byte((char *) info->tf_eip);
664                         info->tf_eip++;
665                         break;
666                 case 2:
667                         offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
668                         info->tf_eip += 4;
669                         break;
670 #ifdef notyet
671                 case 3:
672                         math_abort(info,1<<(SIGILL-1));
673 #endif
674         }
675         I387.foo = offset;
676         I387.fos = 0x17;
677         return offset + (char *) *tmp;
678 }
679 /*
680  * linux/kernel/math/get_put.c
681  *
682  * (C) 1991 Linus Torvalds
683  */
684
685 /*
686  * This file handles all accesses to user memory: getting and putting
687  * ints/reals/BCD etc. This is the only part that concerns itself with
688  * other than temporary real format. All other calls are strictly temp_real.
689  */
690
691 static void 
692 get_short_real(temp_real *tmp, struct trapframe *info, unsigned short code)
693 {
694         char *addr;
695         short_real sr;
696
697         addr = ea(info,code);
698         sr = get_fs_long((u_int32_t *) addr);
699         short_to_temp(&sr,tmp);
700 }
701
702 static void
703 get_long_real(temp_real *tmp, struct trapframe *info, unsigned short code)
704 {
705         char *addr;
706         long_real lr;
707
708         addr = ea(info,code);
709         lr.a = get_fs_long((u_int32_t *) addr);
710         lr.b = get_fs_long(1 + (u_int32_t *) addr);
711         long_to_temp(&lr,tmp);
712 }
713
714 static void
715 get_temp_real(temp_real *tmp, struct trapframe *info, unsigned short code)
716 {
717         char *addr;
718
719         addr = ea(info,code);
720         tmp->a = get_fs_long((u_int32_t *) addr);
721         tmp->b = get_fs_long(1 + (u_int32_t *) addr);
722         tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
723 }
724
725 static void
726 get_short_int(temp_real *tmp, struct trapframe *info, unsigned short code)
727 {
728         char *addr;
729         temp_int ti;
730
731         addr = ea(info,code);
732         ti.a = (signed short) get_fs_word((unsigned short *) addr);
733         ti.b = 0;
734         if ((ti.sign = (ti.a < 0)) != 0)
735                 ti.a = - ti.a;
736         int_to_real(&ti,tmp);
737 }
738
739 static void
740 get_long_int(temp_real *tmp, struct trapframe *info, unsigned short code)
741 {
742         char *addr;
743         temp_int ti;
744
745         addr = ea(info,code);
746         ti.a = get_fs_long((u_int32_t *) addr);
747         ti.b = 0;
748         if ((ti.sign = (ti.a < 0)) != 0)
749                 ti.a = - ti.a;
750         int_to_real(&ti,tmp);
751 }
752
753 static void 
754 get_longlong_int(temp_real *tmp, struct trapframe *info, unsigned short code)
755 {
756         char *addr;
757         temp_int ti;
758
759         addr = ea(info,code);
760         ti.a = get_fs_long((u_int32_t *) addr);
761         ti.b = get_fs_long(1 + (u_int32_t *) addr);
762         if ((ti.sign = (ti.b < 0)) != 0)
763                 __asm__("notl %0 ; notl %1\n\t"
764                         "addl $1,%0 ; adcl $0,%1"
765                         :"=r" (ti.a),"=r" (ti.b)
766                         :"0" (ti.a),"1" (ti.b));
767         int_to_real(&ti,tmp);
768 }
769
/*
 * MUL10(low,high): multiply the 64-bit quantity held in the pair
 * (low, high) by ten, in place.  The pair is doubled once and that
 * intermediate (x2) is saved in %ecx/%ebx; two more doublings give x8,
 * and adding the saved x2 back yields x10.  Carries out of the low word
 * propagate into the high word via adcl.  low is tied to %eax and high
 * to %edx; %ecx and %ebx are clobbered.
 */
#define MUL10(low,high) \
__asm__("addl %0,%0 ; adcl %1,%1\n\t" \
"movl %0,%%ecx ; movl %1,%%ebx\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %%ecx,%0 ; adcl %%ebx,%1" \
:"=a" (low),"=d" (high) \
:"0" (low),"1" (high):"cx","bx")

/*
 * ADD64(val,low,high): add the 32-bit value val to the 64-bit quantity
 * held in (low, high), in place, carrying from the low word into the
 * high word.
 */
#define ADD64(val,low,high) \
__asm__("addl %4,%0 ; adcl $0,%1":"=r" (low),"=r" (high) \
:"0" (low),"1" (high),"r" ((u_int32_t) (val)))
782
783 static void
784 get_BCD(temp_real *tmp, struct trapframe *info, unsigned short code)
785 {
786         int k;
787         char *addr;
788         temp_int i;
789         unsigned char c;
790
791         addr = ea(info,code);
792         addr += 9;
793         i.sign = 0x80 & get_fs_byte(addr--);
794         i.a = i.b = 0;
795         for (k = 0; k < 9; k++) {
796                 c = get_fs_byte(addr--);
797                 MUL10(i.a, i.b);
798                 ADD64((c>>4), i.a, i.b);
799                 MUL10(i.a, i.b);
800                 ADD64((c&0xf), i.a, i.b);
801         }
802         int_to_real(&i,tmp);
803 }
804
805 static void 
806 put_short_real(const temp_real *tmp,
807         struct trapframe *info, unsigned short code)
808 {
809         char *addr;
810         short_real sr;
811
812         addr = ea(info,code);
813         /*verify_area(addr,4);*/
814         temp_to_short(tmp,&sr);
815         put_fs_long(sr,(u_int32_t *) addr);
816 }
817
818 static void
819 put_long_real(const temp_real *tmp,
820         struct trapframe *info, unsigned short code)
821 {
822         char *addr;
823         long_real lr;
824
825         addr = ea(info,code);
826         /*verify_area(addr,8);*/
827         temp_to_long(tmp,&lr);
828         put_fs_long(lr.a, (u_int32_t *) addr);
829         put_fs_long(lr.b, 1 + (u_int32_t *) addr);
830 }
831
832 static void
833 put_temp_real(const temp_real *tmp,
834         struct trapframe *info, unsigned short code)
835 {
836         char *addr;
837
838         addr = ea(info,code);
839         /*verify_area(addr,10);*/
840         put_fs_long(tmp->a, (u_int32_t *) addr);
841         put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
842         put_fs_word(tmp->exponent, 4 + (short *) addr);
843 }
844
845 static void
846 put_short_int(const temp_real *tmp,
847         struct trapframe *info, unsigned short code)
848 {
849         char *addr;
850         temp_int ti;
851
852         addr = ea(info,code);
853         real_to_int(tmp,&ti);
854         /*verify_area(addr,2);*/
855         if (ti.sign)
856                 ti.a = -ti.a;
857         put_fs_word(ti.a,(short *) addr);
858 }
859
860 static void
861 put_long_int(const temp_real *tmp,
862         struct trapframe *info, unsigned short code)
863 {
864         char *addr;
865         temp_int ti;
866
867         addr = ea(info,code);
868         real_to_int(tmp,&ti);
869         /*verify_area(addr,4);*/
870         if (ti.sign)
871                 ti.a = -ti.a;
872         put_fs_long(ti.a,(u_int32_t *) addr);
873 }
874
/*
 * Convert *tmp to a 64-bit integer and store it at the effective address
 * decoded by ea(): low word first, high word at byte offset 4.
 *
 * real_to_int() yields a magnitude plus a sign flag; when the flag is set
 * the 64-bit magnitude is two's-complement negated (invert both words,
 * add one with carry) before being written.
 */
875 static void
876 put_longlong_int(const temp_real *tmp,
877         struct trapframe *info, unsigned short code)
878 {
879         char *addr;
880         temp_int ti;
881
882         addr = ea(info,code);
883         real_to_int(tmp,&ti);
884         /*verify_area(addr,8);*/
885         if (ti.sign)
886                 __asm__("notl %0 ; notl %1\n\t"
887                         "addl $1,%0 ; adcl $0,%1"
888                         :"=r" (ti.a),"=r" (ti.b)
889                         :"0" (ti.a),"1" (ti.b));
890         put_fs_long(ti.a,(u_int32_t *) addr);
891         put_fs_long(ti.b,1 + (u_int32_t *) addr);
892 }
893
/*
 * DIV10(low, high, rem): divide the 64-bit value high:low by ten in place
 * and leave the remainder in rem.
 *
 * The first divl divides 0:high by 10 (the divisor sits in %ecx), giving
 * the new high word in %eax and a partial remainder in %edx.  The xchgl
 * swaps that quotient with the low word, and the second divl divides
 * remainder:low by 10, producing the new low word and the final remainder.
 */
894 #define DIV10(low,high,rem) \
895 __asm__("divl %6 ; xchgl %1,%2 ; divl %6" \
896         :"=d" (rem),"=a" (low),"=r" (high) \
897         :"0" (0),"1" (high),"2" (low),"c" (10))
898
/*
 * Store *tmp as a 10-byte packed-decimal operand at the effective address
 * decoded by ea().
 *
 * The value is first converted with real_to_int().  Byte 9 receives 0x80
 * when the sign flag is set and 0 otherwise.  Bytes 0 through 8 are then
 * filled in ascending order, each with two decimal digits peeled off the
 * integer by DIV10: the first remainder goes in the low nibble, the next
 * in the high nibble.
 */
899 static void
900 put_BCD(const temp_real *tmp,struct trapframe *info, unsigned short code)
901 {
902         int k,rem;
903         char *addr;
904         temp_int i;
905         unsigned char c;
906
907         addr = ea(info,code);
908         /*verify_area(addr,10);*/
909         real_to_int(tmp,&i);
910         if (i.sign)
911                 put_fs_byte(0x80, addr+9);
912         else
913                 put_fs_byte(0, addr+9);
914         for (k = 0; k < 9; k++) {
915                 DIV10(i.a,i.b,rem);
916                 c = rem;	/* lower digit of the pair */
917                 DIV10(i.a,i.b,rem);
918                 c += rem<<4;	/* upper digit of the pair */
919                 put_fs_byte(c,addr++);
920         }
921 }
922
923 /*
924  * linux/kernel/math/mul.c
925  *
926  * (C) 1991 Linus Torvalds
927  */
928
929 /*
930  * temporary real multiplication routine.
931  */
932
933
/*
 * Shift the 128-bit value stored in c[0..3] left by one bit, in place.
 * c[0] is the least significant 32-bit word; the bit shifted out of c[3]
 * is discarded.
 *
 * This is plain C rather than inline assembly: the asm version wrote to
 * c[] through a pointer without declaring a "memory" clobber, so once the
 * function was inlined the compiler was entitled to keep stale copies of
 * the array in registers (for instance the tmp[3] loop test in fmul()).
 * The code relies on int being 32 bits wide, as it is on i386.
 */
static void
shift(int *c)
{
	unsigned int carry = 0;
	int k;

	for (k = 0; k < 4; k++) {
		unsigned int word = (unsigned int)c[k];

		c[k] = (int)((word << 1) | carry);
		carry = word >> 31;	/* top bit moves into the next word */
	}
}
943
944 static void
945 mul64(const temp_real *a, const temp_real *b, int *c)
946 {
947         __asm__("movl (%0),%%eax\n\t"
948                 "mull (%1)\n\t"
949                 "movl %%eax,(%2)\n\t"
950                 "movl %%edx,4(%2)\n\t"
951                 "movl 4(%0),%%eax\n\t"
952                 "mull 4(%1)\n\t"
953                 "movl %%eax,8(%2)\n\t"
954                 "movl %%edx,12(%2)\n\t"
955                 "movl (%0),%%eax\n\t"
956                 "mull 4(%1)\n\t"
957                 "addl %%eax,4(%2)\n\t"
958                 "adcl %%edx,8(%2)\n\t"
959                 "adcl $0,12(%2)\n\t"
960                 "movl 4(%0),%%eax\n\t"
961                 "mull (%1)\n\t"
962                 "addl %%eax,4(%2)\n\t"
963                 "adcl %%edx,8(%2)\n\t"
964                 "adcl $0,12(%2)"
965                 ::"S" (a),"c" (b),"D" (c)
966                 :"ax","dx");
967 }
968
969 static void
970 fmul(const temp_real *src1, const temp_real *src2, temp_real *result)
971 {
972         int i,sign;
973         int tmp[4] = {0,0,0,0};
974
975         sign = (src1->exponent ^ src2->exponent) & 0x8000;
976         i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
977         if (i<0) {
978                 result->exponent = sign;
979                 result->a = result->b = 0;
980                 return;
981         }
982         if (i>0x7fff) {
983                 set_OE();
984                 return;
985         }
986         mul64(src1,src2,tmp);
987         if (tmp[0] || tmp[1] || tmp[2] || tmp[3])
988                 while (i && tmp[3] >= 0) {
989                         i--;
990                         shift(tmp);
991                 }
992         else
993                 i = 0;
994         result->exponent = i | sign;
995         result->a = tmp[2];
996         result->b = tmp[3];
997 }
998
999 /*
1000  * linux/kernel/math/div.c
1001  *
1002  * (C) 1991 Linus Torvalds
1003  */
1004
1005 /*
1006  * temporary real division routine.
1007  */
1008
/*
 * Shift the 128-bit value stored in c[0..3] left by one bit, in place.
 * c[0] is the least significant 32-bit word; the bit shifted out of c[3]
 * is discarded.
 *
 * This is plain C rather than inline assembly: the asm version modified
 * c[] through a pointer without a "memory" clobber, which lets the
 * compiler keep stale copies of the array in registers once the function
 * is inlined (for instance the b[3] and tmp[3] loop tests in fdiv()).
 * The code relies on int being 32 bits wide, as it is on i386.
 */
static void
shift_left(int *c)
{
	unsigned int carry = 0;
	int k;

	for (k = 0; k < 4; k++) {
		unsigned int word = (unsigned int)c[k];

		c[k] = (int)((word << 1) | carry);
		carry = word >> 31;	/* top bit moves into the next word */
	}
}
1018
/*
 * Shift the 128-bit value stored in c[0..3] right by one bit, in place.
 * c[0] is the least significant 32-bit word; the shift is logical (a zero
 * enters at the top of c[3]) and the bit shifted out of c[0] is discarded.
 *
 * This is plain C rather than inline assembly: the asm version modified
 * c[] through a pointer without a "memory" clobber, so the compiler was
 * free to assume the array was unchanged once the function was inlined.
 * The code relies on int being 32 bits wide, as it is on i386.
 */
static void
shift_right(int *c)
{
	unsigned int carry = 0;
	int k;

	for (k = 3; k >= 0; k--) {
		unsigned int word = (unsigned int)c[k];

		c[k] = (int)((word >> 1) | (carry << 31));
		carry = word & 1;	/* low bit drops into the word below */
	}
}
1025
/*
 * Subtract the 128-bit value a[0..3] from b[0..3] in place (b -= a), with
 * index 0 holding the least significant 32-bit word.
 *
 * Returns 1 when no borrow came out of the top word (b was >= a as an
 * unsigned quantity) and 0 otherwise.  b[] is overwritten with the
 * wrapped difference in both cases, which is why div64() calls this on a
 * scratch copy.
 *
 * This is plain C rather than inline assembly: the asm version wrote b[]
 * through a pointer without a "memory" clobber, so a caller that reads
 * the array straight afterwards could be handed stale values.
 */
static int
try_sub(int *a, int *b)
{
	unsigned int borrow = 0;
	int k;

	for (k = 0; k < 4; k++) {
		unsigned int minuend = (unsigned int)b[k];
		unsigned int subtrahend = (unsigned int)a[k];
		unsigned int diff = minuend - subtrahend - borrow;

		/* Borrow out when minuend < subtrahend + incoming borrow. */
		borrow = (minuend < subtrahend) ||
		    (minuend == subtrahend && borrow);
		b[k] = (int)diff;
	}
	return !borrow;
}
1038
/*
 * Restoring shift-and-subtract division on 128-bit values.
 *
 * Over 64 steps the divisor b[] is trial-subtracted from the running
 * remainder a[] (via a scratch copy); each successful subtraction commits
 * the new remainder and sets the current quotient bit.  Quotient bits are
 * produced most significant first, filling c[3] and then c[2]; the bits
 * are OR-ed in, so the caller must supply a zeroed c[].  b[] is shifted
 * right by one after every step and is therefore destroyed.
 */
static void
div64(int *a, int *b, int *c)
{
	int trial[4];
	int step, w;
	unsigned int bit = 0;

	c += 4;
	for (step = 0; step < 64; step++) {
		bit >>= 1;
		if (bit == 0) {
			/* Current word exhausted: move down to the next one. */
			c--;
			bit = 0x80000000UL;
		}
		for (w = 0; w < 4; w++)
			trial[w] = a[w];
		if (try_sub(b, trial)) {
			*c |= bit;
			for (w = 0; w < 4; w++)
				a[w] = trial[w];
		}
		shift_right(b);
	}
}
1062
/*
 * Divide two temporary reals: *result = *src1 / *src2.
 *
 * The result sign is the XOR of the operand signs.  A divisor whose
 * mantissa is all zero raises set_ZE() and returns without writing
 * *result.  The working exponent is the difference of the biased
 * exponents plus the bias; if that is negative, set_UE() is raised and a
 * signed zero is returned.
 *
 * Both mantissas are placed in the upper 64 bits of 128-bit arrays.  The
 * divisor is shifted left until its top bit is set (bumping the exponent
 * once per shift), div64() produces the quotient, and the quotient is
 * then shifted left until its top bit is set or the exponent hits zero.
 * set_DE() is raised if the quotient still is not normalised, set_OE() if
 * the exponent exceeds 0x7fff (again leaving *result untouched), and
 * set_PE() if the low 64 bits of tmp[] are non-zero.
 */
1063 static void
1064 fdiv(const temp_real *src1, const temp_real *src2, temp_real *result)
1065 {
1066         int i,sign;
1067         int a[4],b[4],tmp[4] = {0,0,0,0};
1068
1069         sign = (src1->exponent ^ src2->exponent) & 0x8000;
1070         if (!(src2->a || src2->b)) {
1071                 set_ZE();
1072                 return;
1073         }
1074         i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
1075         if (i<0) {
1076                 set_UE();
1077                 result->exponent = sign;
1078                 result->a = result->b = 0;
1079                 return;
1080         }
1081         a[0] = a[1] = 0;
1082         a[2] = src1->a;
1083         a[3] = src1->b;
1084         b[0] = b[1] = 0;
1085         b[2] = src2->a;
1086         b[3] = src2->b;
1087         while (b[3] >= 0) {	/* until bit 31 of b[3] is set */
1088                 i++;
1089                 shift_left(b);
1090         }
1091         div64(a,b,tmp);
1092         if (tmp[0] || tmp[1] || tmp[2] || tmp[3]) {
1093                 while (i && tmp[3] >= 0) {
1094                         i--;
1095                         shift_left(tmp);
1096                 }
1097                 if (tmp[3] >= 0)
1098                         set_DE();
1099         } else
1100                 i = 0;
1101         if (i>0x7fff) {
1102                 set_OE();
1103                 return;
1104         }
1105         if (tmp[0] || tmp[1])
1106                 set_PE();
1107         result->exponent = i | sign;
1108         result->a = tmp[2];
1109         result->b = tmp[3];
1110 }
1111
1112 /*
1113  * linux/kernel/math/add.c
1114  *
1115  * (C) 1991 Linus Torvalds
1116  */
1117
1118 /*
1119  * temporary real addition routine.
1120  *
1121  * NOTE! These aren't exact: they are only 62 bits wide, and don't do
1122  * correct rounding. Fast hack. The reason is that we shift right the
1123  * values by two, in order not to have overflow (1 bit), and to be able
1124  * to move the sign into the mantissa (1 bit). Much simpler algorithms,
1125  * and 62 bits (61 really - no rounding) accuracy is usually enough. The
1126  * only time you should notice anything weird is when adding 64-bit
1127  * integers together. When using doubles (52 bits accuracy), the
1128  * 61-bit accuracy never shows at all.
1129  */
1130
/*
 * NEGINT(a): two's-complement negate the 64-bit value a->b:a->a in place
 * (invert both words, add one to the low word, carry into the high word).
 * The argument is pasted in front of "->" without parentheses, so it
 * should be a plain pointer name.
 */
1131 #define NEGINT(a) \
1132 __asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
1133         :"=r" (a->a),"=r" (a->b) \
1134         :"0" (a->a),"1" (a->b))
1135
/*
 * Turn a sign-and-magnitude temporary real into the signed-mantissa form
 * used inside fadd().
 *
 * The exponent is raised by two and the 64-bit mantissa shifted right by
 * two to match.  If the exponent field then tests negative the mantissa
 * is negated, and finally the exponent is masked down to its low 15 bits.
 * NOTE(review): the "< 0" test presumes exponent is a signed 16-bit
 * field whose top bit is the sign flag -- confirm against math_emu.h.
 */
1136 static void
1137 signify(temp_real *a)
1138 {
1139         a->exponent += 2;
1140         __asm__("shrdl $2,%1,%0 ; shrl $2,%1"
1141                 :"=r" (a->a),"=r" (a->b)
1142                 :"0" (a->a),"1" (a->b));
1143         if (a->exponent < 0)
1144                 NEGINT(a);
1145         a->exponent &= 0x7fff;
1146 }
1147
/*
 * Convert the signed-mantissa form produced by signify() back into a
 * normalised sign-and-magnitude temporary real.
 *
 * A zero mantissa yields exponent 0.  Otherwise the exponent is masked to
 * 15 bits; a mantissa whose high word tests negative is negated and the
 * 0x8000 sign flag set.  The mantissa is then shifted left, decrementing
 * the exponent once per bit, until the top bit of a->b is set.  The
 * exponent is not checked for running below zero during that loop.
 */
1148 static void
1149 unsignify(temp_real *a)
1150 {
1151         if (!(a->a || a->b)) {
1152                 a->exponent = 0;
1153                 return;
1154         }
1155         a->exponent &= 0x7fff;
1156         if (a->b < 0) {
1157                 NEGINT(a);
1158                 a->exponent |= 0x8000;
1159         }
1160         while (a->b >= 0) {
1161                 a->exponent--;
1162                 __asm__("addl %0,%0 ; adcl %1,%1"
1163                         :"=r" (a->a),"=r" (a->b)
1164                         :"0" (a->a),"1" (a->b));
1165         }
1166 }
1167
/*
 * Add two temporary reals: *result = *src1 + *src2.
 *
 * The operand with the larger 15-bit exponent becomes a, the other b (on
 * a tie src2 becomes a).  If the exponents differ by 64 or more, a is
 * returned unchanged.  Otherwise b's mantissa is shifted right by the
 * exponent difference -- a whole-word move first when the difference is
 * 32 or more, then a shrdl/shrl pair for the rest -- both operands are
 * converted with signify(), the mantissas are added as 64-bit integers,
 * and unsignify() restores sign-and-magnitude form and normalises.
 * Bits shifted out of b are dropped without rounding.
 */
1168 static void
1169 fadd(const temp_real *src1, const temp_real *src2, temp_real *result)
1170 {
1171         temp_real a,b;
1172         int x1,x2,shift;
1173
1174         x1 = src1->exponent & 0x7fff;
1175         x2 = src2->exponent & 0x7fff;
1176         if (x1 > x2) {
1177                 a = *src1;
1178                 b = *src2;
1179                 shift = x1-x2;
1180         } else {
1181                 a = *src2;
1182                 b = *src1;
1183                 shift = x2-x1;
1184         }
1185         if (shift >= 64) {
1186                 *result = a;
1187                 return;
1188         }
1189         if (shift >= 32) {
1190                 b.a = b.b;
1191                 b.b = 0;
1192                 shift -= 32;
1193         }
1194         __asm__("shrdl %4,%1,%0 ; shrl %4,%1"
1195                 :"=r" (b.a),"=r" (b.b)
1196                 :"0" (b.a),"1" (b.b),"c" ((char) shift));
1197         signify(&a);
1198         signify(&b);
1199         __asm__("addl %4,%0 ; adcl %5,%1"
1200                 :"=r" (a.a),"=r" (a.b)
1201                 :"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
1202         unsignify(&a);
1203         *result = a;
1204 }
1205
1206 /*
1207  * linux/kernel/math/compare.c
1208  *
1209  * (C) 1991 Linus Torvalds
1210  */
1211
1212 /*
1213  * temporary real comparison routines
1214  */
1215
1216
/*
 * Clear bits 0x4500 of the emulated status word I387.swd.  Those are bits
 * 8, 10 and 14, the x87 positions of the C0, C2 and C3 condition codes.
 */
1217 #define clear_Cx() (I387.swd &= ~0x4500)
1218
/*
 * Normalise a temporary real in place.
 *
 * A zero mantissa gives exponent 0 (the sign flag is dropped too).
 * Otherwise the mantissa is shifted left, decrementing the 15-bit
 * exponent once per bit, until the top bit of a->b is set or the
 * exponent reaches zero; the original sign flag is then put back.
 */
1219 static void 
1220 normalize(temp_real *a)
1221 {
1222         int i = a->exponent & 0x7fff;
1223         int sign = a->exponent & 0x8000;
1224
1225         if (!(a->a || a->b)) {
1226                 a->exponent = 0;
1227                 return;
1228         }
1229         while (i && a->b >= 0) {
1230                 i--;
1231                 __asm__("addl %0,%0 ; adcl %1,%1"
1232                         :"=r" (a->a),"=r" (a->b)
1233                         :"0" (a->a),"1" (a->b));
1234         }
1235         a->exponent = i | sign;
1236 }
1237
1238 static void
1239 ftst(const temp_real *a)
1240 {
1241         temp_real b;
1242
1243         clear_Cx();
1244         b = *a;
1245         normalize(&b);
1246         if (b.a || b.b || b.exponent) {
1247                 if (b.exponent < 0)
1248                         set_C0();
1249         } else
1250                 set_C3();
1251 }
1252
1253 static void
1254 fcom(const temp_real *src1, const temp_real *src2)
1255 {
1256         temp_real a;
1257
1258         a = *src1;
1259         a.exponent ^= 0x8000;
1260         fadd(&a,src2,&a);
1261         ftst(&a);
1262 }
1263
/*
 * Unordered compare.  This emulator makes no distinction from the
 * ordinary compare and simply forwards both operands to fcom().
 */
1264 static void
1265 fucom(const temp_real *src1, const temp_real *src2)
1266 {
1267         fcom(src1,src2);
1268 }
1269
1270 /*
1271  * linux/kernel/math/convert.c
1272  *
1273  * (C) 1991 Linus Torvalds
1274  */
1275
1276
1277 /*
1278  * NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
1279  * and temp_to_short conversion routines: don't touch them if you don't
1280  * know what's going on. They are the adding of one in the rounding: the
1281  * overflow bit is also used for adding one into the exponent. Thus it
1282  * looks like the overflow would be incorrectly handled, but due to the
1283  * way the IEEE numbers work, things are correct.
1284  *
1285  * There is no checking for total overflow in the conversions, though (ie
1286  * if the temp-real number simply won't fit in a short- or long-real.)
1287  */
1288
1289 static void
1290 short_to_temp(const short_real *a, temp_real *b)
1291 {
1292         if (!(*a & 0x7fffffff)) {
1293                 b->a = b->b = 0;
1294                 if (*a)
1295                         b->exponent = 0x8000;
1296                 else
1297                         b->exponent = 0;
1298                 return;
1299         }
1300         b->exponent = ((*a>>23) & 0xff)-127+16383;
1301         if (*a<0)
1302                 b->exponent |= 0x8000;
1303         b->b = (*a<<8) | 0x80000000UL;
1304         b->a = 0;
1305 }
1306
1307 static void
1308 long_to_temp(const long_real *a, temp_real *b)
1309 {
1310         if (!a->a && !(a->b & 0x7fffffff)) {
1311                 b->a = b->b = 0;
1312                 if (a->b)
1313                         b->exponent = 0x8000;
1314                 else
1315                         b->exponent = 0;
1316                 return;
1317         }
1318         b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
1319         if (a->b<0)
1320                 b->exponent |= 0x8000;
1321         b->b = 0x80000000UL | (a->b<<11) | (((u_int32_t)a->a)>>21);
1322         b->a = a->a<<11;
1323 }
1324
/*
 * Narrow a temporary real (*a) to a 32-bit short real (*b).
 *
 * A zero 15-bit exponent gives a signed zero.  Otherwise the exponent is
 * re-biased from 16383 to 127 and masked into bits 23..30, the sign bit
 * is copied, and bits 8..30 of a->b supply the 23 fraction bits (the
 * explicit leading one is dropped).  Rounding looks only at the low
 * eight bits of a->b; a->a is ignored.  The increment is applied to the
 * whole word on purpose, so a carry out of the fraction bumps the
 * exponent -- see the NOTE preceding these conversion routines.  There is
 * no overflow or underflow check on the exponent.
 */
1325 static void 
1326 temp_to_short(const temp_real *a, short_real *b)
1327 {
1328         if (!(a->exponent & 0x7fff)) {
1329                 *b = (a->exponent)?0x80000000UL:0;
1330                 return;
1331         }
1332         *b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
1333         if (a->exponent < 0)
1334                 *b |= 0x80000000UL;
1335         *b |= (a->b >> 8) & 0x007fffff;
1336         switch ((int)ROUNDING) {
1337                 case ROUND_NEAREST:
1338                         if ((a->b & 0xff) > 0x80)
1339                                 ++*b;
1340                         break;
1341                 case ROUND_DOWN:	/* bump magnitude of negative values */
1342                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1343                                 ++*b;
1344                         break;
1345                 case ROUND_UP:		/* bump magnitude of positive values */
1346                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1347                                 ++*b;
1348                         break;
1349         }
1350 }
1351
1352 static void
1353 temp_to_long(const temp_real *a, long_real *b)
1354 {
1355         if (!(a->exponent & 0x7fff)) {
1356                 b->a = 0;
1357                 b->b = (a->exponent)?0x80000000UL:0;
1358                 return;
1359         }
1360         b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
1361             0x7ff00000;
1362         if (a->exponent < 0)
1363                 b->b |= 0x80000000UL;
1364         b->b |= (a->b >> 11) & 0x000fffff;
1365         b->a = a->b << 21;
1366         b->a |= (a->a >> 11) & 0x001fffff;
1367         switch ((int)ROUNDING) {
1368                 case ROUND_NEAREST:
1369                         if ((a->a & 0x7ff) > 0x400)
1370                                 __asm__("addl $1,%0 ; adcl $0,%1"
1371                                         :"=r" (b->a),"=r" (b->b)
1372                                         :"0" (b->a),"1" (b->b));
1373                         break;
1374                 case ROUND_DOWN:
1375                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1376                                 __asm__("addl $1,%0 ; adcl $0,%1"
1377                                         :"=r" (b->a),"=r" (b->b)
1378                                         :"0" (b->a),"1" (b->b));
1379                         break;
1380                 case ROUND_UP:
1381                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1382                                 __asm__("addl $1,%0 ; adcl $0,%1"
1383                                         :"=r" (b->a),"=r" (b->b)
1384                                         :"0" (b->a),"1" (b->b));
1385                         break;
1386         }
1387 }
1388
/*
 * Round *a to an integral value under the current ROUNDING mode and
 * store the result, as a temporary real, in *b.
 *
 * shift is the number of mantissa bits that lie below the binary point.
 * If it is negative, or the 15-bit exponent is zero, *a is copied
 * through unchanged.  Otherwise the mantissa is shifted right by that
 * amount (whole words first, then the remainder with shrdl/shrl), the
 * bits shifted out are collected in underflow, and the exponent is
 * raised to match.  After rounding, the mantissa is shifted back left
 * until its top bit is set; an all-zero mantissa gives exponent 0.
 *
 * ROUND_NEAREST adds 0x7fffffff plus the result's low bit to underflow
 * and propagates the carry, which rounds halves towards the even value.
 * NOTE(review): that asm writes to %5 (underflow) although it is declared
 * as an input operand; the value is not read again afterwards.
 */
1389 static void 
1390 frndint(const temp_real *a, temp_real *b)
1391 {
1392         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1393         u_int32_t underflow;
1394
1395         if ((shift < 0) || (shift == 16383+63)) {
1396                 *b = *a;
1397                 return;
1398         }
1399         b->a = b->b = underflow = 0;
1400         b->exponent = a->exponent;
1401         if (shift < 32) {
1402                 b->b = a->b; b->a = a->a;
1403         } else if (shift < 64) {
1404                 b->a = a->b; underflow = a->a;
1405                 shift -= 32;
1406                 b->exponent += 32;
1407         } else if (shift < 96) {
1408                 underflow = a->b;
1409                 shift -= 64;
1410                 b->exponent += 64;
1411         } else {
1412                 underflow = 1;	/* non-zero marker: something was lost */
1413                 shift = 0;
1414         }
1415         b->exponent += shift;
1416         __asm__("shrdl %2,%1,%0"
1417                 :"=r" (underflow),"=r" (b->a)
1418                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1419         __asm__("shrdl %2,%1,%0"
1420                 :"=r" (b->a),"=r" (b->b)
1421                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1422         __asm__("shrl %1,%0"
1423                 :"=r" (b->b)
1424                 :"c" ((char) shift),"0" (b->b));
1425         switch ((int)ROUNDING) {
1426                 case ROUND_NEAREST:
1427                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1428                                 :"=r" (b->a),"=r" (b->b)
1429                                 :"0" (b->a),"1" (b->b)
1430                                 ,"r" (0x7fffffff + (b->a & 1))
1431                                 ,"m" (*&underflow));
1432                         break;
1433                 case ROUND_UP:
1434                         if ((b->exponent >= 0) && underflow)
1435                                 __asm__("addl $1,%0 ; adcl $0,%1"
1436                                         :"=r" (b->a),"=r" (b->b)
1437                                         :"0" (b->a),"1" (b->b));
1438                         break;
1439                 case ROUND_DOWN:
1440                         if ((b->exponent < 0) && underflow)
1441                                 __asm__("addl $1,%0 ; adcl $0,%1"
1442                                         :"=r" (b->a),"=r" (b->b)
1443                                         :"0" (b->a),"1" (b->b));
1444                         break;
1445         }
1446         if (b->a || b->b)
1447                 while (b->b >= 0) {	/* renormalise */
1448                         b->exponent--;
1449                         __asm__("addl %0,%0 ; adcl %1,%1"
1450                                 :"=r" (b->a),"=r" (b->b)
1451                                 :"0" (b->a),"1" (b->b));
1452                 }
1453         else
1454                 b->exponent = 0;
1455 }
1456
1457 static void
1458 Fscale(const temp_real *a, const temp_real *b, temp_real *c)
1459 {
1460         temp_int ti;
1461
1462         *c = *a;
1463         if(!c->a && !c->b) {                            /* 19 Sep 92*/
1464                 c->exponent = 0;
1465                 return;
1466         }
1467         real_to_int(b, &ti);
1468         if(ti.sign)
1469                 c->exponent -= ti.a;
1470         else
1471                 c->exponent += ti.a;
1472 }
1473
/*
 * Convert a temporary real (*a) to a 64-bit integer magnitude in
 * b->b:b->a plus a separate flag in b->sign, rounding under the current
 * ROUNDING mode.
 *
 * shift is the number of mantissa bits that lie below the binary point.
 * A negative shift means the value does not fit: set_OE() is raised and
 * *b is left with a zero magnitude (the sign flag is already filled in).
 * Otherwise the mantissa is shifted right by that amount (whole words
 * first, then the remainder with shrdl/shrl) with the lost bits gathered
 * in underflow, and the magnitude is bumped by one when the rounding mode
 * calls for it.
 *
 * ROUND_NEAREST adds 0x7fffffff plus the result's low bit to underflow
 * and propagates the carry, which rounds halves towards the even value.
 * NOTE(review): that asm writes to %5 (underflow) although it is declared
 * as an input operand; the value is not read again afterwards.
 */
1474 static void
1475 real_to_int(const temp_real *a, temp_int *b)
1476 {
1477         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1478         u_int32_t underflow;
1479
1480         b->a = b->b = underflow = 0;
1481         b->sign = (a->exponent < 0);
1482         if (shift < 0) {
1483                 set_OE();
1484                 return;
1485         }
1486         if (shift < 32) {
1487                 b->b = a->b; b->a = a->a;
1488         } else if (shift < 64) {
1489                 b->a = a->b; underflow = a->a;
1490                 shift -= 32;
1491         } else if (shift < 96) {
1492                 underflow = a->b;
1493                 shift -= 64;
1494         } else {
1495                 underflow = 1;	/* non-zero marker: something was lost */
1496                 shift = 0;
1497         }
1498         __asm__("shrdl %2,%1,%0"
1499                 :"=r" (underflow),"=r" (b->a)
1500                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1501         __asm__("shrdl %2,%1,%0"
1502                 :"=r" (b->a),"=r" (b->b)
1503                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1504         __asm__("shrl %1,%0"
1505                 :"=r" (b->b)
1506                 :"c" ((char) shift),"0" (b->b));
1507         switch ((int)ROUNDING) {
1508                 case ROUND_NEAREST:
1509                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1510                                 :"=r" (b->a),"=r" (b->b)
1511                                 :"0" (b->a),"1" (b->b)
1512                                 ,"r" (0x7fffffff + (b->a & 1))
1513                                 ,"m" (*&underflow));
1514                         break;
1515                 case ROUND_UP:
1516                         if (!b->sign && underflow)
1517                                 __asm__("addl $1,%0 ; adcl $0,%1"
1518                                         :"=r" (b->a),"=r" (b->b)
1519                                         :"0" (b->a),"1" (b->b));
1520                         break;
1521                 case ROUND_DOWN:
1522                         if (b->sign && underflow)
1523                                 __asm__("addl $1,%0 ; adcl $0,%1"
1524                                         :"=r" (b->a),"=r" (b->b)
1525                                         :"0" (b->a),"1" (b->b));
1526                         break;
1527         }
1528 }
1529
/*
 * Convert a sign-and-magnitude 64-bit integer (*a) to a temporary real
 * (*b).
 *
 * The magnitude is copied into the mantissa.  Zero gives exponent 0 and
 * returns at once.  Otherwise the exponent starts at 16383 + 63 (the
 * binary point sits just below bit 0), with 0x8000 added when a->sign is
 * set, and the mantissa is shifted left -- decrementing the exponent once
 * per bit -- until the top bit of b->b is set.
 */
1530 static void
1531 int_to_real(const temp_int *a, temp_real *b)
1532 {
1533         b->a = a->a;
1534         b->b = a->b;
1535         if (b->a || b->b)
1536                 b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
1537         else {
1538                 b->exponent = 0;
1539                 return;
1540         }
1541         while (b->b >= 0) {	/* normalise */
1542                 b->exponent--;
1543                 __asm__("addl %0,%0 ; adcl %1,%1"
1544                         :"=r" (b->a),"=r" (b->b)
1545                         :"0" (b->a),"1" (b->b));
1546         }
1547 }
1548
1549 static int
1550 fpu_modevent(module_t mod, int type, void *unused)
1551 {
1552         switch (type) {
1553         case MOD_LOAD:
1554                 if (pmath_emulate) {
1555                         printf("Another Math emulator already present\n");
1556                         return EBUSY;
1557                 }
1558                 pmath_emulate = math_emulate;
1559                 if (bootverbose)
1560                         printf("Math emulator present\n");
1561                 break;
1562         case MOD_UNLOAD:
1563                 if (pmath_emulate != math_emulate) {
1564                         printf("Cannot unload another math emulator\n");
1565                         return EACCES;
1566                 }
1567                 pmath_emulate = 0;
1568                 if (bootverbose)
1569                         printf("Math emulator unloaded\n");
1570                 break;
1571         default:
1572                 break;
1573         }
1574         return 0;
1575 }
/*
 * Module description handed to DECLARE_MODULE: the name "fpu", the event
 * handler fpu_modevent, and a third field of 0 (presumably the private
 * data pointer -- confirm against the moduledata_t definition).
 */
1576 static moduledata_t fpumod = {
1577         "fpu",
1578         fpu_modevent,
1579         0
1580 };
/* Register the module with the SI_SUB_DRIVERS / SI_ORDER_ANY arguments. */
1581 DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);