proc->thread stage 4: rework the VFS and DEVICE subsystems to take thread
[dragonfly.git] / sys / platform / pc32 / i386 / math_emulate.c
1 /*
2  * linux/kernel/math/math_emulate.c
3  *
4  * (C) 1991 Linus Torvalds
5  *
 * [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
7  *
8  *      from: 386BSD 0.1
9  * $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
10  * $DragonFly: src/sys/platform/pc32/i386/math_emulate.c,v 1.4 2003/06/18 18:29:55 dillon Exp $
11  */
12
13 /*
14  * Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
15  * even for soft-float, unless you use bruce evans' patches. The patches
16  * are great, but they have to be re-applied for every version, and the
17  * library is different for soft-float and 80387. So emulation is more
18  * practical, even though it's slower.
19  *
20  * 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
21  * about add/sub/mul/div. Urgel. I should find some good source, but I'll
22  * just fake up something.
23  *
24  * 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
25  * test every possible combination.
26  */
27
28 /*
29  * This file is full of ugly macros etc: one problem was that gcc simply
30  * didn't want to make the structures as they should be: it has to try to
31  * align them. Sickening code, but at least I've hidden the ugly things
32  * in this one file: the other files don't need to know about these things.
33  *
34  * The other files also don't care about ST(x) etc - they just get addresses
35  * to 80-bit temporary reals, and do with them as they please. I wanted to
36  * hide most of the 387-specific things here.
37  */
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41
42 #include <machine/frame.h>
43 #include <machine/reg.h>
44
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47
48 #include <vm/vm.h>
49 #include <sys/lock.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <sys/user.h>
53
54 #define __ALIGNED_TEMP_REAL 1
55 #include <i386/i386/math_emu.h>
56
/* Swap the two bytes of the 16-bit lvalue x in place (xchgb on %al/%ah). */
#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)x))
/* ST(x): emulated stack register x as an lvalue, located through __st(). */
#define ST(x) (*__st((x)))
/* PST(x): the same register as a read-only temp_real pointer. */
#define PST(x) ((const temp_real *) __st((x)))
/*
 * Give up on the current instruction: rewind tfp->tf_eip to the value saved
 * in the caller's local `oldeip` and return signo from the calling function.
 * Wrapped in do/while(0) so that it behaves as a single statement, e.g. as
 * the body of an unbraced if/else.
 */
#define math_abort(tfp, signo) \
        do { (tfp)->tf_eip = oldeip; return (signo); } while (0)
61
62 /*
63  * We don't want these inlined - it gets too messy in the machine-code.
64  */
65 static void fpop(void);
66 static void fpush(void);
67 static void fxchg(temp_real_unaligned * a, temp_real_unaligned * b);
68 static temp_real_unaligned * __st(int i);
69
/*
 * Fetch one byte from adr with fubyte().  The result is narrowed to
 * unsigned char, so no failure indication reaches the caller.
 */
static unsigned char
get_fs_byte(char *adr)
{
        return ((unsigned char) fubyte(adr));
}
73
/*
 * Fetch one 16-bit word from adr.
 *
 * Uses the 16-bit fetch primitive fusword(), the counterpart of the
 * susword() used by put_fs_word(), so that only the two bytes actually
 * belonging to the operand are touched.  The result is narrowed to
 * unsigned short, so no failure indication reaches the caller.
 */
static unsigned short
get_fs_word(unsigned short *adr)
{
        return ((unsigned short) fusword(adr));
}
77
78 static u_int32_t
79 get_fs_long(u_int32_t *adr)
80         { return(fuword(adr)); }
81
/*
 * Store the byte val at adr with subyte(); the status returned by
 * subyte() is deliberately discarded.
 */
static void
put_fs_byte(unsigned char val, char *adr)
{
        (void) subyte(adr, val);
}
85
/*
 * Store the 16-bit word val at adr with susword(); the status returned
 * by susword() is deliberately discarded.
 */
static void
put_fs_word(unsigned short val, short *adr)
{
        (void) susword(adr, val);
}
89
90 static void 
91 put_fs_long(u_long val, u_int32_t *adr)
92         { (void)suword(adr,val); }
93
94 static int
95 math_emulate(struct trapframe * info)
96 {
97         unsigned short code;
98         temp_real tmp;
99         char * address;
100         u_int32_t oldeip;
101
102         /* ever used fp? */
103         if ((curthread->td_pcb->pcb_flags & FP_SOFTFP) == 0) {
104                 curthread->td_pcb->pcb_flags |= FP_SOFTFP;
105                 I387.cwd = 0x037f;
106                 I387.swd = 0x0000;
107                 I387.twd = 0x0000;
108         }
109
110         if (I387.cwd & I387.swd & 0x3f)
111                 I387.swd |= 0x8000;
112         else
113                 I387.swd &= 0x7fff;
114         oldeip = info->tf_eip;
115 /* 0x001f means user code space */
116         if ((u_short)info->tf_cs != 0x001F) {
117                 printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
118                         (u_long)oldeip);
119                 panic("?Math emulation needed in kernel?");
120         }
121         /* completely ignore an operand-size prefix */
122         if (get_fs_byte((char *) info->tf_eip) == 0x66)
123                 info->tf_eip++;
124         code = get_fs_word((unsigned short *) info->tf_eip);
125         bswapw(code);
126         code &= 0x7ff;
127         I387.fip = oldeip;
128         *(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
129         *(1+(unsigned short *) &I387.fcs) = code;
130         info->tf_eip += 2;
131         switch (code) {
132                 case 0x1d0: /* fnop */
133                         return(0);
134                 case 0x1d1: case 0x1d2: case 0x1d3:  /* fst to 32-bit mem */
135                 case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
136                         math_abort(info,SIGILL);
137                 case 0x1e0: /* fchs */
138                         ST(0).exponent ^= 0x8000;
139                         return(0);
140                 case 0x1e1: /* fabs */
141                         ST(0).exponent &= 0x7fff;
142                         return(0);
143                 case 0x1e2: case 0x1e3:
144                         math_abort(info,SIGILL);
145                 case 0x1e4: /* ftst */
146                         ftst(PST(0));
147                         return(0);
148                 case 0x1e5: /* fxam */
149                         printf("fxam not implemented\n");
150                         math_abort(info,SIGILL);
151                 case 0x1e6: case 0x1e7: /* fldenv */
152                         math_abort(info,SIGILL);
153                 case 0x1e8: /* fld1 */
154                         fpush();
155                         ST(0) = CONST1;
156                         return(0);
157                 case 0x1e9: /* fld2t */
158                         fpush();
159                         ST(0) = CONSTL2T;
160                         return(0);
161                 case 0x1ea: /* fld2e */
162                         fpush();
163                         ST(0) = CONSTL2E;
164                         return(0);
165                 case 0x1eb: /* fldpi */
166                         fpush();
167                         ST(0) = CONSTPI;
168                         return(0);
169                 case 0x1ec: /* fldlg2 */
170                         fpush();
171                         ST(0) = CONSTLG2;
172                         return(0);
173                 case 0x1ed: /* fldln2 */
174                         fpush();
175                         ST(0) = CONSTLN2;
176                         return(0);
177                 case 0x1ee: /* fldz */
178                         fpush();
179                         ST(0) = CONSTZ;
180                         return(0);
181                 case 0x1ef:
182                         math_abort(info,SIGILL);
183                 case 0x1f0: /* f2xm1 */
184                 case 0x1f1: /* fyl2x */
185                 case 0x1f2: /* fptan */
186                 case 0x1f3: /* fpatan */
187                 case 0x1f4: /* fxtract */
188                 case 0x1f5: /* fprem1 */
189                 case 0x1f6: /* fdecstp */
190                 case 0x1f7: /* fincstp */
191                 case 0x1f8: /* fprem */
192                 case 0x1f9: /* fyl2xp1 */
193                 case 0x1fa: /* fsqrt */
194                 case 0x1fb: /* fsincos */
195                 case 0x1fe: /* fsin */
196                 case 0x1ff: /* fcos */
197                         uprintf(
198                          "math_emulate: instruction %04x not implemented\n",
199                           code + 0xd800);
200                         math_abort(info,SIGILL);
201                 case 0x1fc: /* frndint */
202                         frndint(PST(0),&tmp);
203                         real_to_real(&tmp,&ST(0));
204                         return(0);
205                 case 0x1fd: /* fscale */
206                         /* incomplete and totally inadequate -wfj */
207                         Fscale(PST(0), PST(1), &tmp);
208                         real_to_real(&tmp,&ST(0));
209                         return(0);                      /* 19 Sep 92*/
210                 case 0x2e9: /* ????? */
211 /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9  ATS */
212                         fucom(PST(1),PST(0));
213                         fpop(); fpop();
214                         return(0);
215                 case 0x3d0: case 0x3d1: /* fist ?? */
216                         return(0);
217                 case 0x3e2: /* fclex */
218                         I387.swd &= 0x7f00;
219                         return(0);
220                 case 0x3e3: /* fninit */
221                         I387.cwd = 0x037f;
222                         I387.swd = 0x0000;
223                         I387.twd = 0x0000;
224                         return(0);
225                 case 0x3e4:
226                         return(0);
227                 case 0x6d9: /* fcompp */
228                         fcom(PST(1),PST(0));
229                         fpop(); fpop();
230                         return(0);
231                 case 0x7e0: /* fstsw ax */
232                         *(short *) &info->tf_eax = I387.swd;
233                         return(0);
234         }
235         switch (code >> 3) {
236                 case 0x18: /* fadd */
237                         fadd(PST(0),PST(code & 7),&tmp);
238                         real_to_real(&tmp,&ST(0));
239                         return(0);
240                 case 0x19: /* fmul */
241                         fmul(PST(0),PST(code & 7),&tmp);
242                         real_to_real(&tmp,&ST(0));
243                         return(0);
244                 case 0x1a: /* fcom */
245                         fcom(PST(code & 7),PST(0));
246                         return(0);
247                 case 0x1b: /* fcomp */
248                         fcom(PST(code & 7),PST(0));
249                         fpop();
250                         return(0);
251                 case 0x1c: /* fsubr */
252                         real_to_real(&ST(code & 7),&tmp);
253                         tmp.exponent ^= 0x8000;
254                         fadd(PST(0),&tmp,&tmp);
255                         real_to_real(&tmp,&ST(0));
256                         return(0);
257                 case 0x1d: /* fsub */
258                         ST(0).exponent ^= 0x8000;
259                         fadd(PST(0),PST(code & 7),&tmp);
260                         real_to_real(&tmp,&ST(0));
261                         return(0);
262                 case 0x1e: /* fdivr */
263                         fdiv(PST(0),PST(code & 7),&tmp);
264                         real_to_real(&tmp,&ST(0));
265                         return(0);
266                 case 0x1f: /* fdiv */
267                         fdiv(PST(code & 7),PST(0),&tmp);
268                         real_to_real(&tmp,&ST(0));
269                         return(0);
270                 case 0x38: /* fld */
271                         fpush();
272                         ST(0) = ST((code & 7)+1);  /* why plus 1 ????? ATS */
273                         return(0);
274                 case 0x39: /* fxch */
275                         fxchg(&ST(0),&ST(code & 7));
276                         return(0);
277                 case 0x3b: /*  ??? ??? wrong ???? ATS */
278                         ST(code & 7) = ST(0);
279                         fpop();
280                         return(0);
281                 case 0x98: /* fadd */
282                         fadd(PST(0),PST(code & 7),&tmp);
283                         real_to_real(&tmp,&ST(code & 7));
284                         return(0);
285                 case 0x99: /* fmul */
286                         fmul(PST(0),PST(code & 7),&tmp);
287                         real_to_real(&tmp,&ST(code & 7));
288                         return(0);
289                 case 0x9a: /* ???? , my manual don't list a direction bit
290 for fcom , ??? ATS */
291                         fcom(PST(code & 7),PST(0));
292                         return(0);
293                 case 0x9b: /* same as above , ATS */
294                         fcom(PST(code & 7),PST(0));
295                         fpop();
296                         return(0);
297                 case 0x9c: /* fsubr */
298                         ST(code & 7).exponent ^= 0x8000;
299                         fadd(PST(0),PST(code & 7),&tmp);
300                         real_to_real(&tmp,&ST(code & 7));
301                         return(0);
302                 case 0x9d: /* fsub */
303                         real_to_real(&ST(0),&tmp);
304                         tmp.exponent ^= 0x8000;
305                         fadd(PST(code & 7),&tmp,&tmp);
306                         real_to_real(&tmp,&ST(code & 7));
307                         return(0);
308                 case 0x9e: /* fdivr */
309                         fdiv(PST(0),PST(code & 7),&tmp);
310                         real_to_real(&tmp,&ST(code & 7));
311                         return(0);
312                 case 0x9f: /* fdiv */
313                         fdiv(PST(code & 7),PST(0),&tmp);
314                         real_to_real(&tmp,&ST(code & 7));
315                         return(0);
316                 case 0xb8: /* ffree */
317                         printf("ffree not implemented\n");
318                         math_abort(info,SIGILL);
319                 case 0xb9: /* fstp ???? where is the pop ? ATS */
320                         fxchg(&ST(0),&ST(code & 7));
321                         return(0);
322                 case 0xba: /* fst */
323                         ST(code & 7) = ST(0);
324                         return(0);
325                 case 0xbb: /* ????? encoding of fstp to mem ? ATS */
326                         ST(code & 7) = ST(0);
327                         fpop();
328                         return(0);
329                 case 0xbc: /* fucom */
330                         fucom(PST(code & 7),PST(0));
331                         return(0);
332                 case 0xbd: /* fucomp */
333                         fucom(PST(code & 7),PST(0));
334                         fpop();
335                         return(0);
336                 case 0xd8: /* faddp */
337                         fadd(PST(code & 7),PST(0),&tmp);
338                         real_to_real(&tmp,&ST(code & 7));
339                         fpop();
340                         return(0);
341                 case 0xd9: /* fmulp */
342                         fmul(PST(code & 7),PST(0),&tmp);
343                         real_to_real(&tmp,&ST(code & 7));
344                         fpop();
345                         return(0);
346                 case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
347                         fcom(PST(code & 7),PST(0));
348                         fpop();
349                         return(0);
350                 case 0xdc: /* fsubrp */
351                         ST(code & 7).exponent ^= 0x8000;
352                         fadd(PST(0),PST(code & 7),&tmp);
353                         real_to_real(&tmp,&ST(code & 7));
354                         fpop();
355                         return(0);
356                 case 0xdd: /* fsubp */
357                         real_to_real(&ST(0),&tmp);
358                         tmp.exponent ^= 0x8000;
359                         fadd(PST(code & 7),&tmp,&tmp);
360                         real_to_real(&tmp,&ST(code & 7));
361                         fpop();
362                         return(0);
363                 case 0xde: /* fdivrp */
364                         fdiv(PST(0),PST(code & 7),&tmp);
365                         real_to_real(&tmp,&ST(code & 7));
366                         fpop();
367                         return(0);
368                 case 0xdf: /* fdivp */
369                         fdiv(PST(code & 7),PST(0),&tmp);
370                         real_to_real(&tmp,&ST(code & 7));
371                         fpop();
372                         return(0);
373                 case 0xf8: /* fild 16-bit mem ???? ATS */
374                         printf("ffree not implemented\n");
375                         math_abort(info,SIGILL);
376                         fpop();
377                         return(0);
378                 case 0xf9: /*  ????? ATS */
379                         fxchg(&ST(0),&ST(code & 7));
380                         return(0);
381                 case 0xfa: /* fist 16-bit mem ? ATS */
382                 case 0xfb: /* fistp 16-bit mem ? ATS */
383                         ST(code & 7) = ST(0);
384                         fpop();
385                         return(0);
386         }
387         switch ((code>>3) & 0xe7) {
388                 case 0x22:
389                         put_short_real(PST(0),info,code);
390                         return(0);
391                 case 0x23:
392                         put_short_real(PST(0),info,code);
393                         fpop();
394                         return(0);
395                 case 0x24:
396                         address = ea(info,code);
397                         for (code = 0 ; code < 7 ; code++) {
398                                 ((int32_t *) & I387)[code] =
399                                    get_fs_long((u_int32_t *) address);
400                                 address += 4;
401                         }
402                         return(0);
403                 case 0x25:
404                         address = ea(info,code);
405                         *(unsigned short *) &I387.cwd =
406                                 get_fs_word((unsigned short *) address);
407                         return(0);
408                 case 0x26:
409                         address = ea(info,code);
410                         /*verify_area(address,28);*/
411                         for (code = 0 ; code < 7 ; code++) {
412                                 put_fs_long( ((int32_t *) & I387)[code],
413                                         (u_int32_t *) address);
414                                 address += 4;
415                         }
416                         return(0);
417                 case 0x27:
418                         address = ea(info,code);
419                         /*verify_area(address,2);*/
420                         put_fs_word(I387.cwd,(short *) address);
421                         return(0);
422                 case 0x62:
423                         put_long_int(PST(0),info,code);
424                         return(0);
425                 case 0x63:
426                         put_long_int(PST(0),info,code);
427                         fpop();
428                         return(0);
429                 case 0x65:
430                         fpush();
431                         get_temp_real(&tmp,info,code);
432                         real_to_real(&tmp,&ST(0));
433                         return(0);
434                 case 0x67:
435                         put_temp_real(PST(0),info,code);
436                         fpop();
437                         return(0);
438                 case 0xa2:
439                         put_long_real(PST(0),info,code);
440                         return(0);
441                 case 0xa3:
442                         put_long_real(PST(0),info,code);
443                         fpop();
444                         return(0);
445                 case 0xa4:
446                         address = ea(info,code);
447                         for (code = 0 ; code < 27 ; code++) {
448                                 ((int32_t *) & I387)[code] =
449                                    get_fs_long((u_int32_t *) address);
450                                 address += 4;
451                         }
452                         return(0);
453                 case 0xa6:
454                         address = ea(info,code);
455                         /*verify_area(address,108);*/
456                         for (code = 0 ; code < 27 ; code++) {
457                                 put_fs_long( ((int32_t *) & I387)[code],
458                                         (u_int32_t *) address);
459                                 address += 4;
460                         }
461                         I387.cwd = 0x037f;
462                         I387.swd = 0x0000;
463                         I387.twd = 0x0000;
464                         return(0);
465                 case 0xa7:
466                         address = ea(info,code);
467                         /*verify_area(address,2);*/
468                         put_fs_word(I387.swd,(short *) address);
469                         return(0);
470                 case 0xe2:
471                         put_short_int(PST(0),info,code);
472                         return(0);
473                 case 0xe3:
474                         put_short_int(PST(0),info,code);
475                         fpop();
476                         return(0);
477                 case 0xe4:
478                         fpush();
479                         get_BCD(&tmp,info,code);
480                         real_to_real(&tmp,&ST(0));
481                         return(0);
482                 case 0xe5:
483                         fpush();
484                         get_longlong_int(&tmp,info,code);
485                         real_to_real(&tmp,&ST(0));
486                         return(0);
487                 case 0xe6:
488                         put_BCD(PST(0),info,code);
489                         fpop();
490                         return(0);
491                 case 0xe7:
492                         put_longlong_int(PST(0),info,code);
493                         fpop();
494                         return(0);
495         }
496         switch (code >> 9) {
497                 case 0:
498                         get_short_real(&tmp,info,code);
499                         break;
500                 case 1:
501                         get_long_int(&tmp,info,code);
502                         break;
503                 case 2:
504                         get_long_real(&tmp,info,code);
505                         break;
506                 case 4:
507                         get_short_int(&tmp,info,code);
508         }
509         switch ((code>>3) & 0x27) {
510                 case 0:
511                         fadd(&tmp,PST(0),&tmp);
512                         real_to_real(&tmp,&ST(0));
513                         return(0);
514                 case 1:
515                         fmul(&tmp,PST(0),&tmp);
516                         real_to_real(&tmp,&ST(0));
517                         return(0);
518                 case 2:
519                         fcom(&tmp,PST(0));
520                         return(0);
521                 case 3:
522                         fcom(&tmp,PST(0));
523                         fpop();
524                         return(0);
525                 case 4:
526                         tmp.exponent ^= 0x8000;
527                         fadd(&tmp,PST(0),&tmp);
528                         real_to_real(&tmp,&ST(0));
529                         return(0);
530                 case 5:
531                         ST(0).exponent ^= 0x8000;
532                         fadd(&tmp,PST(0),&tmp);
533                         real_to_real(&tmp,&ST(0));
534                         return(0);
535                 case 6:
536                         fdiv(PST(0),&tmp,&tmp);
537                         real_to_real(&tmp,&ST(0));
538                         return(0);
539                 case 7:
540                         fdiv(&tmp,PST(0),&tmp);
541                         real_to_real(&tmp,&ST(0));
542                         return(0);
543         }
544         if ((code & 0x138) == 0x100) {
545                         fpush();
546                         real_to_real(&tmp,&ST(0));
547                         return(0);
548         }
549         printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
550                 info->tf_eip,code);
551         math_abort(info,SIGFPE);
552 }
553
554 static void
555 fpop(void)
556 {
557         u_int32_t tmp;
558
559         tmp = I387.swd & 0xffffc7ffUL;
560         I387.swd += 0x00000800;
561         I387.swd &= 0x00003800;
562         I387.swd |= tmp;
563 }
564
565 static void
566 fpush(void)
567 {
568         u_int32_t tmp;
569
570         tmp = I387.swd & 0xffffc7ffUL;
571         I387.swd += 0x00003800;
572         I387.swd &= 0x00003800;
573         I387.swd |= tmp;
574 }
575
576 static void 
577 fxchg(temp_real_unaligned * a, temp_real_unaligned * b)
578 {
579         temp_real_unaligned c;
580
581         c = *a;
582         *a = *b;
583         *b = c;
584 }
585
586 static temp_real_unaligned *
587 __st(int i)
588 {
589         i += I387.swd >> 11;
590         i &= 7;
591         return (temp_real_unaligned *) (i*10 + (char *)(I387.st_space));
592 }
593
594 /*
595  * linux/kernel/math/ea.c
596  *
597  * (C) 1991 Linus Torvalds
598  */
599
600 /*
601  * Calculate the effective address.
602  */
603
604
/*
 * Indexed by the 3-bit register number taken from an instruction's
 * addressing bytes; yields the tEAX..tEDI index of that register.
 */
static int __regoffset[] = {
        tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
};

/*
 * REG(x): lvalue for register number x, read from the int array that
 * curproc->p_md.md_regs points to.
 */
#define REG(x) (((int *)curproc->p_md.md_regs)[__regoffset[(x)]])
610
611 static char *
612 sib(struct trapframe * info, int mod)
613 {
614         unsigned char ss,index,base;
615         int32_t offset = 0;
616
617         base = get_fs_byte((char *) info->tf_eip);
618         info->tf_eip++;
619         ss = base >> 6;
620         index = (base >> 3) & 7;
621         base &= 7;
622         if (index == 4)
623                 offset = 0;
624         else
625                 offset = REG(index);
626         offset <<= ss;
627         if (mod || base != 5)
628                 offset += REG(base);
629         if (mod == 1) {
630                 offset += (signed char) get_fs_byte((char *) info->tf_eip);
631                 info->tf_eip++;
632         } else if (mod == 2 || base == 5) {
633                 offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
634                 info->tf_eip += 4;
635         }
636         I387.foo = offset;
637         I387.fos = 0x17;
638         return (char *) offset;
639 }
640
641 static char *
642 ea(struct trapframe * info, unsigned short code)
643 {
644         unsigned char mod,rm;
645         int32_t * tmp;
646         int offset = 0;
647
648         mod = (code >> 6) & 3;
649         rm = code & 7;
650         if (rm == 4 && mod != 3)
651                 return sib(info,mod);
652         if (rm == 5 && !mod) {
653                 offset = get_fs_long((u_int32_t *) info->tf_eip);
654                 info->tf_eip += 4;
655                 I387.foo = offset;
656                 I387.fos = 0x17;
657                 return (char *) offset;
658         }
659         tmp = (int32_t *) &REG(rm);
660         switch (mod) {
661                 case 0: offset = 0; break;
662                 case 1:
663                         offset = (signed char) get_fs_byte((char *) info->tf_eip);
664                         info->tf_eip++;
665                         break;
666                 case 2:
667                         offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
668                         info->tf_eip += 4;
669                         break;
670 #ifdef notyet
671                 case 3:
672                         math_abort(info,1<<(SIGILL-1));
673 #endif
674         }
675         I387.foo = offset;
676         I387.fos = 0x17;
677         return offset + (char *) *tmp;
678 }
679 /*
680  * linux/kernel/math/get_put.c
681  *
682  * (C) 1991 Linus Torvalds
683  */
684
685 /*
686  * This file handles all accesses to user memory: getting and putting
687  * ints/reals/BCD etc. This is the only part that concerns itself with
 * other than temporary real format. All other calls are strictly temp_real.
689  */
690
691 static void 
692 get_short_real(temp_real * tmp, struct trapframe * info, unsigned short code)
693 {
694         char * addr;
695         short_real sr;
696
697         addr = ea(info,code);
698         sr = get_fs_long((u_int32_t *) addr);
699         short_to_temp(&sr,tmp);
700 }
701
702 static void
703 get_long_real(temp_real * tmp, struct trapframe * info, unsigned short code)
704 {
705         char * addr;
706         long_real lr;
707
708         addr = ea(info,code);
709         lr.a = get_fs_long((u_int32_t *) addr);
710         lr.b = get_fs_long(1 + (u_int32_t *) addr);
711         long_to_temp(&lr,tmp);
712 }
713
714 static void
715 get_temp_real(temp_real * tmp, struct trapframe * info, unsigned short code)
716 {
717         char * addr;
718
719         addr = ea(info,code);
720         tmp->a = get_fs_long((u_int32_t *) addr);
721         tmp->b = get_fs_long(1 + (u_int32_t *) addr);
722         tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
723 }
724
725 static void
726 get_short_int(temp_real * tmp, struct trapframe * info, unsigned short code)
727 {
728         char * addr;
729         temp_int ti;
730
731         addr = ea(info,code);
732         ti.a = (signed short) get_fs_word((unsigned short *) addr);
733         ti.b = 0;
734         if ((ti.sign = (ti.a < 0)) != 0)
735                 ti.a = - ti.a;
736         int_to_real(&ti,tmp);
737 }
738
739 static void
740 get_long_int(temp_real * tmp, struct trapframe * info, unsigned short code)
741 {
742         char * addr;
743         temp_int ti;
744
745         addr = ea(info,code);
746         ti.a = get_fs_long((u_int32_t *) addr);
747         ti.b = 0;
748         if ((ti.sign = (ti.a < 0)) != 0)
749                 ti.a = - ti.a;
750         int_to_real(&ti,tmp);
751 }
752
753 static void 
754 get_longlong_int(temp_real * tmp, struct trapframe * info, unsigned short code)
755 {
756         char * addr;
757         temp_int ti;
758
759         addr = ea(info,code);
760         ti.a = get_fs_long((u_int32_t *) addr);
761         ti.b = get_fs_long(1 + (u_int32_t *) addr);
762         if ((ti.sign = (ti.b < 0)) != 0)
763                 __asm__("notl %0 ; notl %1\n\t"
764                         "addl $1,%0 ; adcl $0,%1"
765                         :"=r" (ti.a),"=r" (ti.b)
766                         :"0" (ti.a),"1" (ti.b));
767         int_to_real(&ti,tmp);
768 }
769
/*
 * MUL10(low,high): multiply the 64-bit quantity high:low by ten in place.
 * The pair is doubled once and that value saved in %ecx/%ebx, doubled
 * twice more (giving eight times the original), and the saved doubled
 * value is added back in.  low is held in %eax and high in %edx.
 */
#define MUL10(low,high) \
__asm__("addl %0,%0 ; adcl %1,%1\n\t" \
"movl %0,%%ecx ; movl %1,%%ebx\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %0,%0 ; adcl %1,%1\n\t" \
"addl %%ecx,%0 ; adcl %%ebx,%1" \
:"=a" (low),"=d" (high) \
:"0" (low),"1" (high):"cx","bx")

/*
 * ADD64(val,low,high): add the 32-bit value val to the 64-bit quantity
 * high:low in place, propagating the carry from low into high.
 */
#define ADD64(val,low,high) \
__asm__("addl %4,%0 ; adcl $0,%1":"=r" (low),"=r" (high) \
:"0" (low),"1" (high),"r" ((u_int32_t) (val)))
782
783 static void
784 get_BCD(temp_real * tmp, struct trapframe * info, unsigned short code)
785 {
786         int k;
787         char * addr;
788         temp_int i;
789         unsigned char c;
790
791         addr = ea(info,code);
792         addr += 9;
793         i.sign = 0x80 & get_fs_byte(addr--);
794         i.a = i.b = 0;
795         for (k = 0; k < 9; k++) {
796                 c = get_fs_byte(addr--);
797                 MUL10(i.a, i.b);
798                 ADD64((c>>4), i.a, i.b);
799                 MUL10(i.a, i.b);
800                 ADD64((c&0xf), i.a, i.b);
801         }
802         int_to_real(&i,tmp);
803 }
804
805 static void 
806 put_short_real(const temp_real * tmp,
807         struct trapframe * info, unsigned short code)
808 {
809         char * addr;
810         short_real sr;
811
812         addr = ea(info,code);
813         /*verify_area(addr,4);*/
814         temp_to_short(tmp,&sr);
815         put_fs_long(sr,(u_int32_t *) addr);
816 }
817
818 static void
819 put_long_real(const temp_real * tmp,
820         struct trapframe * info, unsigned short code)
821 {
822         char * addr;
823         long_real lr;
824
825         addr = ea(info,code);
826         /*verify_area(addr,8);*/
827         temp_to_long(tmp,&lr);
828         put_fs_long(lr.a, (u_int32_t *) addr);
829         put_fs_long(lr.b, 1 + (u_int32_t *) addr);
830 }
831
832 static void
833 put_temp_real(const temp_real * tmp,
834         struct trapframe * info, unsigned short code)
835 {
836         char * addr;
837
838         addr = ea(info,code);
839         /*verify_area(addr,10);*/
840         put_fs_long(tmp->a, (u_int32_t *) addr);
841         put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
842         put_fs_word(tmp->exponent, 4 + (short *) addr);
843 }
844
845 static void
846 put_short_int(const temp_real * tmp,
847         struct trapframe * info, unsigned short code)
848 {
849         char * addr;
850         temp_int ti;
851
852         addr = ea(info,code);
853         real_to_int(tmp,&ti);
854         /*verify_area(addr,2);*/
855         if (ti.sign)
856                 ti.a = -ti.a;
857         put_fs_word(ti.a,(short *) addr);
858 }
859
860 static void
861 put_long_int(const temp_real * tmp,
862         struct trapframe * info, unsigned short code)
863 {
864         char * addr;
865         temp_int ti;
866
867         addr = ea(info,code);
868         real_to_int(tmp,&ti);
869         /*verify_area(addr,4);*/
870         if (ti.sign)
871                 ti.a = -ti.a;
872         put_fs_long(ti.a,(u_int32_t *) addr);
873 }
874
/*
 * Convert *tmp to an integer with real_to_int() and store it as a 64-bit
 * value at the effective address decoded from `code', low word first.
 * real_to_int() returns a sign flag plus a magnitude in ti.b:ti.a; when
 * the flag is set the magnitude is two's-complement negated before it is
 * written.  The destination is not checked with verify_area().
 */
875 static void
876 put_longlong_int(const temp_real * tmp,
877         struct trapframe * info, unsigned short code)
878 {
879         char * addr;
880         temp_int ti;
881
882         addr = ea(info,code);
883         real_to_int(tmp,&ti);
884         /*verify_area(addr,8);*/
885         if (ti.sign)
                /* 64-bit negate: invert both words, add one, carry upward */
886                 __asm__("notl %0 ; notl %1\n\t"
887                         "addl $1,%0 ; adcl $0,%1"
888                         :"=r" (ti.a),"=r" (ti.b)
889                         :"0" (ti.a),"1" (ti.b));
890         put_fs_long(ti.a,(u_int32_t *) addr);
891         put_fs_long(ti.b,1 + (u_int32_t *) addr);
892 }
893
/*
 * DIV10(low,high,rem): divide the unsigned 64-bit quantity high:low by
 * ten in place and leave the remainder in rem.  Two chained 32-bit
 * divides are used: 0:high is divided first, the quotient is swapped out
 * into the `high' register while `low' is swapped into %eax, and the
 * remainder left in %edx becomes the upper half of the second dividend.
 * %ecx holds the divisor.
 */
894 #define DIV10(low,high,rem) \
895 __asm__("divl %6 ; xchgl %1,%2 ; divl %6" \
896         :"=d" (rem),"=a" (low),"=r" (high) \
897         :"0" (0),"1" (high),"2" (low),"c" (10))
898
899 static void
900 put_BCD(const temp_real * tmp,struct trapframe * info, unsigned short code)
901 {
902         int k,rem;
903         char * addr;
904         temp_int i;
905         unsigned char c;
906
907         addr = ea(info,code);
908         /*verify_area(addr,10);*/
909         real_to_int(tmp,&i);
910         if (i.sign)
911                 put_fs_byte(0x80, addr+9);
912         else
913                 put_fs_byte(0, addr+9);
914         for (k = 0; k < 9; k++) {
915                 DIV10(i.a,i.b,rem);
916                 c = rem;
917                 DIV10(i.a,i.b,rem);
918                 c += rem<<4;
919                 put_fs_byte(c,addr++);
920         }
921 }
922
923 /*
924  * linux/kernel/math/mul.c
925  *
926  * (C) 1991 Linus Torvalds
927  */
928
929 /*
930  * temporary real multiplication routine.
931  */
932
933
/*
 * Shift the 128-bit value held in c[0..3] (c[0] least significant) left
 * by one bit, in place.  The bit shifted out of c[3] is discarded.
 *
 * This is plain C rather than inline assembly on purpose: the earlier asm
 * stored through `c' without naming the array as an output or listing
 * "memory" as clobbered, so the compiler was entitled to keep stale
 * copies of the words in registers across the call (fmul() re-tests
 * tmp[3] right after calling this).
 */
static void
shift(int * c)
{
        unsigned int word, carry;
        int k;

        carry = 0;
        for (k = 0; k < 4; k++) {
                word = (unsigned int) c[k];
                c[k] = (int) ((word << 1) | carry);
                carry = word >> 31;     /* top bit moves into the next word */
        }
}
943
944 static void
945 mul64(const temp_real * a, const temp_real * b, int * c)
946 {
947         __asm__("movl (%0),%%eax\n\t"
948                 "mull (%1)\n\t"
949                 "movl %%eax,(%2)\n\t"
950                 "movl %%edx,4(%2)\n\t"
951                 "movl 4(%0),%%eax\n\t"
952                 "mull 4(%1)\n\t"
953                 "movl %%eax,8(%2)\n\t"
954                 "movl %%edx,12(%2)\n\t"
955                 "movl (%0),%%eax\n\t"
956                 "mull 4(%1)\n\t"
957                 "addl %%eax,4(%2)\n\t"
958                 "adcl %%edx,8(%2)\n\t"
959                 "adcl $0,12(%2)\n\t"
960                 "movl 4(%0),%%eax\n\t"
961                 "mull (%1)\n\t"
962                 "addl %%eax,4(%2)\n\t"
963                 "adcl %%edx,8(%2)\n\t"
964                 "adcl $0,12(%2)"
965                 ::"S" (a),"c" (b),"D" (c)
966                 :"ax","dx");
967 }
968
969 static void
970 fmul(const temp_real * src1, const temp_real * src2, temp_real * result)
971 {
972         int i,sign;
973         int tmp[4] = {0,0,0,0};
974
975         sign = (src1->exponent ^ src2->exponent) & 0x8000;
976         i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
977         if (i<0) {
978                 result->exponent = sign;
979                 result->a = result->b = 0;
980                 return;
981         }
982         if (i>0x7fff) {
983                 set_OE();
984                 return;
985         }
986         mul64(src1,src2,tmp);
987         if (tmp[0] || tmp[1] || tmp[2] || tmp[3])
988                 while (i && tmp[3] >= 0) {
989                         i--;
990                         shift(tmp);
991                 }
992         else
993                 i = 0;
994         result->exponent = i | sign;
995         result->a = tmp[2];
996         result->b = tmp[3];
997 }
998
999 /*
1000  * linux/kernel/math/div.c
1001  *
1002  * (C) 1991 Linus Torvalds
1003  */
1004
1005 /*
1006  * temporary real division routine.
1007  */
1008
/*
 * Shift the 128-bit value held in c[0..3] (c[0] least significant) left
 * by one bit, in place.  The bit shifted out of c[3] is discarded.
 *
 * This is plain C rather than inline assembly on purpose: the earlier asm
 * stored through `c' without naming the array as an output or listing
 * "memory" as clobbered, so the compiler was entitled to keep stale
 * copies of the words in registers across the call (fdiv() re-tests
 * b[3] / tmp[3] right after calling this).
 */
static void
shift_left(int * c)
{
        unsigned int word, carry;
        int k;

        carry = 0;
        for (k = 0; k < 4; k++) {
                word = (unsigned int) c[k];
                c[k] = (int) ((word << 1) | carry);
                carry = word >> 31;     /* top bit moves into the next word */
        }
}
1018
/*
 * Shift the 128-bit value held in c[0..3] (c[0] least significant) right
 * by one bit, in place.  Zero is shifted in at the top of c[3] and the
 * bit shifted out of c[0] is discarded.
 *
 * This is plain C rather than inline assembly on purpose: the earlier asm
 * modified the array through `c' without naming it as an output or
 * listing "memory" as clobbered, so the compiler was entitled to assume
 * the words were unchanged.
 */
static void
shift_right(int * c)
{
        unsigned int word, carry;
        int k;

        carry = 0;
        for (k = 3; k >= 0; k--) {
                word = (unsigned int) c[k];
                c[k] = (int) ((word >> 1) | (carry << 31));
                carry = word & 1;       /* low bit moves into the next word down */
        }
}
1025
/*
 * Subtract the 128-bit value a[0..3] from b[0..3] in place (index 0 is the
 * least significant word) and report whether the subtraction succeeded.
 *
 * Returns 1 when no borrow came out of the top word (a <= b, unsigned)
 * and 0 otherwise.  b[] is overwritten with the difference in either
 * case, so callers that need the old value must work on a copy, as
 * div64() does.  a[] is only read.
 *
 * This is plain C rather than inline assembly on purpose: the earlier asm
 * wrote b[] through an input-only pointer without listing "memory" as
 * clobbered, so the compiler was entitled to assume b[] was unchanged.
 */
static int
try_sub(int * a, int * b)
{
        unsigned int minuend, subtrahend, borrow;
        int k;

        borrow = 0;
        for (k = 0; k < 4; k++) {
                minuend = (unsigned int) b[k];
                subtrahend = (unsigned int) a[k];
                b[k] = (int) (minuend - subtrahend - borrow);
                /* borrow out when minuend < subtrahend + incoming borrow */
                borrow = (minuend < subtrahend) ||
                    (minuend == subtrahend && borrow);
        }
        return !borrow;
}
1038
/*
 * Restoring division producing 64 quotient bits.
 *
 * On each of 64 rounds the divisor b[] is tentatively subtracted from a
 * copy of the dividend a[]; when try_sub() reports success the copy
 * replaces a[] and the current quotient bit is OR-ed into c[].  The
 * divisor is then shifted right by one.  Bits are produced most
 * significant first: the first 32 land in c[3], the next 32 in c[2].
 *
 * c[] is only OR-ed into, so the caller must clear it beforehand.  a[]
 * ends up holding the remainder and b[] is consumed.
 */
static void
div64(int * a, int * b, int * c)
{
        unsigned int bit = 0;
        int trial[4];
        int *q;
        int round, k;

        q = c + 4;
        for (round = 64; round > 0; round--) {
                bit >>= 1;
                if (bit == 0) {
                        /* move down to the next quotient word */
                        q--;
                        bit = 0x80000000UL;
                }
                for (k = 0; k < 4; k++)
                        trial[k] = a[k];
                if (try_sub(b,trial)) {
                        *q |= bit;
                        for (k = 0; k < 4; k++)
                                a[k] = trial[k];
                }
                shift_right(b);
        }
}
1062
1063 static void
1064 fdiv(const temp_real * src1, const temp_real * src2, temp_real * result)
1065 {
1066         int i,sign;
1067         int a[4],b[4],tmp[4] = {0,0,0,0};
1068
1069         sign = (src1->exponent ^ src2->exponent) & 0x8000;
1070         if (!(src2->a || src2->b)) {
1071                 set_ZE();
1072                 return;
1073         }
1074         i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
1075         if (i<0) {
1076                 set_UE();
1077                 result->exponent = sign;
1078                 result->a = result->b = 0;
1079                 return;
1080         }
1081         a[0] = a[1] = 0;
1082         a[2] = src1->a;
1083         a[3] = src1->b;
1084         b[0] = b[1] = 0;
1085         b[2] = src2->a;
1086         b[3] = src2->b;
1087         while (b[3] >= 0) {
1088                 i++;
1089                 shift_left(b);
1090         }
1091         div64(a,b,tmp);
1092         if (tmp[0] || tmp[1] || tmp[2] || tmp[3]) {
1093                 while (i && tmp[3] >= 0) {
1094                         i--;
1095                         shift_left(tmp);
1096                 }
1097                 if (tmp[3] >= 0)
1098                         set_DE();
1099         } else
1100                 i = 0;
1101         if (i>0x7fff) {
1102                 set_OE();
1103                 return;
1104         }
1105         if (tmp[0] || tmp[1])
1106                 set_PE();
1107         result->exponent = i | sign;
1108         result->a = tmp[2];
1109         result->b = tmp[3];
1110 }
1111
1112 /*
1113  * linux/kernel/math/add.c
1114  *
1115  * (C) 1991 Linus Torvalds
1116  */
1117
1118 /*
1119  * temporary real addition routine.
1120  *
1121  * NOTE! These aren't exact: they are only 62 bits wide, and don't do
1122  * correct rounding. Fast hack. The reason is that we shift right the
1123  * values by two, in order not to have overflow (1 bit), and to be able
1124  * to move the sign into the mantissa (1 bit). Much simpler algorithms,
1125  * and 62 bits (61 really - no rounding) accuracy is usually enough. The
1126  * only time you should notice anything weird is when adding 64-bit
1127  * integers together. When using doubles (52 bits accuracy), the
1128  * 61-bit accuracy never shows at all.
1129  */
1130
/*
 * NEGINT(a): two's-complement negate the 64-bit value a->b:a->a in place
 * (invert both words, add one to the low word, carry into the high word).
 * The argument is pasted directly in front of `->', so pass a plain
 * pointer name.
 */
1131 #define NEGINT(a) \
1132 __asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
1133         :"=r" (a->a),"=r" (a->b) \
1134         :"0" (a->a),"1" (a->b))
1135
/*
 * Convert *a in place from sign-and-magnitude to the signed form that
 * fadd() adds directly: the mantissa is shifted right by two bits (the
 * exponent is raised by two to compensate), it is negated when the
 * exponent word is negative (sign bit set), and the sign bit is then
 * stripped from the exponent.
 */
1136 static void signify(temp_real * a)
1137 {
1138         a->exponent += 2;
        /* 64-bit logical shift right by two across b:a */
1139         __asm__("shrdl $2,%1,%0 ; shrl $2,%1"
1140                 :"=r" (a->a),"=r" (a->b)
1141                 :"0" (a->a),"1" (a->b));
1142         if (a->exponent < 0)
1143                 NEGINT(a);
1144         a->exponent &= 0x7fff;
1145 }
1146
/*
 * Inverse of signify(): convert the signed 64-bit mantissa in *a back to
 * sign-and-magnitude form and renormalise, in place.  A zero mantissa
 * gives exponent 0.  Otherwise a negative mantissa is negated and the
 * sign bit set in the exponent, after which the mantissa is shifted left
 * (decrementing the exponent each time) until the top bit of a->b is set.
 */
1147 static void unsignify(temp_real * a)
1148 {
1149         if (!(a->a || a->b)) {
1150                 a->exponent = 0;
1151                 return;
1152         }
1153         a->exponent &= 0x7fff;
1154         if (a->b < 0) {
1155                 NEGINT(a);
1156                 a->exponent |= 0x8000;
1157         }
1158         while (a->b >= 0) {
1159                 a->exponent--;
                /* 64-bit shift left by one across b:a */
1160                 __asm__("addl %0,%0 ; adcl %1,%1"
1161                         :"=r" (a->a),"=r" (a->b)
1162                         :"0" (a->a),"1" (a->b));
1163         }
1164 }
1165
/*
 * result = src1 + src2.
 *
 * `a' is the operand with the larger exponent (src2 on a tie) and `b' the
 * other one.  b's mantissa is shifted right by the exponent difference so
 * the two line up; when the difference is 64 or more, `a' is returned
 * unchanged.  Both are then converted with signify(), added as 64-bit
 * integers, and the sum is converted back with unsignify().
 *
 * The sources are copied into locals before *result is written, so
 * result may point at one of the sources (fcom() does this).
 */
1166 static void
1167 fadd(const temp_real * src1, const temp_real * src2, temp_real * result)
1168 {
1169         temp_real a,b;
1170         int x1,x2,shift;
1171
1172         x1 = src1->exponent & 0x7fff;
1173         x2 = src2->exponent & 0x7fff;
1174         if (x1 > x2) {
1175                 a = *src1;
1176                 b = *src2;
1177                 shift = x1-x2;
1178         } else {
1179                 a = *src2;
1180                 b = *src1;
1181                 shift = x2-x1;
1182         }
1183         if (shift >= 64) {
1184                 *result = a;
1185                 return;
1186         }
        /* a shift of 32 or more is done as a whole-word move first */
1187         if (shift >= 32) {
1188                 b.a = b.b;
1189                 b.b = 0;
1190                 shift -= 32;
1191         }
        /* remaining shift (0..31) of b.b:b.a to the right, count in %cl */
1192         __asm__("shrdl %4,%1,%0 ; shrl %4,%1"
1193                 :"=r" (b.a),"=r" (b.b)
1194                 :"0" (b.a),"1" (b.b),"c" ((char) shift));
1195         signify(&a);
1196         signify(&b);
        /* 64-bit add of the two signed mantissas into a */
1197         __asm__("addl %4,%0 ; adcl %5,%1"
1198                 :"=r" (a.a),"=r" (a.b)
1199                 :"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
1200         unsignify(&a);
1201         *result = a;
1202 }
1203
1204 /*
1205  * linux/kernel/math/compare.c
1206  *
1207  * (C) 1991 Linus Torvalds
1208  */
1209
1210 /*
1211  * temporary real comparison routines
1212  */
1213
1214
/*
 * Clear bits 0x4500 of the emulated status word (C0, C2 and C3 in the
 * x87 status-word layout) before a comparison sets them afresh.
 */
1215 #define clear_Cx() (I387.swd &= ~0x4500)
1216
/*
 * Normalise *a in place.  A zero mantissa gives exponent 0 (the sign is
 * dropped).  Otherwise the mantissa is shifted left, decrementing the
 * exponent, until either the top bit of a->b is set or the exponent
 * reaches zero; the original sign bit is then put back.
 */
1217 static void 
1218 normalize(temp_real * a)
1219 {
1220         int i = a->exponent & 0x7fff;
1221         int sign = a->exponent & 0x8000;
1222
1223         if (!(a->a || a->b)) {
1224                 a->exponent = 0;
1225                 return;
1226         }
1227         while (i && a->b >= 0) {
1228                 i--;
                /* 64-bit shift left by one across b:a */
1229                 __asm__("addl %0,%0 ; adcl %1,%1"
1230                         :"=r" (a->a),"=r" (a->b)
1231                         :"0" (a->a),"1" (a->b));
1232         }
1233         a->exponent = i | sign;
1234 }
1235
1236 static void
1237 ftst(const temp_real * a)
1238 {
1239         temp_real b;
1240
1241         clear_Cx();
1242         b = *a;
1243         normalize(&b);
1244         if (b.a || b.b || b.exponent) {
1245                 if (b.exponent < 0)
1246                         set_C0();
1247         } else
1248                 set_C3();
1249 }
1250
1251 static void
1252 fcom(const temp_real * src1, const temp_real * src2)
1253 {
1254         temp_real a;
1255
1256         a = *src1;
1257         a.exponent ^= 0x8000;
1258         fadd(&a,src2,&a);
1259         ftst(&a);
1260 }
1261
/*
 * Unordered compare.  This implementation makes no distinction from the
 * ordered compare and simply forwards to fcom().
 */
1262 static void
1263 fucom(const temp_real * src1, const temp_real * src2)
1264 {
1265         fcom(src1,src2);
1266 }
1267
1268 /*
1269  * linux/kernel/math/convert.c
1270  *
1271  * (C) 1991 Linus Torvalds
1272  */
1273
1274
1275 /*
1276  * NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
1277  * and temp_to_short conversion routines: don't touch them if you don't
1278  * know what's going on. They are the adding of one in the rounding: the
1279  * overflow bit is also used for adding one into the exponent. Thus it
1280  * looks like the overflow would be incorrectly handled, but due to the
1281  * way the IEEE numbers work, things are correct.
1282  *
1283  * There is no checking for total overflow in the conversions, though (ie
1284  * if the temp-real number simply won't fit in a short- or long-real.)
1285  */
1286
1287 static void
1288 short_to_temp(const short_real * a, temp_real * b)
1289 {
1290         if (!(*a & 0x7fffffff)) {
1291                 b->a = b->b = 0;
1292                 if (*a)
1293                         b->exponent = 0x8000;
1294                 else
1295                         b->exponent = 0;
1296                 return;
1297         }
1298         b->exponent = ((*a>>23) & 0xff)-127+16383;
1299         if (*a<0)
1300                 b->exponent |= 0x8000;
1301         b->b = (*a<<8) | 0x80000000UL;
1302         b->a = 0;
1303 }
1304
1305 static void
1306 long_to_temp(const long_real * a, temp_real * b)
1307 {
1308         if (!a->a && !(a->b & 0x7fffffff)) {
1309                 b->a = b->b = 0;
1310                 if (a->b)
1311                         b->exponent = 0x8000;
1312                 else
1313                         b->exponent = 0;
1314                 return;
1315         }
1316         b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
1317         if (a->b<0)
1318                 b->exponent |= 0x8000;
1319         b->b = 0x80000000UL | (a->b<<11) | (((u_int32_t)a->a)>>21);
1320         b->a = a->a<<11;
1321 }
1322
1323 static void 
1324 temp_to_short(const temp_real * a, short_real * b)
1325 {
1326         if (!(a->exponent & 0x7fff)) {
1327                 *b = (a->exponent)?0x80000000UL:0;
1328                 return;
1329         }
1330         *b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
1331         if (a->exponent < 0)
1332                 *b |= 0x80000000UL;
1333         *b |= (a->b >> 8) & 0x007fffff;
1334         switch ((int)ROUNDING) {
1335                 case ROUND_NEAREST:
1336                         if ((a->b & 0xff) > 0x80)
1337                                 ++*b;
1338                         break;
1339                 case ROUND_DOWN:
1340                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1341                                 ++*b;
1342                         break;
1343                 case ROUND_UP:
1344                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1345                                 ++*b;
1346                         break;
1347         }
1348 }
1349
1350 static void
1351 temp_to_long(const temp_real * a, long_real * b)
1352 {
1353         if (!(a->exponent & 0x7fff)) {
1354                 b->a = 0;
1355                 b->b = (a->exponent)?0x80000000UL:0;
1356                 return;
1357         }
1358         b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
1359             0x7ff00000;
1360         if (a->exponent < 0)
1361                 b->b |= 0x80000000UL;
1362         b->b |= (a->b >> 11) & 0x000fffff;
1363         b->a = a->b << 21;
1364         b->a |= (a->a >> 11) & 0x001fffff;
1365         switch ((int)ROUNDING) {
1366                 case ROUND_NEAREST:
1367                         if ((a->a & 0x7ff) > 0x400)
1368                                 __asm__("addl $1,%0 ; adcl $0,%1"
1369                                         :"=r" (b->a),"=r" (b->b)
1370                                         :"0" (b->a),"1" (b->b));
1371                         break;
1372                 case ROUND_DOWN:
1373                         if ((a->exponent & 0x8000) && (a->b & 0xff))
1374                                 __asm__("addl $1,%0 ; adcl $0,%1"
1375                                         :"=r" (b->a),"=r" (b->b)
1376                                         :"0" (b->a),"1" (b->b));
1377                         break;
1378                 case ROUND_UP:
1379                         if (!(a->exponent & 0x8000) && (a->b & 0xff))
1380                                 __asm__("addl $1,%0 ; adcl $0,%1"
1381                                         :"=r" (b->a),"=r" (b->b)
1382                                         :"0" (b->a),"1" (b->b));
1383                         break;
1384         }
1385 }
1386
1387 static void 
1388 frndint(const temp_real * a, temp_real * b)
1389 {
1390         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1391         u_int32_t underflow;
1392
1393         if ((shift < 0) || (shift == 16383+63)) {
1394                 *b = *a;
1395                 return;
1396         }
1397         b->a = b->b = underflow = 0;
1398         b->exponent = a->exponent;
1399         if (shift < 32) {
1400                 b->b = a->b; b->a = a->a;
1401         } else if (shift < 64) {
1402                 b->a = a->b; underflow = a->a;
1403                 shift -= 32;
1404                 b->exponent += 32;
1405         } else if (shift < 96) {
1406                 underflow = a->b;
1407                 shift -= 64;
1408                 b->exponent += 64;
1409         } else {
1410                 underflow = 1;
1411                 shift = 0;
1412         }
1413         b->exponent += shift;
1414         __asm__("shrdl %2,%1,%0"
1415                 :"=r" (underflow),"=r" (b->a)
1416                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1417         __asm__("shrdl %2,%1,%0"
1418                 :"=r" (b->a),"=r" (b->b)
1419                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1420         __asm__("shrl %1,%0"
1421                 :"=r" (b->b)
1422                 :"c" ((char) shift),"0" (b->b));
1423         switch ((int)ROUNDING) {
1424                 case ROUND_NEAREST:
1425                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1426                                 :"=r" (b->a),"=r" (b->b)
1427                                 :"0" (b->a),"1" (b->b)
1428                                 ,"r" (0x7fffffff + (b->a & 1))
1429                                 ,"m" (*&underflow));
1430                         break;
1431                 case ROUND_UP:
1432                         if ((b->exponent >= 0) && underflow)
1433                                 __asm__("addl $1,%0 ; adcl $0,%1"
1434                                         :"=r" (b->a),"=r" (b->b)
1435                                         :"0" (b->a),"1" (b->b));
1436                         break;
1437                 case ROUND_DOWN:
1438                         if ((b->exponent < 0) && underflow)
1439                                 __asm__("addl $1,%0 ; adcl $0,%1"
1440                                         :"=r" (b->a),"=r" (b->b)
1441                                         :"0" (b->a),"1" (b->b));
1442                         break;
1443         }
1444         if (b->a || b->b)
1445                 while (b->b >= 0) {
1446                         b->exponent--;
1447                         __asm__("addl %0,%0 ; adcl %1,%1"
1448                                 :"=r" (b->a),"=r" (b->b)
1449                                 :"0" (b->a),"1" (b->b));
1450                 }
1451         else
1452                 b->exponent = 0;
1453 }
1454
1455 static void
1456 Fscale(const temp_real *a, const temp_real *b, temp_real *c)
1457 {
1458         temp_int ti;
1459
1460         *c = *a;
1461         if(!c->a && !c->b) {                            /* 19 Sep 92*/
1462                 c->exponent = 0;
1463                 return;
1464         }
1465         real_to_int(b, &ti);
1466         if(ti.sign)
1467                 c->exponent -= ti.a;
1468         else
1469                 c->exponent += ti.a;
1470 }
1471
/*
 * Convert a temporary real (*a) to a sign flag plus a 64-bit magnitude
 * (*b, low word in b->a, high word in b->b).
 *
 * b->sign is set when the exponent word of *a is negative (sign bit set).
 * `shift' is the number of mantissa bits lying below the binary point;
 * the mantissa is shifted right by that amount, the bits that fall off
 * are collected in `underflow', and the magnitude is then rounded
 * according to ROUNDING.  When the value needs more than 64 bits
 * (shift < 0), set_OE() is raised and the magnitude is left at zero.
 */
1472 static void
1473 real_to_int(const temp_real * a, temp_int * b)
1474 {
1475         int shift =  16383 + 63 - (a->exponent & 0x7fff);
1476         u_int32_t underflow;
1477
1478         b->a = b->b = underflow = 0;
1479         b->sign = (a->exponent < 0);
1480         if (shift < 0) {
1481                 set_OE();
1482                 return;
1483         }
        /* whole-word part of the shift first; the asm does the last 0..31 bits */
1484         if (shift < 32) {
1485                 b->b = a->b; b->a = a->a;
1486         } else if (shift < 64) {
1487                 b->a = a->b; underflow = a->a;
1488                 shift -= 32;
1489         } else if (shift < 96) {
1490                 underflow = a->b;
1491                 shift -= 64;
1492         } else {
                /* everything is fractional: keep only a non-zero marker */
1493                 underflow = 1;
1494                 shift = 0;
1495         }
        /* shift underflow:b->a:b->b right by the remaining count in %cl */
1496         __asm__("shrdl %2,%1,%0"
1497                 :"=r" (underflow),"=r" (b->a)
1498                 :"c" ((char) shift),"0" (underflow),"1" (b->a));
1499         __asm__("shrdl %2,%1,%0"
1500                 :"=r" (b->a),"=r" (b->b)
1501                 :"c" ((char) shift),"0" (b->a),"1" (b->b));
1502         __asm__("shrl %1,%0"
1503                 :"=r" (b->b)
1504                 :"c" ((char) shift),"0" (b->b));
1505         switch ((int)ROUNDING) {
1506                 case ROUND_NEAREST:
                        /*
                         * Adding 0x7fffffff plus the current low bit to
                         * the discarded word carries out exactly when the
                         * fraction exceeds one half, or equals one half
                         * with an odd integer part; the carry increments
                         * the magnitude.
                         */
1507                         __asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
1508                                 :"=r" (b->a),"=r" (b->b)
1509                                 :"0" (b->a),"1" (b->b)
1510                                 ,"r" (0x7fffffff + (b->a & 1))
1511                                 ,"m" (*&underflow));
1512                         break;
1513                 case ROUND_UP:
                        /* toward +infinity: only positive magnitudes grow */
1514                         if (!b->sign && underflow)
1515                                 __asm__("addl $1,%0 ; adcl $0,%1"
1516                                         :"=r" (b->a),"=r" (b->b)
1517                                         :"0" (b->a),"1" (b->b));
1518                         break;
1519                 case ROUND_DOWN:
                        /* toward -infinity: only negative magnitudes grow */
1520                         if (b->sign && underflow)
1521                                 __asm__("addl $1,%0 ; adcl $0,%1"
1522                                         :"=r" (b->a),"=r" (b->b)
1523                                         :"0" (b->a),"1" (b->b));
1524                         break;
1525         }
1526 }
1527
/*
 * Convert a sign flag plus 64-bit magnitude (*a) to a temporary real
 * (*b).  A zero magnitude gives exponent 0.  Otherwise the magnitude is
 * taken as the mantissa with the exponent that places the binary point
 * below bit 0 (16383 + 63), the sign flag becomes bit 15 of the exponent
 * word, and the mantissa is shifted left (decrementing the exponent)
 * until the top bit of b->b is set.
 */
1528 static void
1529 int_to_real(const temp_int * a, temp_real * b)
1530 {
1531         b->a = a->a;
1532         b->b = a->b;
1533         if (b->a || b->b)
1534                 b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
1535         else {
1536                 b->exponent = 0;
1537                 return;
1538         }
1539         while (b->b >= 0) {
1540                 b->exponent--;
                /* 64-bit shift left by one across b:a */
1541                 __asm__("addl %0,%0 ; adcl %1,%1"
1542                         :"=r" (b->a),"=r" (b->b)
1543                         :"0" (b->a),"1" (b->b));
1544         }
1545 }
1546
1547 static int
1548 fpu_modevent(module_t mod, int type, void *unused)
1549 {
1550         switch (type) {
1551         case MOD_LOAD:
1552                 if (pmath_emulate) {
1553                         printf("Another Math emulator already present\n");
1554                         return EBUSY;
1555                 }
1556                 pmath_emulate = math_emulate;
1557                 if (bootverbose)
1558                         printf("Math emulator present\n");
1559                 break;
1560         case MOD_UNLOAD:
1561                 if (pmath_emulate != math_emulate) {
1562                         printf("Cannot unload another math emulator\n");
1563                         return EACCES;
1564                 }
1565                 pmath_emulate = 0;
1566                 if (bootverbose)
1567                         printf("Math emulator unloaded\n");
1568                 break;
1569         default:
1570                 break;
1571         }
1572         return 0;
1573 }
/*
 * Module description for the emulator: the name "fpu", fpu_modevent() as
 * its event handler, and 0 in the third field.  DECLARE_MODULE registers
 * it at SI_SUB_DRIVERS / SI_ORDER_ANY.
 */
1574 static moduledata_t fpumod = {
1575         "fpu",
1576         fpu_modevent,
1577         0
1578 };
1579 DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);