gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* linux/kernel/math/math_emulate.c
	3	*
	4	* (C) 1991 Linus Torvalds
	5	*
	6	* [expedient "port" of linux 8087 emulator to 386BSD, with apologies -wfj]
	7	*
	8	* from: 386BSD 0.1
	9	* $FreeBSD: src/sys/i386/i386/math_emulate.c,v 1.35 1999/08/28 00:43:47 peter Exp $
	10	* $DragonFly: src/sys/platform/pc32/i386/math_emulate.c,v 1.9 2007/02/03 17:05:58 corecode Exp $
	11	*/
	12
	13	/*
	14	* Limited emulation 27.12.91 - mostly loads/stores, which gcc wants
	15	* even for soft-float, unless you use bruce evans' patches. The patches
	16	* are great, but they have to be re-applied for every version, and the
	17	* library is different for soft-float and 80387. So emulation is more
	18	* practical, even though it's slower.
	19	*
	20	* 28.12.91 - loads/stores work, even BCD. I'll have to start thinking
	21	* about add/sub/mul/div. Urgel. I should find some good source, but I'll
	22	* just fake up something.
	23	*
	24	* 30.12.91 - add/sub/mul/div/com seem to work mostly. I should really
	25	* test every possible combination.
	26	*/
	27
	28	/*
	29	* This file is full of ugly macros etc: one problem was that gcc simply
	30	* didn't want to make the structures as they should be: it has to try to
	31	* align them. Sickening code, but at least I've hidden the ugly things
	32	* in this one file: the other files don't need to know about these things.
	33	*
	34	* The other files also don't care about ST(x) etc - they just get addresses
	35	* to 80-bit temporary reals, and do with them as they please. I wanted to
	36	* hide most of the 387-specific things here.
	37	*/
	38
	39	#include <sys/param.h>
	40	#include <sys/systm.h>
	41	#include <sys/reg.h>
	42
	43	#include <machine/frame.h>
	44
	45	#include <sys/proc.h>
	46	#include <sys/kernel.h>
	47
	48	#include <vm/vm.h>
	49	#include <sys/lock.h>
	50	#include <vm/pmap.h>
	51	#include <vm/vm_map.h>
	52	#include <sys/user.h>
	53
	54	#define __ALIGNED_TEMP_REAL 1
	55	#include "math_emu.h"
	56
	57	#define bswapw(x) __asm__("xchgb %%al,%%ah":"=a" (x):"0" ((short)x))
	58	#define ST(x) (*__st((x)))
	59	#define PST(x) ((const temp_real *) __st((x)))
	60	#define math_abort(tfp, signo) tfp->tf_eip = oldeip; return (signo);
	61
	62	/*
	63	* We don't want these inlined - it gets too messy in the machine-code.
	64	*/
	65	static void fpop(void);
	66	static void fpush(void);
	67	static void fxchg(temp_real_unaligned a, temp_real_unaligned b);
	68	static temp_real_unaligned *__st(int i);
	69
	70	static unsigned char
	71	get_fs_byte(char *adr)
	72	{ return(fubyte(adr)); }
	73
	74	static unsigned short
	75	get_fs_word(unsigned short *adr)
	76	{ return(fuword(adr)); }
	77
	78	static u_int32_t
	79	get_fs_long(u_int32_t *adr)
	80	{ return(fuword(adr)); }
	81
	82	static void
	83	put_fs_byte(unsigned char val, char *adr)
	84	{ (void)subyte(adr,val); }
	85
	86	static void
	87	put_fs_word(unsigned short val, short *adr)
	88	{ (void)susword(adr,val); }
	89
	90	static void
	91	put_fs_long(u_long val, u_int32_t *adr)
	92	{ (void)suword(adr,val); }
	93
	94	static int
	95	math_emulate(struct trapframe *info)
	96	{
	97	unsigned short code;
	98	temp_real tmp;
	99	char *address;
	100	u_int32_t oldeip;
	101
	102	/* ever used fp? */
	103	if ((curthread->td_pcb->pcb_flags & FP_SOFTFP) == 0) {
	104	curthread->td_pcb->pcb_flags \|= FP_SOFTFP;
	105	I387.cwd = 0x037f;
	106	I387.swd = 0x0000;
	107	I387.twd = 0x0000;
	108	}
	109
	110	if (I387.cwd & I387.swd & 0x3f)
	111	I387.swd \|= 0x8000;
	112	else
	113	I387.swd &= 0x7fff;
	114	oldeip = info->tf_eip;
	115	/* 0x001f means user code space */
	116	if ((u_short)info->tf_cs != 0x001F) {
	117	kprintf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
	118	(u_long)oldeip);
	119	panic("?Math emulation needed in kernel?");
	120	}
	121	/* completely ignore an operand-size prefix */
	122	if (get_fs_byte((char *) info->tf_eip) == 0x66)
	123	info->tf_eip++;
	124	code = get_fs_word((unsigned short *) info->tf_eip);
	125	bswapw(code);
	126	code &= 0x7ff;
	127	I387.fip = oldeip;
	128	(unsigned short ) &I387.fcs = (u_short) info->tf_cs;
	129	(1+(unsigned short ) &I387.fcs) = code;
	130	info->tf_eip += 2;
	131	switch (code) {
	132	case 0x1d0: /* fnop */
	133	return(0);
	134	case 0x1d1: case 0x1d2: case 0x1d3: /* fst to 32-bit mem */
	135	case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
	136	math_abort(info,SIGILL);
	137	case 0x1e0: /* fchs */
	138	ST(0).exponent ^= 0x8000;
	139	return(0);
	140	case 0x1e1: /* fabs */
	141	ST(0).exponent &= 0x7fff;
	142	return(0);
	143	case 0x1e2: case 0x1e3:
	144	math_abort(info,SIGILL);
	145	case 0x1e4: /* ftst */
	146	ftst(PST(0));
	147	return(0);
	148	case 0x1e5: /* fxam */
	149	kprintf("fxam not implemented\n");
	150	math_abort(info,SIGILL);
	151	case 0x1e6: case 0x1e7: /* fldenv */
	152	math_abort(info,SIGILL);
	153	case 0x1e8: /* fld1 */
	154	fpush();
	155	ST(0) = CONST1;
	156	return(0);
	157	case 0x1e9: /* fld2t */
	158	fpush();
	159	ST(0) = CONSTL2T;
	160	return(0);
	161	case 0x1ea: /* fld2e */
	162	fpush();
	163	ST(0) = CONSTL2E;
	164	return(0);
	165	case 0x1eb: /* fldpi */
	166	fpush();
	167	ST(0) = CONSTPI;
	168	return(0);
	169	case 0x1ec: /* fldlg2 */
	170	fpush();
	171	ST(0) = CONSTLG2;
	172	return(0);
	173	case 0x1ed: /* fldln2 */
	174	fpush();
	175	ST(0) = CONSTLN2;
	176	return(0);
	177	case 0x1ee: /* fldz */
	178	fpush();
	179	ST(0) = CONSTZ;
	180	return(0);
	181	case 0x1ef:
	182	math_abort(info,SIGILL);
	183	case 0x1f0: /* f2xm1 */
	184	case 0x1f1: /* fyl2x */
	185	case 0x1f2: /* fptan */
	186	case 0x1f3: /* fpatan */
	187	case 0x1f4: /* fxtract */
	188	case 0x1f5: /* fprem1 */
	189	case 0x1f6: /* fdecstp */
	190	case 0x1f7: /* fincstp */
	191	case 0x1f8: /* fprem */
	192	case 0x1f9: /* fyl2xp1 */
	193	case 0x1fa: /* fsqrt */
	194	case 0x1fb: /* fsincos */
	195	case 0x1fe: /* fsin */
	196	case 0x1ff: /* fcos */
	197	uprintf(
	198	"math_emulate: instruction %04x not implemented\n",
	199	code + 0xd800);
	200	math_abort(info,SIGILL);
	201	case 0x1fc: /* frndint */
	202	frndint(PST(0),&tmp);
	203	real_to_real(&tmp,&ST(0));
	204	return(0);
	205	case 0x1fd: /* fscale */
	206	/* incomplete and totally inadequate -wfj */
	207	Fscale(PST(0), PST(1), &tmp);
	208	real_to_real(&tmp,&ST(0));
	209	return(0); /* 19 Sep 92*/
	210	case 0x2e9: /* ????? */
	211	/* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9 ATS */
	212	fucom(PST(1),PST(0));
	213	fpop(); fpop();
	214	return(0);
	215	case 0x3d0: case 0x3d1: /* fist ?? */
	216	return(0);
	217	case 0x3e2: /* fclex */
	218	I387.swd &= 0x7f00;
	219	return(0);
	220	case 0x3e3: /* fninit */
	221	I387.cwd = 0x037f;
	222	I387.swd = 0x0000;
	223	I387.twd = 0x0000;
	224	return(0);
	225	case 0x3e4:
	226	return(0);
	227	case 0x6d9: /* fcompp */
	228	fcom(PST(1),PST(0));
	229	fpop(); fpop();
	230	return(0);
	231	case 0x7e0: /* fstsw ax */
	232	(short ) &info->tf_eax = I387.swd;
	233	return(0);
	234	}
	235	switch (code >> 3) {
	236	case 0x18: /* fadd */
	237	fadd(PST(0),PST(code & 7),&tmp);
	238	real_to_real(&tmp,&ST(0));
	239	return(0);
	240	case 0x19: /* fmul */
	241	fmul(PST(0),PST(code & 7),&tmp);
	242	real_to_real(&tmp,&ST(0));
	243	return(0);
	244	case 0x1a: /* fcom */
	245	fcom(PST(code & 7),PST(0));
	246	return(0);
	247	case 0x1b: /* fcomp */
	248	fcom(PST(code & 7),PST(0));
	249	fpop();
	250	return(0);
	251	case 0x1c: /* fsubr */
	252	real_to_real(&ST(code & 7),&tmp);
	253	tmp.exponent ^= 0x8000;
	254	fadd(PST(0),&tmp,&tmp);
	255	real_to_real(&tmp,&ST(0));
	256	return(0);
	257	case 0x1d: /* fsub */
	258	ST(0).exponent ^= 0x8000;
	259	fadd(PST(0),PST(code & 7),&tmp);
	260	real_to_real(&tmp,&ST(0));
	261	return(0);
	262	case 0x1e: /* fdivr */
	263	fdiv(PST(0),PST(code & 7),&tmp);
	264	real_to_real(&tmp,&ST(0));
	265	return(0);
	266	case 0x1f: /* fdiv */
	267	fdiv(PST(code & 7),PST(0),&tmp);
	268	real_to_real(&tmp,&ST(0));
	269	return(0);
	270	case 0x38: /* fld */
	271	fpush();
	272	ST(0) = ST((code & 7)+1); /* why plus 1 ????? ATS */
	273	return(0);
	274	case 0x39: /* fxch */
	275	fxchg(&ST(0),&ST(code & 7));
	276	return(0);
	277	case 0x3b: /* ??? ??? wrong ???? ATS */
	278	ST(code & 7) = ST(0);
	279	fpop();
	280	return(0);
	281	case 0x98: /* fadd */
	282	fadd(PST(0),PST(code & 7),&tmp);
	283	real_to_real(&tmp,&ST(code & 7));
	284	return(0);
	285	case 0x99: /* fmul */
	286	fmul(PST(0),PST(code & 7),&tmp);
	287	real_to_real(&tmp,&ST(code & 7));
	288	return(0);
	289	case 0x9a: /* ???? , my manual don't list a direction bit
	290	for fcom , ??? ATS */
	291	fcom(PST(code & 7),PST(0));
	292	return(0);
	293	case 0x9b: /* same as above , ATS */
	294	fcom(PST(code & 7),PST(0));
	295	fpop();
	296	return(0);
	297	case 0x9c: /* fsubr */
	298	ST(code & 7).exponent ^= 0x8000;
	299	fadd(PST(0),PST(code & 7),&tmp);
	300	real_to_real(&tmp,&ST(code & 7));
	301	return(0);
	302	case 0x9d: /* fsub */
	303	real_to_real(&ST(0),&tmp);
	304	tmp.exponent ^= 0x8000;
	305	fadd(PST(code & 7),&tmp,&tmp);
	306	real_to_real(&tmp,&ST(code & 7));
	307	return(0);
	308	case 0x9e: /* fdivr */
	309	fdiv(PST(0),PST(code & 7),&tmp);
	310	real_to_real(&tmp,&ST(code & 7));
	311	return(0);
	312	case 0x9f: /* fdiv */
	313	fdiv(PST(code & 7),PST(0),&tmp);
	314	real_to_real(&tmp,&ST(code & 7));
	315	return(0);
	316	case 0xb8: /* ffree */
	317	kprintf("ffree not implemented\n");
	318	math_abort(info,SIGILL);
	319	case 0xb9: /* fstp ???? where is the pop ? ATS */
	320	fxchg(&ST(0),&ST(code & 7));
	321	return(0);
	322	case 0xba: /* fst */
	323	ST(code & 7) = ST(0);
	324	return(0);
	325	case 0xbb: /* ????? encoding of fstp to mem ? ATS */
	326	ST(code & 7) = ST(0);
	327	fpop();
	328	return(0);
	329	case 0xbc: /* fucom */
	330	fucom(PST(code & 7),PST(0));
	331	return(0);
	332	case 0xbd: /* fucomp */
	333	fucom(PST(code & 7),PST(0));
	334	fpop();
	335	return(0);
	336	case 0xd8: /* faddp */
	337	fadd(PST(code & 7),PST(0),&tmp);
	338	real_to_real(&tmp,&ST(code & 7));
	339	fpop();
	340	return(0);
	341	case 0xd9: /* fmulp */
	342	fmul(PST(code & 7),PST(0),&tmp);
	343	real_to_real(&tmp,&ST(code & 7));
	344	fpop();
	345	return(0);
	346	case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
	347	fcom(PST(code & 7),PST(0));
	348	fpop();
	349	return(0);
	350	case 0xdc: /* fsubrp */
	351	ST(code & 7).exponent ^= 0x8000;
	352	fadd(PST(0),PST(code & 7),&tmp);
	353	real_to_real(&tmp,&ST(code & 7));
	354	fpop();
	355	return(0);
	356	case 0xdd: /* fsubp */
	357	real_to_real(&ST(0),&tmp);
	358	tmp.exponent ^= 0x8000;
	359	fadd(PST(code & 7),&tmp,&tmp);
	360	real_to_real(&tmp,&ST(code & 7));
	361	fpop();
	362	return(0);
	363	case 0xde: /* fdivrp */
	364	fdiv(PST(0),PST(code & 7),&tmp);
	365	real_to_real(&tmp,&ST(code & 7));
	366	fpop();
	367	return(0);
	368	case 0xdf: /* fdivp */
	369	fdiv(PST(code & 7),PST(0),&tmp);
	370	real_to_real(&tmp,&ST(code & 7));
	371	fpop();
	372	return(0);
	373	case 0xf8: /* fild 16-bit mem ???? ATS */
	374	kprintf("ffree not implemented\n");
	375	math_abort(info,SIGILL);
	376	fpop();
	377	return(0);
	378	case 0xf9: /* ????? ATS */
	379	fxchg(&ST(0),&ST(code & 7));
	380	return(0);
	381	case 0xfa: /* fist 16-bit mem ? ATS */
	382	case 0xfb: /* fistp 16-bit mem ? ATS */
	383	ST(code & 7) = ST(0);
	384	fpop();
	385	return(0);
	386	}
	387	switch ((code>>3) & 0xe7) {
	388	case 0x22:
	389	put_short_real(PST(0),info,code);
	390	return(0);
	391	case 0x23:
	392	put_short_real(PST(0),info,code);
	393	fpop();
	394	return(0);
	395	case 0x24:
	396	address = ea(info,code);
	397	for (code = 0 ; code < 7 ; code++) {
	398	((int32_t *) & I387)[code] =
	399	get_fs_long((u_int32_t *) address);
	400	address += 4;
	401	}
	402	return(0);
	403	case 0x25:
	404	address = ea(info,code);
	405	(unsigned short ) &I387.cwd =
	406	get_fs_word((unsigned short *) address);
	407	return(0);
	408	case 0x26:
	409	address = ea(info,code);
	410	/verify_area(address,28);/
	411	for (code = 0 ; code < 7 ; code++) {
	412	put_fs_long( ((int32_t *) & I387)[code],
	413	(u_int32_t *) address);
	414	address += 4;
	415	}
	416	return(0);
	417	case 0x27:
	418	address = ea(info,code);
	419	/verify_area(address,2);/
	420	put_fs_word(I387.cwd,(short *) address);
	421	return(0);
	422	case 0x62:
	423	put_long_int(PST(0),info,code);
	424	return(0);
	425	case 0x63:
	426	put_long_int(PST(0),info,code);
	427	fpop();
	428	return(0);
	429	case 0x65:
	430	fpush();
	431	get_temp_real(&tmp,info,code);
	432	real_to_real(&tmp,&ST(0));
	433	return(0);
	434	case 0x67:
	435	put_temp_real(PST(0),info,code);
	436	fpop();
	437	return(0);
	438	case 0xa2:
	439	put_long_real(PST(0),info,code);
	440	return(0);
	441	case 0xa3:
	442	put_long_real(PST(0),info,code);
	443	fpop();
	444	return(0);
	445	case 0xa4:
	446	address = ea(info,code);
	447	for (code = 0 ; code < 27 ; code++) {
	448	((int32_t *) & I387)[code] =
	449	get_fs_long((u_int32_t *) address);
	450	address += 4;
	451	}
	452	return(0);
	453	case 0xa6:
	454	address = ea(info,code);
	455	/verify_area(address,108);/
	456	for (code = 0 ; code < 27 ; code++) {
	457	put_fs_long( ((int32_t *) & I387)[code],
	458	(u_int32_t *) address);
	459	address += 4;
	460	}
	461	I387.cwd = 0x037f;
	462	I387.swd = 0x0000;
	463	I387.twd = 0x0000;
	464	return(0);
	465	case 0xa7:
	466	address = ea(info,code);
	467	/verify_area(address,2);/
	468	put_fs_word(I387.swd,(short *) address);
	469	return(0);
	470	case 0xe2:
	471	put_short_int(PST(0),info,code);
	472	return(0);
	473	case 0xe3:
	474	put_short_int(PST(0),info,code);
	475	fpop();
	476	return(0);
	477	case 0xe4:
	478	fpush();
	479	get_BCD(&tmp,info,code);
	480	real_to_real(&tmp,&ST(0));
	481	return(0);
	482	case 0xe5:
	483	fpush();
	484	get_longlong_int(&tmp,info,code);
	485	real_to_real(&tmp,&ST(0));
	486	return(0);
	487	case 0xe6:
	488	put_BCD(PST(0),info,code);
	489	fpop();
	490	return(0);
	491	case 0xe7:
	492	put_longlong_int(PST(0),info,code);
	493	fpop();
	494	return(0);
	495	}
	496	switch (code >> 9) {
	497	case 0:
	498	get_short_real(&tmp,info,code);
	499	break;
	500	case 1:
	501	get_long_int(&tmp,info,code);
	502	break;
	503	case 2:
	504	get_long_real(&tmp,info,code);
	505	break;
	506	case 4:
	507	get_short_int(&tmp,info,code);
	508	}
	509	switch ((code>>3) & 0x27) {
	510	case 0:
	511	fadd(&tmp,PST(0),&tmp);
	512	real_to_real(&tmp,&ST(0));
	513	return(0);
	514	case 1:
	515	fmul(&tmp,PST(0),&tmp);
	516	real_to_real(&tmp,&ST(0));
	517	return(0);
	518	case 2:
	519	fcom(&tmp,PST(0));
	520	return(0);
	521	case 3:
	522	fcom(&tmp,PST(0));
	523	fpop();
	524	return(0);
	525	case 4:
	526	tmp.exponent ^= 0x8000;
	527	fadd(&tmp,PST(0),&tmp);
	528	real_to_real(&tmp,&ST(0));
	529	return(0);
	530	case 5:
	531	ST(0).exponent ^= 0x8000;
	532	fadd(&tmp,PST(0),&tmp);
	533	real_to_real(&tmp,&ST(0));
	534	return(0);
	535	case 6:
	536	fdiv(PST(0),&tmp,&tmp);
	537	real_to_real(&tmp,&ST(0));
	538	return(0);
	539	case 7:
	540	fdiv(&tmp,PST(0),&tmp);
	541	real_to_real(&tmp,&ST(0));
	542	return(0);
	543	}
	544	if ((code & 0x138) == 0x100) {
	545	fpush();
	546	real_to_real(&tmp,&ST(0));
	547	return(0);
	548	}
	549	kprintf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
	550	info->tf_eip,code);
	551	math_abort(info,SIGFPE);
	552	}
	553
	554	static void
	555	fpop(void)
	556	{
	557	u_int32_t tmp;
	558
	559	tmp = I387.swd & 0xffffc7ffUL;
	560	I387.swd += 0x00000800;
	561	I387.swd &= 0x00003800;
	562	I387.swd \|= tmp;
	563	}
	564
	565	static void
	566	fpush(void)
	567	{
	568	u_int32_t tmp;
	569
	570	tmp = I387.swd & 0xffffc7ffUL;
	571	I387.swd += 0x00003800;
	572	I387.swd &= 0x00003800;
	573	I387.swd \|= tmp;
	574	}
	575
	576	static void
	577	fxchg(temp_real_unaligned a, temp_real_unaligned b)
	578	{
	579	temp_real_unaligned c;
	580
	581	c = *a;
	582	a = b;
	583	*b = c;
	584	}
	585
	586	static temp_real_unaligned *
	587	__st(int i)
	588	{
	589	i += I387.swd >> 11;
	590	i &= 7;
	591	return (temp_real_unaligned ) (i10 + (char *)(I387.st_space));
	592	}
	593
	594	/*
	595	* linux/kernel/math/ea.c
	596	*
	597	* (C) 1991 Linus Torvalds
	598	*/
	599
	600	/*
	601	* Calculate the effective address.
	602	*/
	603
	604
	605	static int __regoffset[] = {
	606	tEAX, tECX, tEDX, tEBX, tESP, tEBP, tESI, tEDI
	607	};
	608
	609	#define REG(x) (((int *)curthread->td_lwp->lwp_md.md_regs)[__regoffset[(x)]])
	610
	611	static char *
	612	sib(struct trapframe *info, int mod)
	613	{
	614	unsigned char ss,index,base;
	615	int32_t offset = 0;
	616
	617	base = get_fs_byte((char *) info->tf_eip);
	618	info->tf_eip++;
	619	ss = base >> 6;
	620	index = (base >> 3) & 7;
	621	base &= 7;
	622	if (index == 4)
	623	offset = 0;
	624	else
	625	offset = REG(index);
	626	offset <<= ss;
	627	if (mod \|\| base != 5)
	628	offset += REG(base);
	629	if (mod == 1) {
	630	offset += (signed char) get_fs_byte((char *) info->tf_eip);
	631	info->tf_eip++;
	632	} else if (mod == 2 \|\| base == 5) {
	633	offset += (signed) get_fs_long((u_int32_t *) info->tf_eip);
	634	info->tf_eip += 4;
	635	}
	636	I387.foo = offset;
	637	I387.fos = 0x17;
	638	return (char *) offset;
	639	}
	640
	641	static char *
	642	ea(struct trapframe *info, unsigned short code)
	643	{
	644	unsigned char mod,rm;
	645	int32_t *tmp;
	646	int offset = 0;
	647
	648	mod = (code >> 6) & 3;
	649	rm = code & 7;
	650	if (rm == 4 && mod != 3)
	651	return sib(info,mod);
	652	if (rm == 5 && !mod) {
	653	offset = get_fs_long((u_int32_t *) info->tf_eip);
	654	info->tf_eip += 4;
	655	I387.foo = offset;
	656	I387.fos = 0x17;
	657	return (char *) offset;
	658	}
	659	tmp = (int32_t *) &REG(rm);
	660	switch (mod) {
	661	case 0: offset = 0; break;
	662	case 1:
	663	offset = (signed char) get_fs_byte((char *) info->tf_eip);
	664	info->tf_eip++;
	665	break;
	666	case 2:
	667	offset = (signed) get_fs_long((u_int32_t *) info->tf_eip);
	668	info->tf_eip += 4;
	669	break;
	670	#ifdef notyet
	671	case 3:
	672	math_abort(info,1<<(SIGILL-1));
	673	#endif
	674	}
	675	I387.foo = offset;
	676	I387.fos = 0x17;
	677	return offset + (char ) tmp;
	678	}
	679	/*
	680	* linux/kernel/math/get_put.c
	681	*
	682	* (C) 1991 Linus Torvalds
	683	*/
	684
	685	/*
	686	* This file handles all accesses to user memory: getting and putting
	687	* ints/reals/BCD etc. This is the only part that concerns itself with
	688	* other than temporary real format. All other calculations are strictly temp_real.
	689	*/
	690
	691	static void
	692	get_short_real(temp_real tmp, struct trapframe info, unsigned short code)
	693	{
	694	char *addr;
	695	short_real sr;
	696
	697	addr = ea(info,code);
	698	sr = get_fs_long((u_int32_t *) addr);
	699	short_to_temp(&sr,tmp);
	700	}
	701
	702	static void
	703	get_long_real(temp_real tmp, struct trapframe info, unsigned short code)
	704	{
	705	char *addr;
	706	long_real lr;
	707
	708	addr = ea(info,code);
	709	lr.a = get_fs_long((u_int32_t *) addr);
	710	lr.b = get_fs_long(1 + (u_int32_t *) addr);
	711	long_to_temp(&lr,tmp);
	712	}
	713
	714	static void
	715	get_temp_real(temp_real tmp, struct trapframe info, unsigned short code)
	716	{
	717	char *addr;
	718
	719	addr = ea(info,code);
	720	tmp->a = get_fs_long((u_int32_t *) addr);
	721	tmp->b = get_fs_long(1 + (u_int32_t *) addr);
	722	tmp->exponent = get_fs_word(4 + (unsigned short *) addr);
	723	}
	724
	725	static void
	726	get_short_int(temp_real tmp, struct trapframe info, unsigned short code)
	727	{
	728	char *addr;
	729	temp_int ti;
	730
	731	addr = ea(info,code);
	732	ti.a = (signed short) get_fs_word((unsigned short *) addr);
	733	ti.b = 0;
	734	if ((ti.sign = (ti.a < 0)) != 0)
	735	ti.a = - ti.a;
	736	int_to_real(&ti,tmp);
	737	}
	738
	739	static void
	740	get_long_int(temp_real tmp, struct trapframe info, unsigned short code)
	741	{
	742	char *addr;
	743	temp_int ti;
	744
	745	addr = ea(info,code);
	746	ti.a = get_fs_long((u_int32_t *) addr);
	747	ti.b = 0;
	748	if ((ti.sign = (ti.a < 0)) != 0)
	749	ti.a = - ti.a;
	750	int_to_real(&ti,tmp);
	751	}
	752
	753	static void
	754	get_longlong_int(temp_real tmp, struct trapframe info, unsigned short code)
	755	{
	756	char *addr;
	757	temp_int ti;
	758
	759	addr = ea(info,code);
	760	ti.a = get_fs_long((u_int32_t *) addr);
	761	ti.b = get_fs_long(1 + (u_int32_t *) addr);
	762	if ((ti.sign = (ti.b < 0)) != 0)
	763	__asm__("notl %0 ; notl %1\n\t"
	764	"addl $1,%0 ; adcl $0,%1"
	765	:"=r" (ti.a),"=r" (ti.b)
	766	:"0" (ti.a),"1" (ti.b));
	767	int_to_real(&ti,tmp);
	768	}
	769
	770	#define MUL10(low,high) \
	771	__asm__("addl %0,%0 ; adcl %1,%1\n\t" \
	772	"movl %0,%%ecx ; movl %1,%%ebx\n\t" \
	773	"addl %0,%0 ; adcl %1,%1\n\t" \
	774	"addl %0,%0 ; adcl %1,%1\n\t" \
	775	"addl %%ecx,%0 ; adcl %%ebx,%1" \
	776	:"=a" (low),"=d" (high) \
	777	:"0" (low),"1" (high):"cx","bx")
	778
	779	#define ADD64(val,low,high) \
	780	__asm__("addl %4,%0 ; adcl $0,%1":"=r" (low),"=r" (high) \
	781	:"0" (low),"1" (high),"r" ((u_int32_t) (val)))
	782
	783	static void
	784	get_BCD(temp_real tmp, struct trapframe info, unsigned short code)
	785	{
	786	int k;
	787	char *addr;
	788	temp_int i;
	789	unsigned char c;
	790
	791	addr = ea(info,code);
	792	addr += 9;
	793	i.sign = 0x80 & get_fs_byte(addr--);
	794	i.a = i.b = 0;
	795	for (k = 0; k < 9; k++) {
	796	c = get_fs_byte(addr--);
	797	MUL10(i.a, i.b);
	798	ADD64((c>>4), i.a, i.b);
	799	MUL10(i.a, i.b);
	800	ADD64((c&0xf), i.a, i.b);
	801	}
	802	int_to_real(&i,tmp);
	803	}
	804
	805	static void
	806	put_short_real(const temp_real *tmp,
	807	struct trapframe *info, unsigned short code)
	808	{
	809	char *addr;
	810	short_real sr;
	811
	812	addr = ea(info,code);
	813	/verify_area(addr,4);/
	814	temp_to_short(tmp,&sr);
	815	put_fs_long(sr,(u_int32_t *) addr);
	816	}
	817
	818	static void
	819	put_long_real(const temp_real *tmp,
	820	struct trapframe *info, unsigned short code)
	821	{
	822	char *addr;
	823	long_real lr;
	824
	825	addr = ea(info,code);
	826	/verify_area(addr,8);/
	827	temp_to_long(tmp,&lr);
	828	put_fs_long(lr.a, (u_int32_t *) addr);
	829	put_fs_long(lr.b, 1 + (u_int32_t *) addr);
	830	}
	831
	832	static void
	833	put_temp_real(const temp_real *tmp,
	834	struct trapframe *info, unsigned short code)
	835	{
	836	char *addr;
	837
	838	addr = ea(info,code);
	839	/verify_area(addr,10);/
	840	put_fs_long(tmp->a, (u_int32_t *) addr);
	841	put_fs_long(tmp->b, 1 + (u_int32_t *) addr);
	842	put_fs_word(tmp->exponent, 4 + (short *) addr);
	843	}
	844
	845	static void
	846	put_short_int(const temp_real *tmp,
	847	struct trapframe *info, unsigned short code)
	848	{
	849	char *addr;
	850	temp_int ti;
	851
	852	addr = ea(info,code);
	853	real_to_int(tmp,&ti);
	854	/verify_area(addr,2);/
	855	if (ti.sign)
	856	ti.a = -ti.a;
	857	put_fs_word(ti.a,(short *) addr);
	858	}
	859
	860	static void
	861	put_long_int(const temp_real *tmp,
	862	struct trapframe *info, unsigned short code)
	863	{
	864	char *addr;
	865	temp_int ti;
	866
	867	addr = ea(info,code);
	868	real_to_int(tmp,&ti);
	869	/verify_area(addr,4);/
	870	if (ti.sign)
	871	ti.a = -ti.a;
	872	put_fs_long(ti.a,(u_int32_t *) addr);
	873	}
	874
	875	static void
	876	put_longlong_int(const temp_real *tmp,
	877	struct trapframe *info, unsigned short code)
	878	{
	879	char *addr;
	880	temp_int ti;
	881
	882	addr = ea(info,code);
	883	real_to_int(tmp,&ti);
	884	/verify_area(addr,8);/
	885	if (ti.sign)
	886	__asm__("notl %0 ; notl %1\n\t"
	887	"addl $1,%0 ; adcl $0,%1"
	888	:"=r" (ti.a),"=r" (ti.b)
	889	:"0" (ti.a),"1" (ti.b));
	890	put_fs_long(ti.a,(u_int32_t *) addr);
	891	put_fs_long(ti.b,1 + (u_int32_t *) addr);
	892	}
	893
	894	#define DIV10(low,high,rem) \
	895	__asm__("divl %6 ; xchgl %1,%2 ; divl %6" \
	896	:"=d" (rem),"=a" (low),"=r" (high) \
	897	:"0" (0),"1" (high),"2" (low),"c" (10))
	898
	899	static void
	900	put_BCD(const temp_real tmp,struct trapframe info, unsigned short code)
	901	{
	902	int k,rem;
	903	char *addr;
	904	temp_int i;
	905	unsigned char c;
	906
	907	addr = ea(info,code);
	908	/verify_area(addr,10);/
	909	real_to_int(tmp,&i);
	910	if (i.sign)
	911	put_fs_byte(0x80, addr+9);
	912	else
	913	put_fs_byte(0, addr+9);
	914	for (k = 0; k < 9; k++) {
	915	DIV10(i.a,i.b,rem);
	916	c = rem;
	917	DIV10(i.a,i.b,rem);
	918	c += rem<<4;
	919	put_fs_byte(c,addr++);
	920	}
	921	}
	922
	923	/*
	924	* linux/kernel/math/mul.c
	925	*
	926	* (C) 1991 Linus Torvalds
	927	*/
	928
	929	/*
	930	* temporary real multiplication routine.
	931	*/
	932
	933
	934	static void
	935	shift(int *c)
	936	{
	937	__asm__("movl (%0),%%eax ; addl %%eax,(%0)\n\t"
	938	"movl 4(%0),%%eax ; adcl %%eax,4(%0)\n\t"
	939	"movl 8(%0),%%eax ; adcl %%eax,8(%0)\n\t"
	940	"movl 12(%0),%%eax ; adcl %%eax,12(%0)"
	941	::"r" (c):"ax");
	942	}
	943
	944	static void
	945	mul64(const temp_real a, const temp_real b, int *c)
	946	{
	947	__asm__("movl (%0),%%eax\n\t"
	948	"mull (%1)\n\t"
	949	"movl %%eax,(%2)\n\t"
	950	"movl %%edx,4(%2)\n\t"
	951	"movl 4(%0),%%eax\n\t"
	952	"mull 4(%1)\n\t"
	953	"movl %%eax,8(%2)\n\t"
	954	"movl %%edx,12(%2)\n\t"
	955	"movl (%0),%%eax\n\t"
	956	"mull 4(%1)\n\t"
	957	"addl %%eax,4(%2)\n\t"
	958	"adcl %%edx,8(%2)\n\t"
	959	"adcl $0,12(%2)\n\t"
	960	"movl 4(%0),%%eax\n\t"
	961	"mull (%1)\n\t"
	962	"addl %%eax,4(%2)\n\t"
	963	"adcl %%edx,8(%2)\n\t"
	964	"adcl $0,12(%2)"
	965	::"S" (a),"c" (b),"D" (c)
	966	:"ax","dx");
	967	}
	968
	969	static void
	970	fmul(const temp_real src1, const temp_real src2, temp_real *result)
	971	{
	972	int i,sign;
	973	int tmp[4] = {0,0,0,0};
	974
	975	sign = (src1->exponent ^ src2->exponent) & 0x8000;
	976	i = (src1->exponent & 0x7fff) + (src2->exponent & 0x7fff) - 16383 + 1;
	977	if (i<0) {
	978	result->exponent = sign;
	979	result->a = result->b = 0;
	980	return;
	981	}
	982	if (i>0x7fff) {
	983	set_OE();
	984	return;
	985	}
	986	mul64(src1,src2,tmp);
	987	if (tmp[0] \|\| tmp[1] \|\| tmp[2] \|\| tmp[3])
	988	while (i && tmp[3] >= 0) {
	989	i--;
	990	shift(tmp);
	991	}
	992	else
	993	i = 0;
	994	result->exponent = i \| sign;
	995	result->a = tmp[2];
	996	result->b = tmp[3];
	997	}
	998
	999	/*
	1000	* linux/kernel/math/div.c
	1001	*
	1002	* (C) 1991 Linus Torvalds
	1003	*/
	1004
	1005	/*
	1006	* temporary real division routine.
	1007	*/
	1008
	1009	static void
	1010	shift_left(int *c)
	1011	{
	1012	__asm__ __volatile__("movl (%0),%%eax ; addl %%eax,(%0)\n\t"
	1013	"movl 4(%0),%%eax ; adcl %%eax,4(%0)\n\t"
	1014	"movl 8(%0),%%eax ; adcl %%eax,8(%0)\n\t"
	1015	"movl 12(%0),%%eax ; adcl %%eax,12(%0)"
	1016	::"r" (c):"ax");
	1017	}
	1018
	1019	static void
	1020	shift_right(int *c)
	1021	{
	1022	__asm__("shrl $1,12(%0) ; rcrl $1,8(%0) ; rcrl $1,4(%0) ; rcrl $1,(%0)"
	1023	::"r" (c));
	1024	}
	1025
	1026	static int
	1027	try_sub(int a, int b)
	1028	{
	1029	char ok;
	1030
	1031	__asm__ __volatile__("movl (%1),%%eax ; subl %%eax,(%2)\n\t"
	1032	"movl 4(%1),%%eax ; sbbl %%eax,4(%2)\n\t"
	1033	"movl 8(%1),%%eax ; sbbl %%eax,8(%2)\n\t"
	1034	"movl 12(%1),%%eax ; sbbl %%eax,12(%2)\n\t"
	1035	"setae %%al":"=a" (ok):"c" (a),"d" (b));
	1036	return ok;
	1037	}
	1038
	1039	static void
	1040	div64(int a, int b, int *c)
	1041	{
	1042	int tmp[4];
	1043	int i;
	1044	unsigned int mask = 0;
	1045
	1046	c += 4;
	1047	for (i = 0 ; i<64 ; i++) {
	1048	if (!(mask >>= 1)) {
	1049	c--;
	1050	mask = 0x80000000UL;
	1051	}
	1052	tmp[0] = a[0]; tmp[1] = a[1];
	1053	tmp[2] = a[2]; tmp[3] = a[3];
	1054	if (try_sub(b,tmp)) {
	1055	*c \|= mask;
	1056	a[0] = tmp[0]; a[1] = tmp[1];
	1057	a[2] = tmp[2]; a[3] = tmp[3];
	1058	}
	1059	shift_right(b);
	1060	}
	1061	}
	1062
	1063	static void
	1064	fdiv(const temp_real src1, const temp_real src2, temp_real *result)
	1065	{
	1066	int i,sign;
	1067	int a[4],b[4],tmp[4] = {0,0,0,0};
	1068
	1069	sign = (src1->exponent ^ src2->exponent) & 0x8000;
	1070	if (!(src2->a \|\| src2->b)) {
	1071	set_ZE();
	1072	return;
	1073	}
	1074	i = (src1->exponent & 0x7fff) - (src2->exponent & 0x7fff) + 16383;
	1075	if (i<0) {
	1076	set_UE();
	1077	result->exponent = sign;
	1078	result->a = result->b = 0;
	1079	return;
	1080	}
	1081	a[0] = a[1] = 0;
	1082	a[2] = src1->a;
	1083	a[3] = src1->b;
	1084	b[0] = b[1] = 0;
	1085	b[2] = src2->a;
	1086	b[3] = src2->b;
	1087	while (b[3] >= 0) {
	1088	i++;
	1089	shift_left(b);
	1090	}
	1091	div64(a,b,tmp);
	1092	if (tmp[0] \|\| tmp[1] \|\| tmp[2] \|\| tmp[3]) {
	1093	while (i && tmp[3] >= 0) {
	1094	i--;
	1095	shift_left(tmp);
	1096	}
	1097	if (tmp[3] >= 0)
	1098	set_DE();
	1099	} else
	1100	i = 0;
	1101	if (i>0x7fff) {
	1102	set_OE();
	1103	return;
	1104	}
	1105	if (tmp[0] \|\| tmp[1])
	1106	set_PE();
	1107	result->exponent = i \| sign;
	1108	result->a = tmp[2];
	1109	result->b = tmp[3];
	1110	}
	1111
	1112	/*
	1113	* linux/kernel/math/add.c
	1114	*
	1115	* (C) 1991 Linus Torvalds
	1116	*/
	1117
	1118	/*
	1119	* temporary real addition routine.
	1120	*
	1121	* NOTE! These aren't exact: they are only 62 bits wide, and don't do
	1122	* correct rounding. Fast hack. The reason is that we shift right the
	1123	* values by two, in order not to have overflow (1 bit), and to be able
	1124	* to move the sign into the mantissa (1 bit). Much simpler algorithms,
	1125	* and 62 bits (61 really - no rounding) accuracy is usually enough. The
	1126	* only time you should notice anything weird is when adding 64-bit
	1127	* integers together. When using doubles (52 bits accuracy), the
	1128	* 61-bit accuracy never shows at all.
	1129	*/
	1130
	1131	#define NEGINT(a) \
	1132	__asm__("notl %0 ; notl %1 ; addl $1,%0 ; adcl $0,%1" \
	1133	:"=r" (a->a),"=r" (a->b) \
	1134	:"0" (a->a),"1" (a->b))
	1135
	1136	static void
	1137	signify(temp_real *a)
	1138	{
	1139	a->exponent += 2;
	1140	__asm__("shrdl $2,%1,%0 ; shrl $2,%1"
	1141	:"=r" (a->a),"=r" (a->b)
	1142	:"0" (a->a),"1" (a->b));
	1143	if (a->exponent < 0)
	1144	NEGINT(a);
	1145	a->exponent &= 0x7fff;
	1146	}
	1147
	1148	static void
	1149	unsignify(temp_real *a)
	1150	{
	1151	if (!(a->a \|\| a->b)) {
	1152	a->exponent = 0;
	1153	return;
	1154	}
	1155	a->exponent &= 0x7fff;
	1156	if (a->b < 0) {
	1157	NEGINT(a);
	1158	a->exponent \|= 0x8000;
	1159	}
	1160	while (a->b >= 0) {
	1161	a->exponent--;
	1162	__asm__("addl %0,%0 ; adcl %1,%1"
	1163	:"=r" (a->a),"=r" (a->b)
	1164	:"0" (a->a),"1" (a->b));
	1165	}
	1166	}
	1167
	1168	static void
	1169	fadd(const temp_real src1, const temp_real src2, temp_real *result)
	1170	{
	1171	temp_real a,b;
	1172	int x1,x2,shift;
	1173
	1174	x1 = src1->exponent & 0x7fff;
	1175	x2 = src2->exponent & 0x7fff;
	1176	if (x1 > x2) {
	1177	a = *src1;
	1178	b = *src2;
	1179	shift = x1-x2;
	1180	} else {
	1181	a = *src2;
	1182	b = *src1;
	1183	shift = x2-x1;
	1184	}
	1185	if (shift >= 64) {
	1186	*result = a;
	1187	return;
	1188	}
	1189	if (shift >= 32) {
	1190	b.a = b.b;
	1191	b.b = 0;
	1192	shift -= 32;
	1193	}
	1194	__asm__("shrdl %4,%1,%0 ; shrl %4,%1"
	1195	:"=r" (b.a),"=r" (b.b)
	1196	:"0" (b.a),"1" (b.b),"c" ((char) shift));
	1197	signify(&a);
	1198	signify(&b);
	1199	__asm__("addl %4,%0 ; adcl %5,%1"
	1200	:"=r" (a.a),"=r" (a.b)
	1201	:"0" (a.a),"1" (a.b),"g" (b.a),"g" (b.b));
	1202	unsignify(&a);
	1203	*result = a;
	1204	}
	1205
	1206	/*
	1207	* linux/kernel/math/compare.c
	1208	*
	1209	* (C) 1991 Linus Torvalds
	1210	*/
	1211
	1212	/*
	1213	* temporary real comparison routines
	1214	*/
	1215
	1216
	1217	#define clear_Cx() (I387.swd &= ~0x4500)
	1218
	1219	static void
	1220	normalize(temp_real *a)
	1221	{
	1222	int i = a->exponent & 0x7fff;
	1223	int sign = a->exponent & 0x8000;
	1224
	1225	if (!(a->a \|\| a->b)) {
	1226	a->exponent = 0;
	1227	return;
	1228	}
	1229	while (i && a->b >= 0) {
	1230	i--;
	1231	__asm__("addl %0,%0 ; adcl %1,%1"
	1232	:"=r" (a->a),"=r" (a->b)
	1233	:"0" (a->a),"1" (a->b));
	1234	}
	1235	a->exponent = i \| sign;
	1236	}
	1237
	1238	static void
	1239	ftst(const temp_real *a)
	1240	{
	1241	temp_real b;
	1242
	1243	clear_Cx();
	1244	b = *a;
	1245	normalize(&b);
	1246	if (b.a \|\| b.b \|\| b.exponent) {
	1247	if (b.exponent < 0)
	1248	set_C0();
	1249	} else
	1250	set_C3();
	1251	}
	1252
	1253	static void
	1254	fcom(const temp_real src1, const temp_real src2)
	1255	{
	1256	temp_real a;
	1257
	1258	a = *src1;
	1259	a.exponent ^= 0x8000;
	1260	fadd(&a,src2,&a);
	1261	ftst(&a);
	1262	}
	1263
	1264	static void
	1265	fucom(const temp_real src1, const temp_real src2)
	1266	{
	1267	fcom(src1,src2);
	1268	}
	1269
	1270	/*
	1271	* linux/kernel/math/convert.c
	1272	*
	1273	* (C) 1991 Linus Torvalds
	1274	*/
	1275
	1276
	1277	/*
	1278	* NOTE!!! There are some "non-obvious" optimisations in the temp_to_long
	1279	* and temp_to_short conversion routines: don't touch them if you don't
	1280	* know what's going on. They are the adding of one in the rounding: the
	1281	* overflow bit is also used for adding one into the exponent. Thus it
	1282	* looks like the overflow would be incorrectly handled, but due to the
	1283	* way the IEEE numbers work, things are correct.
	1284	*
	1285	* There is no checking for total overflow in the conversions, though (ie
	1286	* if the temp-real number simply won't fit in a short- or long-real.)
	1287	*/
	1288
	1289	static void
	1290	short_to_temp(const short_real *a, temp_real *b)
	1291	{
		/*
		 * Widen the 32-bit short real *a into the temporary real *b.
		 *
		 * If every bit below bit 31 is clear the result is a zero mantissa
		 * with exponent 0x8000 (when *a is nonzero, i.e. only bit 31 is set)
		 * or 0.  Otherwise the 8-bit exponent field (bits 23-30) is rebiased
		 * from 127 to 16383, the sign is carried into bit 0x8000, and the
		 * fraction is moved to the top of b->b underneath an explicit
		 * leading 1 (0x80000000).  b->a, the low mantissa word, is zero.
		 *
		 * NOTE(review): no code path here treats an all-zero or all-ones
		 * exponent field specially.  The "*a<0" sign test assumes short_real
		 * is a signed 32-bit type - confirm against its typedef.
		 */
	1292	if (!(*a & 0x7fffffff)) {
	1293	b->a = b->b = 0;
	1294	if (*a)
	1295	b->exponent = 0x8000;
	1296	else
	1297	b->exponent = 0;
	1298	return;
	1299	}
	1300	b->exponent = ((*a>>23) & 0xff)-127+16383;
	1301	if (*a<0)
	1302	b->exponent \|= 0x8000;
	1303	b->b = (*a<<8) \| 0x80000000UL;
	1304	b->a = 0;
	1305	}
	1306
	1307	static void
	1308	long_to_temp(const long_real *a, temp_real *b)
	1309	{
		/*
		 * Widen the 64-bit long real *a into the temporary real *b.
		 * a->b is the word holding sign, exponent and the upper fraction
		 * bits; a->a holds the remaining fraction bits.
		 *
		 * If a->a is zero and a->b has nothing set below bit 31, the result
		 * is a zero mantissa with exponent 0x8000 (a->b nonzero) or 0.
		 * Otherwise the 11-bit exponent field (bits 20-30 of a->b) is
		 * rebiased from 1023 to 16383, the sign goes to bit 0x8000, and the
		 * fraction is shifted up by 11 bits across b->b:b->a underneath an
		 * explicit leading 1 (0x80000000).
		 *
		 * NOTE(review): no code path here treats an all-zero or all-ones
		 * exponent field specially.  The "a->b<0" sign test assumes the
		 * field is a signed 32-bit type - confirm against long_real.
		 */
	1310	if (!a->a && !(a->b & 0x7fffffff)) {
	1311	b->a = b->b = 0;
	1312	if (a->b)
	1313	b->exponent = 0x8000;
	1314	else
	1315	b->exponent = 0;
	1316	return;
	1317	}
	1318	b->exponent = ((a->b >> 20) & 0x7ff)-1023+16383;
	1319	if (a->b<0)
	1320	b->exponent \|= 0x8000;
		/* the top 11 bits of a->a slide in below the 20 fraction bits taken from a->b */
	1321	b->b = 0x80000000UL \| (a->b<<11) \| (((u_int32_t)a->a)>>21);
	1322	b->a = a->a<<11;
	1323	}
	1324
	1325	static void
	1326	temp_to_short(const temp_real *a, short_real *b)
	1327	{
		/*
		 * Narrow the temporary real *a to the 32-bit short real *b, rounding
		 * according to ROUNDING.
		 *
		 * An exponent field of 0 gives a zero whose sign bit is set whenever
		 * a->exponent is nonzero.  Otherwise the exponent is rebiased from
		 * 16383 to 127 into bits 23-30, the sign goes to bit 31, and the 23
		 * fraction bits come from a->b >> 8 (the explicit leading 1 is
		 * masked off).
		 *
		 * Rounding: the bits that do not fit are the low 8 bits of a->b plus
		 * the whole of a->a.  They are condensed into "rem": the 8 dropped
		 * bits of a->b shifted up by one, with bit 0 acting as a sticky bit
		 * for a->a.  Hence rem > 0x100 means "more than half a unit" and
		 * rem != 0 means "inexact".  The earlier code tested only
		 * a->b & 0xff, so nonzero bits in a->a were ignored in every mode.
		 * An exact half (rem == 0x100) is still not rounded up.
		 *
		 * The increment is a plain ++*b on purpose: a carry out of the
		 * fraction ripples into the exponent field.  No check is made for
		 * an exponent that does not fit the 8-bit field.
		 */
		u_int32_t rem;
	1328	if (!(a->exponent & 0x7fff)) {
	1329	*b = (a->exponent)?0x80000000UL:0;
	1330	return;
	1331	}
	1332	*b = ((((int32_t) a->exponent)-16383+127) << 23) & 0x7f800000;
	1333	if (a->exponent < 0)
	1334	*b \|= 0x80000000UL;
	1335	*b \|= (a->b >> 8) & 0x007fffff;
		rem = ((u_int32_t)a->b & 0xff) << 1;
		if (a->a)
		rem++;  /* sticky bit: something nonzero further down */
	1336	switch ((int)ROUNDING) {
	1337	case ROUND_NEAREST:
	1338	if (rem > 0x100)
	1339	++*b;
	1340	break;
	1341	case ROUND_DOWN:
		/* toward minus infinity: only negative values grow in magnitude */
	1342	if ((a->exponent & 0x8000) && rem)
	1343	++*b;
	1344	break;
	1345	case ROUND_UP:
		/* toward plus infinity: only non-negative values grow in magnitude */
	1346	if (!(a->exponent & 0x8000) && rem)
	1347	++*b;
	1348	break;
	1349	}
	1350	}
	1351
	1352	static void
	1353	temp_to_long(const temp_real *a, long_real *b)
	1354	{
		/*
		 * Narrow the temporary real *a to the 64-bit long real *b, rounding
		 * according to ROUNDING.
		 *
		 * An exponent field of 0 gives a zero whose sign bit is set whenever
		 * a->exponent is nonzero.  Otherwise the exponent is rebiased from
		 * 16383 to 1023 into bits 20-30 of b->b, the sign goes to bit 31,
		 * and the mantissa is shifted down by 11 bits: b->b receives 20
		 * fraction bits from a->b (the explicit leading 1 is masked off),
		 * b->a receives the low 11 bits of a->b followed by the top 21 bits
		 * of a->a.
		 *
		 * Rounding: the bits that do not fit are the low 11 bits of a->a
		 * (mask 0x7ff).  ROUND_UP and ROUND_DOWN previously tested
		 * a->b & 0xff, which are bits that are kept in the result; they now
		 * test the same 11 dropped bits that ROUND_NEAREST uses.
		 * NOTE(review): an exact half (0x400) is not rounded up in
		 * ROUND_NEAREST, so ties are not rounded to even.
		 *
		 * The increment is an add/adc pair across b->a:b->b on purpose: a
		 * carry out of the fraction ripples into the exponent field.  No
		 * check is made for an exponent that does not fit the 11-bit field.
		 */
	1355	if (!(a->exponent & 0x7fff)) {
	1356	b->a = 0;
	1357	b->b = (a->exponent)?0x80000000UL:0;
	1358	return;
	1359	}
	1360	b->b = (((0x7fff & (int32_t) a->exponent)-16383+1023) << 20) &
	1361	0x7ff00000;
	1362	if (a->exponent < 0)
	1363	b->b \|= 0x80000000UL;
	1364	b->b \|= (a->b >> 11) & 0x000fffff;
	1365	b->a = a->b << 21;
	1366	b->a \|= (a->a >> 11) & 0x001fffff;
	1367	switch ((int)ROUNDING) {
	1368	case ROUND_NEAREST:
	1369	if ((a->a & 0x7ff) > 0x400)
	1370	__asm__("addl $1,%0 ; adcl $0,%1"
	1371	:"=r" (b->a),"=r" (b->b)
	1372	:"0" (b->a),"1" (b->b));
	1373	break;
	1374	case ROUND_DOWN:
		/* toward minus infinity: only negative values grow in magnitude */
	1375	if ((a->exponent & 0x8000) && (a->a & 0x7ff))
	1376	__asm__("addl $1,%0 ; adcl $0,%1"
	1377	:"=r" (b->a),"=r" (b->b)
	1378	:"0" (b->a),"1" (b->b));
	1379	break;
	1380	case ROUND_UP:
		/* toward plus infinity: only non-negative values grow in magnitude */
	1381	if (!(a->exponent & 0x8000) && (a->a & 0x7ff))
	1382	__asm__("addl $1,%0 ; adcl $0,%1"
	1383	:"=r" (b->a),"=r" (b->b)
	1384	:"0" (b->a),"1" (b->b));
	1385	break;
	1386	}
	1387	}
	1388
	1389	static void
	1390	frndint(const temp_real *a, temp_real *b)
	1391	{
		/*
		 * Round the temporary real *a to an integral value, stored in *b as
		 * a temporary real, according to ROUNDING.
		 *
		 * "shift" is the number of mantissa bits that lie below the binary
		 * point.  A negative shift (nothing below the point) or an exponent
		 * field of 0 copies *a to *b unchanged.  Otherwise the mantissa is
		 * moved right by "shift" bits, with whole 32-bit words handled by
		 * the if/else chain and the remaining 0-31 bits by the three shift
		 * instructions, while the exponent is raised by the same amount.
		 * The bits pushed out collect in "underflow".  After the rounding
		 * increment the value is renormalized (shifted left until the top
		 * bit of b->b is set), or given exponent 0 if it rounded to zero.
		 *
		 * When shift >= 96 the whole mantissa is below the point and
		 * underflow is just a sticky 1.  The exponent is now raised in that
		 * branch as well; previously it was left untouched, so a directed
		 * rounding that turns a tiny value into 1 came out with the
		 * exponent of the tiny input minus 63 instead of that of 1.
		 *
		 * NOTE(review): the ROUND_NEAREST asm writes to operand %5, which
		 * is declared as an input ("m").  The result rounded to zero loses
		 * its sign because the exponent is overwritten with 0.
		 */
	1392	int shift = 16383 + 63 - (a->exponent & 0x7fff);
	1393	u_int32_t underflow;
	1394
	1395	if ((shift < 0) \|\| (shift == 16383+63)) {
	1396	*b = *a;
	1397	return;
	1398	}
	1399	b->a = b->b = underflow = 0;
	1400	b->exponent = a->exponent;
	1401	if (shift < 32) {
	1402	b->b = a->b; b->a = a->a;
	1403	} else if (shift < 64) {
	1404	b->a = a->b; underflow = a->a;
	1405	shift -= 32;
	1406	b->exponent += 32;
	1407	} else if (shift < 96) {
	1408	underflow = a->b;
	1409	shift -= 64;
	1410	b->exponent += 64;
	1411	} else {
	1412	underflow = 1;
		/* exponent field + shift is always 16383+63, the integer position */
		b->exponent += shift;
	1413	shift = 0;
	1414	}
	1415	b->exponent += shift;
		/* 96-bit right shift of b->b : b->a : underflow by the remaining 0-31 bits */
	1416	__asm__("shrdl %2,%1,%0"
	1417	:"=r" (underflow),"=r" (b->a)
	1418	:"c" ((char) shift),"0" (underflow),"1" (b->a));
	1419	__asm__("shrdl %2,%1,%0"
	1420	:"=r" (b->a),"=r" (b->b)
	1421	:"c" ((char) shift),"0" (b->a),"1" (b->b));
	1422	__asm__("shrl %1,%0"
	1423	:"=r" (b->b)
	1424	:"c" ((char) shift),"0" (b->b));
	1425	switch ((int)ROUNDING) {
	1426	case ROUND_NEAREST:
		/*
		 * Adding 0x7fffffff (plus 1 when the kept value is odd) to
		 * underflow carries out exactly when the dropped part exceeds
		 * one half, or equals one half with an odd kept value; the
		 * carry is then added into b->a and b->b.
		 */
	1427	__asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
	1428	:"=r" (b->a),"=r" (b->b)
	1429	:"0" (b->a),"1" (b->b)
	1430	,"r" (0x7fffffff + (b->a & 1))
	1431	,"m" (*&underflow));
	1432	break;
	1433	case ROUND_UP:
		/* non-negative (sign bit clear) and inexact: bump the magnitude */
	1434	if ((b->exponent >= 0) && underflow)
	1435	__asm__("addl $1,%0 ; adcl $0,%1"
	1436	:"=r" (b->a),"=r" (b->b)
	1437	:"0" (b->a),"1" (b->b));
	1438	break;
	1439	case ROUND_DOWN:
		/* negative (sign bit set) and inexact: bump the magnitude */
	1440	if ((b->exponent < 0) && underflow)
	1441	__asm__("addl $1,%0 ; adcl $0,%1"
	1442	:"=r" (b->a),"=r" (b->b)
	1443	:"0" (b->a),"1" (b->b));
	1444	break;
	1445	}
	1446	if (b->a \|\| b->b)
	1447	while (b->b >= 0) {
	1448	b->exponent--;
	1449	__asm__("addl %0,%0 ; adcl %1,%1"
	1450	:"=r" (b->a),"=r" (b->b)
	1451	:"0" (b->a),"1" (b->b));
	1452	}
	1453	else
	1454	b->exponent = 0;
	1455	}
	1456
	1457	static void
	1458	Fscale(const temp_real *a, const temp_real *b, temp_real *c)
	1459	{
		/*
		 * Scale *a by a power of two: *c is a copy of *a whose exponent has
		 * the integer value of *b added (or subtracted when that integer is
		 * negative).  *b is converted with real_to_int(), so the current
		 * rounding mode applies to it.  A zero mantissa in *a short-circuits
		 * to exponent 0 without looking at *b.
		 *
		 * The copy must go through the pointers (*c = *a); assigning the
		 * pointer parameters themselves would leave the caller's result
		 * untouched.
		 *
		 * NOTE(review): the exponent adjustment is not range-checked, so a
		 * large scale factor can run into the sign bit.
		 */
	1460	temp_int ti;
	1461
	1462	*c = *a;
	1463	if(!c->a && !c->b) { /* 19 Sep 92*/
	1464	c->exponent = 0;
	1465	return;
	1466	}
	1467	real_to_int(b, &ti);
	1468	if(ti.sign)
	1469	c->exponent -= ti.a;
	1470	else
	1471	c->exponent += ti.a;
	1472	}
	1473
	1474	static void
	1475	real_to_int(const temp_real *a, temp_int *b)
	1476	{
		/*
		 * Convert the temporary real *a to the sign-and-magnitude integer
		 * *b (b->sign plus the 64-bit magnitude b->b:b->a), rounding
		 * according to ROUNDING.
		 *
		 * "shift" is the number of mantissa bits that lie below the binary
		 * point.  A negative shift means the value needs more than 64 bits:
		 * set_OE() is called and *b is left with a zero magnitude.
		 * Otherwise the mantissa is moved right by "shift" bits, with whole
		 * 32-bit words handled by the if/else chain and the remaining 0-31
		 * bits by the three shift instructions; the bits pushed out collect
		 * in "underflow" and drive the rounding increment.
		 *
		 * When shift >= 96 the whole mantissa is below the point and
		 * underflow only has to say whether anything nonzero was dropped.
		 * It used to be forced to 1, which made an exact zero convert to 1
		 * under ROUND_UP (and a negative zero to -1 under ROUND_DOWN); it is
		 * now nonzero only when the mantissa is.
		 *
		 * NOTE(review): the ROUND_NEAREST asm writes to operand %5, which
		 * is declared as an input ("m").
		 */
	1477	int shift = 16383 + 63 - (a->exponent & 0x7fff);
	1478	u_int32_t underflow;
	1479
	1480	b->a = b->b = underflow = 0;
	1481	b->sign = (a->exponent < 0);
	1482	if (shift < 0) {
	1483	set_OE();
	1484	return;
	1485	}
	1486	if (shift < 32) {
	1487	b->b = a->b; b->a = a->a;
	1488	} else if (shift < 64) {
	1489	b->a = a->b; underflow = a->a;
	1490	shift -= 32;
	1491	} else if (shift < 96) {
	1492	underflow = a->b;
	1493	shift -= 64;
	1494	} else {
		/* 0, 1 or 2: small enough never to carry in ROUND_NEAREST, nonzero iff inexact */
	1495	underflow = (a->a != 0) + (a->b != 0);
	1496	shift = 0;
	1497	}
		/* 96-bit right shift of b->b : b->a : underflow by the remaining 0-31 bits */
	1498	__asm__("shrdl %2,%1,%0"
	1499	:"=r" (underflow),"=r" (b->a)
	1500	:"c" ((char) shift),"0" (underflow),"1" (b->a));
	1501	__asm__("shrdl %2,%1,%0"
	1502	:"=r" (b->a),"=r" (b->b)
	1503	:"c" ((char) shift),"0" (b->a),"1" (b->b));
	1504	__asm__("shrl %1,%0"
	1505	:"=r" (b->b)
	1506	:"c" ((char) shift),"0" (b->b));
	1507	switch ((int)ROUNDING) {
	1508	case ROUND_NEAREST:
		/*
		 * Adding 0x7fffffff (plus 1 when the kept value is odd) to
		 * underflow carries out exactly when the dropped part exceeds
		 * one half, or equals one half with an odd kept value; the
		 * carry is then added into b->a and b->b.
		 */
	1509	__asm__("addl %4,%5 ; adcl $0,%0 ; adcl $0,%1"
	1510	:"=r" (b->a),"=r" (b->b)
	1511	:"0" (b->a),"1" (b->b)
	1512	,"r" (0x7fffffff + (b->a & 1))
	1513	,"m" (*&underflow));
	1514	break;
	1515	case ROUND_UP:
		/* non-negative and inexact: bump the magnitude */
	1516	if (!b->sign && underflow)
	1517	__asm__("addl $1,%0 ; adcl $0,%1"
	1518	:"=r" (b->a),"=r" (b->b)
	1519	:"0" (b->a),"1" (b->b));
	1520	break;
	1521	case ROUND_DOWN:
		/* negative and inexact: bump the magnitude */
	1522	if (b->sign && underflow)
	1523	__asm__("addl $1,%0 ; adcl $0,%1"
	1524	:"=r" (b->a),"=r" (b->b)
	1525	:"0" (b->a),"1" (b->b));
	1526	break;
	1527	}
	1528	}
	1529
	1530	static void
	1531	int_to_real(const temp_int *a, temp_real *b)
	1532	{
		/*
		 * Convert the sign-and-magnitude integer *a to the temporary real
		 * *b.  The 64-bit magnitude is copied into the mantissa with the
		 * exponent set to 16383 + 63 (mantissa read as an integer), plus
		 * the sign bit 0x8000 when a->sign is set.  The mantissa is then
		 * shifted left, decrementing the exponent, until the top bit of
		 * b->b is set.  A zero magnitude gives exponent 0, sign dropped.
		 *
		 * The loop's "b->b >= 0" test assumes b is a signed 32-bit field -
		 * confirm against the temp_real definition.
		 */
	1533	b->a = a->a;
	1534	b->b = a->b;
	1535	if (b->a \|\| b->b)
	1536	b->exponent = 16383 + 63 + (a->sign? 0x8000:0);
	1537	else {
	1538	b->exponent = 0;
	1539	return;
	1540	}
	1541	while (b->b >= 0) {
	1542	b->exponent--;
		/* 64-bit left shift by one across b->a (low) and b->b (high) */
	1543	__asm__("addl %0,%0 ; adcl %1,%1"
	1544	:"=r" (b->a),"=r" (b->b)
	1545	:"0" (b->a),"1" (b->b));
	1546	}
	1547	}
	1548
	1549	static int
	1550	fpu_modevent(module_t mod, int type, void *unused)
	1551	{
		/*
		 * Module event handler: installs or removes math_emulate as the
		 * pmath_emulate hook.
		 *
		 *   MOD_LOAD    EBUSY if a hook is already installed, otherwise
		 *               install ours.
		 *   MOD_UNLOAD  EACCES if the installed hook is not ours, otherwise
		 *               clear it.
		 *   other       ignored.
		 *
		 * Returns 0 on success or for ignored events.  "mod" and "unused"
		 * are not referenced.  Success messages are printed only when
		 * bootverbose is set; the refusal messages are always printed.
		 */
	1552	switch (type) {
	1553	case MOD_LOAD:
	1554	if (pmath_emulate) {
	1555	kprintf("Another Math emulator already present\n");
	1556	return EBUSY;
	1557	}
	1558	pmath_emulate = math_emulate;
	1559	if (bootverbose)
	1560	kprintf("Math emulator present\n");
	1561	break;
	1562	case MOD_UNLOAD:
	1563	if (pmath_emulate != math_emulate) {
	1564	kprintf("Cannot unload another math emulator\n");
	1565	return EACCES;
	1566	}
	1567	pmath_emulate = 0;
	1568	if (bootverbose)
	1569	kprintf("Math emulator unloaded\n");
	1570	break;
	1571	default:
	1572	break;
	1573	}
	1574	return 0;
	1575	}
	1576	static moduledata_t fpumod = {
		/*
		 * Module descriptor passed to DECLARE_MODULE below.  The three
		 * initializers are positional; presumably module name, event
		 * handler and handler argument - confirm against moduledata_t.
		 */
	1577	"fpu",
	1578	fpu_modevent,
	1579	0
	1580	};
		/* Register the descriptor as module "fpu" at SI_SUB_DRIVERS, with no ordering preference (SI_ORDER_ANY). */
	1581	DECLARE_MODULE(fpu, fpumod, SI_SUB_DRIVERS, SI_ORDER_ANY);