/*
 * INSN.C
 *
 * (c)Copyright 2016, Matthew Dillon, All Rights Reserved.  See the
 *    COPYRIGHT file at the base of the distribution.
 *
 * Emit the instruction as x86 assembly.  If the EA/argument form
 * is not compatible we make it compatible.
 */

#include "defs.h"
#include "insnx86.h"

static void doInsnRM2NoReadDst(RASParser *p, rinsn_t *rin,
			const char *x86op, uint8_t extov);
static void doInsnMR2NoReadDst(RASParser *p, rinsn_t *rin,
			const char *x86op, uint8_t extov);
static void doInsnRMMR3(RASParser *p, rinsn_t *rin, const char *x86op);
static void doInsnMR3(RASParser *p, rinsn_t *rin, const char *x86op,
			int nodrd);
static void doInsn1R(RASParser *p, rinsn_t *rin, const char *x86op1,
			const char *x86op2, const char *x86opt2arg);
static void doInsnSHIFT3(RASParser *p, rinsn_t *rin, const char *x86op);
static void doInsnDXAX3(RASParser *p, rinsn_t *rin, const char *x86op,
			int issigned, int dxresult);
static int doInsnFloating3(RASParser *p, rinsn_t *rin, const char *x86op,
			int isfcmp);
static void doMOVE(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea);
static void doMOVEExt(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea,
			uint8_t extov);
static void doLEA(RASParser *p, rea_t *sea, rea_t *dea);
static void doSpecialSave(rinsn_t *rin);
static void doSpecialRestore(rinsn_t *rin);
static int InsnSimplifyRMMRCMP(RASParser *p, rinsn_t *rin,
			rea_t **sea1p, rea_t **sea2p);
static int InsnSimplifyMRFCMP(RASParser *p, rinsn_t *rin,
			rea_t **sea1p, rea_t **sea2p);
static rea_t *InsnSimplifyFP128(RASParser *p, rinsn_t *rin,
			rea_t *sea, rea_t *tea, uint8_t ext,
			int rspoff, int loadme);
static void printINSN(rinsn_t *rin, const char *x86op,
			rea_t *sea, rea_t *dea, uint8_t extov);
static void printEA(rea_t *rea, uint8_t extov);
static void printREGTGT(uint16_t target_reg, uint32_t regno, uint8_t ext);
static void printREG(rea_t *rea, uint8_t ext);
static void printBRLabel(rsym_t *label);
static const char *x86ext(char c, uint8_t argflags);
static int findfreebit(uint32_t regno, uint64_t mask);
#if 0
static void clearregbit(uint64_t *mask, rea_t *rea);
#endif
static const char *x86branch(uint32_t op, int invert, int reverse);
static int sameEA(rea_t *rea1, rea_t *rea2);
static int adjacentLabel(rinsn_t *rin, rsym_t *sym);
static rinsn_t *allocInsnBlock(RASParser *p, rblock_t *rblock,
			uint32_t op, uint8_t ext, int args);
static void initEA(rea_t *rea, uint8_t eamode, uint32_t regno,
			uint16_t target_reg);

static void RegAllocatorX86(RASParser *p, rblock_t *rblock,
			runesize_t stacksize);
static void RegAllocatorScan(RASParser *p, rblock_t *rblock);
static void RegAllocatorClear(RASParser *p, rblock_t *rblock);

static uint8_t ExtSize;			/* REXT_* matching sizeof(runesize_t) */
static char X86Size;			/* first char of x86ext(ExtSize, 0) */
static uint64_t SaveMask;
static int RegAllocWeight;
static runesize_t ExtraStackBase;
static runesize_t ExtraStackSpace;

/*
 * Global initialization (occurs after initial symbol load)
 *
 * Selects the native operand extension from sizeof(runesize_t), caches
 * the matching instruction suffix character, and installs the x86
 * register allocator.
 */
void
InsnTargetInit(void)
{
	ExtSize = (sizeof(runesize_t) == 4) ? REXT_I32 : REXT_I64;
	X86Size = x86ext(ExtSize, 0)[0];
	RegAllocator = RegAllocatorX86;
}

/*
 * Target-specific adjustments for this assembly emitter.  Called during
 * initial parsing, prior to any characterization or optimization.
 *
 * Specify registers which get clobbered by certain instructions.  Note
 * that most procedure calls will clobber a *lot* of registers, including
 * all the %xmm* registers (if we want to be compatible with C anyhow).
 *
 * Our register optimizer is capable of saving/restoring some scratch
 * registers around procedure calls, see X86_REGF_SPECIAL_SAVE.
 *
 * The accumulated register mask is OR'd into rin->regused_init.
 */
void
InsnTargetAdjust(RASParser *p, rinsn_t *rin)
{
	uint64_t raf;
	rea_t *rea1;
	rea_t *rea2;
	rea_t *rea3;
	rea_t *rea4;

	/*
	 * This is a bit of a brute-force approach, but the registerizer
	 * has to know before we actually start emitting instructions.
	 */
	if (rin->op & INSNF_FLOAT) {
		/*
		 * FP instructions may need to use XMM0 and/or XMM1 as
		 * temporaries to fix incompatible EAs.  Generally speaking
		 * all operands for 128-bit FP instructions set ADDRUSED
		 * to try to prevent caching in %xmm registers because we
		 * have to use memory ops for 80/128-bit FP operations.
		 *
		 * We have a bit of sophistication here to allow conditionals
		 * with boolean results to cache the boolean result.
		 */
		raf = X86_REGF_XMM0 | X86_REGF_XMM1;
		if (rin->ext1 == REXT_I128) {
			rea1 = rin->arg1.rea;
			rea2 = rin->arg2.rea;
			rea3 = rin->arg3.rea;
			if (rea1) {
				rea1->flags |= REAF_ADDRUSED;
				/*this messes up matching */
				/*rea1->flags &= ~REAF_CACHEABLE;*/
			}
			if (rin->op & INSNF_COND) {
				if (rin->operands > 2 ||
				    (rin->operands == 2 &&
				     (rin->flags & RINSF_BRANCH))) {
					rea2->flags |= REAF_ADDRUSED;
					/*this messes up matching */
					/*rea2->flags &= ~REAF_CACHEABLE;*/
				}
				if (rin->operands > 3 ||
				    (rin->operands == 3 &&
				     (rin->flags & RINSF_BRANCH))) {
					rea3->flags |= REAF_ADDRUSED;
					/*this messes up matching */
					/*rea3->flags &= ~REAF_CACHEABLE;*/
				}
			} else {
				if (rea2) {
					rea2->flags |= REAF_ADDRUSED;
					/*this messes up matching */
					/*rea2->flags &= ~REAF_CACHEABLE;*/
				}
				if (rea3) {
					rea3->flags |= REAF_ADDRUSED;
					/*this messes up matching */
					/*rea3->flags &= ~REAF_CACHEABLE;*/
				}
			}
			rea4 = rin->arg4.rea;
			if (rea4) {
				rea4->flags |= REAF_ADDRUSED;
				/*rea4->flags &= ~REAF_CACHEABLE;*/
			}
		}
	} else {
		/*
		 * Integer instructions might need to use RCX as a temporary
		 * to fix incompatible EAs.  Other requirements are switched
		 * on below.
		 */
		raf = X86_REGF_RCX;
	}

	/*
	 * NOTE(review): InsnILOCKH(), InsnIUNLOCKH() and InsnINEWN() below
	 * also emit runtime calls but their opcodes have no case here,
	 * unlike the PLOCKH/PUNLOCKH variants -- confirm whether they need
	 * the same RAX + CALLSCR_RUNTIME reservation.
	 */
	switch(rin->op) {
	case INSN_BZERO:
		/*
		 * For stosb
		 */
		raf |= X86_REGF_RAX;
		raf |= X86_REGF_RCX | X86_REGF_RDI;
		break;
	case INSN_BCOPY:
		/*
		 * For movsb
		 */
		raf |= X86_REGF_RCX | X86_REGF_RSI | X86_REGF_RDI;
		break;
	case INSN_MULU:
	case INSN_MULS:
	case INSN_DIVU:
	case INSN_DIVS:
	case INSN_MODU:
	case INSN_MODS:
		/*
		 * We may need/clobber these regs, depending.
		 */
		raf |= X86_REGF_RAX | X86_REGF_RDX;
		break;
	case INSN_QCALL:
		/*
		 * System interface call
		 */
		raf |= X86_REGF_RAX;
		raf |= X86_STDF_ARG1 | X86_STDF_ARG2 | X86_STDF_ARG3;
		raf |= X86_REGF_CALLSCR_STD;
		break;
	case INSN_RCALL:
		raf |= X86_REGF_RAX;
		raf |= X86_RUNTIMEF_ARG1 | X86_RUNTIMEF_ARG2 |
		       X86_RUNTIMEF_ARG3;
		raf |= X86_REGF_CALLSCR_RUNTIME;
		break;
	case INSN_CALL:
	case INSN_TCALL:
		/*
		 * Call arguments %ap:RSI %rp:RDI [%sg:RDX]	(linux/bsd)
		 * Call arguments %ap:RCX %rp:RDX [%sg:R8]	(ms)
		 */
		raf |= X86_REGF_RAX;
		raf |= X86_RUNEF_ARG1 | X86_RUNEF_ARG2 | X86_RUNEF_ARG3;
		raf |= X86_REGF_CALLSCR_RUNE;
		break;
	case INSN_ASL:
	case INSN_ASR:
	case INSN_LSR:
		raf |= X86_REGF_RAX | X86_REGF_RCX;
		break;
	case INSN_PCOPY:
	case INSN_PCHECK:
		raf |= X86_REGF_CALLSCR_RUNTIME;
		break;
	case INSN_DET:
		raf |= X86_REGF_CALLSCR_RUNTIME;
		break;
	case INSN_PGET:
	case INSN_PGETH:
	case INSN_PPUT:
	case INSN_PPUTH:
	case INSN_PREF:
	case INSN_PREL:
	case INSN_PLOCK:
	case INSN_PLOCKH:
	case INSN_PUNLOCK:
	case INSN_PUNLOCKH:
	case INSN_PATOM:
	case INSN_PIGET:
	case INSN_PIGETH:
	case INSN_STKIREF:
	case INSN_STKIGET:
	case INSN_STKIGETH:
	case INSN_SRSGET:
	case INSN_SRSGETH:
	case INSN_SRSPUT:
	case INSN_SRSPUTH:
	case INSN_LVALLOC:
	case INSN_IGET:
	case INSN_IGETH:
	case INSN_IPUT:
	case INSN_IPUTH:
	case INSN_IREF:
	case INSN_IREL:
	case INSN_ILOCK:
	case INSN_IUNLOCK:
	case INSN_INEW:
	case INSN_IATOM:
		raf |= X86_REGF_RAX;
		raf |= X86_REGF_CALLSCR_RUNTIME;
		break;
	case INSN_CMPTYPE:
	case INSN_BND_TRAP:
	case INSN_TSCHED:
		raf |= X86_REGF_CALLSCR_RUNTIME;
		break;
	case INSN_UITOF:
	case INSN_SITOF:
	case INSN_FTOUI:
	case INSN_FTOSI:
	case INSN_CASTF:
		/*
		 * Misc FP conversions might use these three registers.
		 * (Plain assignment: this set is a superset of either
		 * initial value of raf computed above.)
		 */
		raf = X86_REGF_XMM0 | X86_REGF_XMM1 | X86_REGF_RCX;
		break;
	default:
		break;
	}
	rin->regused_init |= raf;
}

/*
 * We need to formally add some instructions to the basic block to
 * access the procedure arguments (%ap, %rp, and potentially %sg).  This
 * allows us to use the register allocator to optimize which regs they
 * go into.
 *
 * These instructions are set up to make sure that the register allocator
 * does not blow away the arguments before we've had a chance to load
 * them into the proper registers.  We must also ensure that the source
 * and destination EAs never match which we do by setting cache_id.
 */
void
InsnProcedureBasicBlock(RASParser *p, rblock_t *rblock,
			runesize_t bytes, runesize_t align)
{
	rinsn_t *rin;

	/* %ap: keep ARG2/ARG3 reserved until they have been loaded */
	rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
	initEA(rin->arg1.rea, EA_DIRECT, REG_AP, X86_RUNE_ARG1);
	initEA(rin->arg2.rea, EA_DIRECT, REG_AP, 0);
	rin->arg2.flags |= RAF_WRITE;
	rin->arg1.rea->cache_id = 1;
	rin->regused_init |= X86_RUNEF_ARG2;
	rin->regused_init |= X86_RUNEF_ARG3;

	/* %rp: keep ARG3 reserved until it has been loaded */
	rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
	initEA(rin->arg1.rea, EA_DIRECT, REG_RP, X86_RUNE_ARG2);
	initEA(rin->arg2.rea, EA_DIRECT, REG_RP, 0);
	rin->arg2.flags |= RAF_WRITE;
	rin->arg1.rea->cache_id = 1;
	rin->regused_init |= X86_RUNEF_ARG3;

	/* %sg */
	rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
	initEA(rin->arg1.rea, EA_DIRECT, REG_SG, X86_RUNE_ARG3);
	initEA(rin->arg2.rea, EA_DIRECT, REG_SG, 0);
	rin->arg2.flags |= RAF_WRITE;
	rin->arg1.rea->cache_id = 1;
}

/*
 * Output prologue
 *
 * Emits the symbol directives, pushes the call-saved general registers
 * selected by SaveMask, subtracts the frame from the stack pointer and
 * saves the call-saved %xmm registers above the variable/extra space.
 * Records ProcStackSize and ProcStackPad for InsnProcedureEnd().
 */
void
InsnProcedureStart(RASParser *p, runesize_t bytes, runesize_t align)
{
	static rea_t xrsp;
	rsym_t *psym = p->psym;
	int count;	/* total bytes pushed on stack */
	int fcount;	/* floating point subsection */
	int i;

	printf("\n");
	printf("\t# PROC %s, %jd, %jd\n",
	       psym->id, (intmax_t)bytes, (intmax_t)align);
	printf("\t.text\n");
	printf("\t.globl\t%s\n", psym->id + 1);	/* skip the '@' */
	printf("\t.type\t%s, @function\n", psym->id + 1);
	printf("%s:\n", psym->id + 1);
	printf("\t.cfi_startproc\n");

	/*
	 * Push call-saved general registers (indices below XMM0).
	 */
	count = 0;
	for (i = 0; i < (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); ++i) {
		if ((SaveMask & (1LLU << i)) == 0)
			continue;
		if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
			printf("\tpush%c\t", X86Size);
			printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
			printf("\n");
			count += sizeof(void *);
			/* do not bump bytes, not included */
		}
	}

	/*
	 * Count call-saved FP registers, 16 bytes of frame each.
	 */
	fcount = 0;
	for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); i < 64; ++i) {
		if ((SaveMask & (1LLU << i)) == 0)
			continue;
		if (X86_REGF_CALLSAVE_RUNE & (1LLU << i))
			++fcount;
	}
	bytes += fcount * 16;	/* (temporary) */
	if (fcount && align < 16)
		align = 16;

	/*
	 * We may need extra stack space for ABI calls
	 */
	if (ExtraStackSpace) {
		if (align < 16)
			align = 16;
		bytes = (bytes + 15) & ~15;
		bytes += ExtraStackSpace;
	}

	if (bytes) {
		size_t pad;

		if (align < 16)
			align = 16;
		if (bytes < align)
			bytes = align;
		bytes = (bytes + align - 1) & ~(align - 1);

		/*
		 * 16-byte align our pushes + %rip to ensure that the
		 * resulting stack pointer is fully aligned.
		 */
		count += sizeof(void *);		/* include %rip */
		pad = ((count + 15) & ~15) - count;	/* 16-byte align */
		bytes += pad;

		xrsp.target_reg = X86_REG_RSP;
		xrsp.eamode = EA_DIRECT;
		printf("\tsub%c\t$%jd, ", X86Size, (intmax_t)bytes);
		printEA(&xrsp, ExtSize);
		printf("\n");
		count += bytes;
		bytes -= pad;	/* undo */
		ProcStackPad = pad;
	} else {
		ProcStackPad = 0;
	}

	/*
	 * Save FP registers past the nominal stack and extra space.
	 */
	bytes -= fcount * 16;	/* undo */
	fcount = 0;
	for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); i < 64; ++i) {
		if ((SaveMask & (1LLU << i)) == 0)
			continue;
		if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
			printf("\tmovaps\t");
			printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
			/* %jd + cast: runesize_t is not necessarily ssize_t */
			printf(", %jd(%%rsp)\n",
			       (intmax_t)(bytes + fcount * 16));
			++fcount;
		}
	}

	/*
	 * count is the whole frame
	 *
	 * ProcStackSize is just the base variable and extra space, and does
	 * not count the register save area, pad, or %rip.
	 */
	printf("\t.cfi_def_cfa_offset\t%jd\n", (intmax_t)(count));
	ProcStackSize = bytes;
}

/*
 * Output epilogue
 *
 * Mirrors InsnProcedureStart(): reload the saved %xmm registers, release
 * the frame, pop the saved general registers in reverse order, return.
 * The GPR/XMM split uses the same expression as the prologue so the two
 * can never disagree about which SaveMask bits are pushed vs. movaps'd.
 */
void
InsnProcedureEnd(RASParser *p, runesize_t bytes, runesize_t align)
{
	static rea_t xrsp;
	int fcount;
	int i;

	/*
	 * Restore FP regs after the variable space
	 */
	fcount = 0;
	for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); i < 64; ++i) {
		if ((SaveMask & (1LLU << i)) == 0)
			continue;
		if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
			printf("\tmovaps\t");
			printf("%jd(%%rsp), ",
			       (intmax_t)(ProcStackSize + fcount * 16));
			printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
			printf("\n");
			++fcount;
		}
	}

	/*
	 * Include the FP and %rip save space in the %rsp restore
	 */
	bytes = ProcStackSize + fcount * 16 + ProcStackPad;
	if (bytes) {
		xrsp.target_reg = X86_REG_RSP;
		xrsp.eamode = EA_DIRECT;
		printf("\tadd%c\t$%jd, ", X86Size, (intmax_t)bytes);
		printEA(&xrsp, ExtSize);
		printf("\n");
	}

	/*
	 * Use pop for normal registers (reverse of the push order)
	 */
	for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC) - 1; i >= 0; --i) {
		if ((SaveMask & (1LLU << i)) == 0)
			continue;
		if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
			printf("\tpop%c\t", X86Size);
			printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
			printf("\n");
		}
	}
	printf("\tret%c\n", X86Size);
	printf("\t.cfi_endproc\n");
}

/*
 * Emit a procedure-local label (.L<ProcNo><id>:)
 */
void
InsnLABEL(RASParser *p, rinsn_t *rin)
{
	printf(".L%d%s:\n", ProcNo, rin->label->id);
}

/*
 * MOVE sea,dea
 *
 * 128-bit memory-to-memory moves are flagged RAF_FLOAT on both operands
 * before dispatching on whether the source is a memory EA.
 */
void
InsnMOVE(RASParser *p, rinsn_t *rin)
{
	if (rin->arg1.rea->eamode == EA_MEMORY &&
	    rin->arg2.rea->eamode == EA_MEMORY &&
	    rin->ext1 == REXT_I128) {
		rin->arg1.flags |= RAF_FLOAT;
		rin->arg2.flags |= RAF_FLOAT;
	}
	if (rin->arg1.rea->eamode == EA_MEMORY)
		doInsnMR2NoReadDst(p, rin, "mov", 0);
	else
		doInsnRM2NoReadDst(p, rin, "mov", 0);
}

/*
 * Simple integer ALU operations, forwarded to the generic 3-operand
 * emitter with the x86 mnemonic.
 */
void
InsnADD(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "add");
}

void
InsnSUB(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "sub");
}

void
InsnAND(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "and");
}

void
InsnOR(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "or");
}

void
InsnXOR(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "xor");
}

/*
 * Logical NOT: result is 1 if the source is zero, else 0 (cmp $0 / sete).
 *
 * When the result is wider than a byte and has the source's size the
 * boolean is built in %ecx/%cl and then moved to the destination.
 */
void
InsnNOT(RASParser *p, rinsn_t *rin)
{
	static rea_t xrcx;

	if (rin->operands == 1 || rin->ext1 == rin->ext2) {
		if (rin->ext1 == REXT_I8) {
			printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
			printEA(rin->arg1.rea, rin->ext1);
			printf("\n");
			printf("\tsete\t");
			if (rin->operands == 1)
				printEA(rin->arg1.rea, rin->ext1);
			else
				printEA(rin->arg2.rea, rin->ext1);
		} else {
			xrcx.target_reg = X86_REG_RCX;
			xrcx.eamode = EA_DIRECT;
			printf("\txorl\t%%ecx, %%ecx\n");
			printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
			printEA(rin->arg1.rea, rin->ext1);
			printf("\n");
			printf("\tsete\t%%cl\n");
			if (rin->operands == 1)
				doMOVE(p, rin, &xrcx, rin->arg1.rea);
			else
				doMOVE(p, rin, &xrcx, rin->arg2.rea);
		}
		/* terminates the sete line in the I8 case */
		printf("\n");
	} else {
		dassert(rin->operands == 2);
		printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
		printEA(rin->arg1.rea, rin->ext1);
		printf("\n");
		printf("\tsete\t");
		printEA(rin->arg2.rea, rin->ext2);
		printf("\n");
	}
}

void
InsnCOM(RASParser *p, rinsn_t *rin)
{
	doInsn1R(p, rin, "not", "xor", "$-1");
}

void
InsnNEG(RASParser *p, rinsn_t *rin)
{
	doInsn1R(p, rin, "neg", NULL, NULL);
}

void
InsnPOS(RASParser *p, rinsn_t *rin)
{
	InsnMOVE(p, rin);
}

void
InsnASL(RASParser *p, rinsn_t *rin)
{
	doInsnSHIFT3(p, rin, "sal");
}

void
InsnASR(RASParser *p, rinsn_t *rin)
{
	doInsnSHIFT3(p, rin, "sar");
}

void
InsnLSR(RASParser *p, rinsn_t *rin)
{
	doInsnSHIFT3(p, rin, "shr");
}

void
InsnADDC(RASParser *p, rinsn_t *rin)
{
	dpanic("ADDC not supported");
}

void
InsnSUBC(RASParser *p, rinsn_t *rin)
{
	dpanic("SUBC not supported");
}

void
InsnMULU(RASParser *p, rinsn_t *rin)
{
	doInsnDXAX3(p, rin, "mul", 0, 0);
	/*doInsnMR3(p, rin, "imul");*/	/* imul work for signed? */
}

void
InsnMULS(RASParser *p, rinsn_t *rin)
{
	doInsnMR3(p, rin, "imul", 0);
}

/*
 * WARNING: Intermediate value in %rdx:%rax
 */
void
InsnDIVU(RASParser *p, rinsn_t *rin)
{
	doInsnDXAX3(p, rin, "div", 0, 0);
}

/*
 * WARNING: Intermediate value in %rdx:%rax
 */
void
InsnDIVS(RASParser *p, rinsn_t *rin)
{
	doInsnDXAX3(p, rin, "idiv", 1, 0);
}

void
InsnMODU(RASParser *p, rinsn_t *rin)
{
	doInsnDXAX3(p, rin, "div", 0, 1);	/* result in %*dx */
}

void
InsnMODS(RASParser *p, rinsn_t *rin)
{
	doInsnDXAX3(p, rin, "idiv", 1, 1);	/* result in %*dx */
}

void
InsnINC(RASParser *p, rinsn_t *rin)
{
	doInsn1R(p, rin, "inc", "add", "$1");
}

void
InsnDEC(RASParser *p, rinsn_t *rin)
{
	doInsn1R(p, rin, "dec", "sub", "$1");
}

/*
 * Integer compare.  Either branches on the result (RINSF_BRANCH) or
 * stores the boolean into arg3 with setCC.
 */
void
InsnCMP(RASParser *p, rinsn_t *rin)
{
	rea_t *sea1;
	rea_t *sea2;
	int rev;

	/*
	 * Issue comparison
	 */
	rev = InsnSimplifyRMMRCMP(p, rin, &sea1, &sea2);
	printf("\tcmp%s\t", x86ext(rin->ext1, 0));
	printEA(sea1, rin->ext1);
	printf(", ");
	printEA(sea2, rin->ext1);
	printf("\n");

	if (rin->flags & RINSF_BRANCH) {
		/*
		 * Compare and branch.  Invert sense if we can optimize
		 * the jmp.
		 */
		if (adjacentLabel(rin, rin->brtrue)) {
			/*
			 * next insn matches true path, invert and branch
			 * on-false.
			 */
			printf("\tj%s\t", x86branch(rin->op, 1, rev));
			printBRLabel(rin->brfalse);
			printf("\n");
		} else if (adjacentLabel(rin, rin->brfalse)) {
			/*
			 * next insn matches false path, do not invert
			 * and branch on-true.
			 */
			printf("\tj%s\t", x86branch(rin->op, 0, rev));
			printBRLabel(rin->brtrue);
			printf("\n");
		} else {
			/*
			 * next insn does not match either path.
			 */
			printf("\tj%s\t", x86branch(rin->op, 0, rev));
			printBRLabel(rin->brtrue);
			printf("\n");
			printf("\tjmp\t");
			printBRLabel(rin->brfalse);
			printf("\n");
		}
	} else {
		/*
		 * Compare and set result
		 */
		printf("\tset%s\t", x86branch(rin->op, 0, rev));
		printEA(rin->arg3.rea, REXT_I8);
		printf("\n");
	}
}

/*
 * Address-flavored arithmetic simply reuses the integer emitters.
 */
void
InsnMOVEA(RASParser *p, rinsn_t *rin)
{
	InsnMOVE(p, rin);
}

void
InsnADDA(RASParser *p, rinsn_t *rin)
{
	InsnADD(p, rin);
}

void
InsnSUBA(RASParser *p, rinsn_t *rin)
{
	InsnSUB(p, rin);
}

void
InsnADDAU(RASParser *p, rinsn_t *rin)
{
	InsnADD(p, rin);
}

void
InsnSUBAU(RASParser *p, rinsn_t *rin)
{
	InsnSUB(p, rin);
}

void
InsnSUBAA(RASParser *p, rinsn_t *rin)
{
	InsnSUB(p, rin);
}

/*
 * LEA sea,dea
 *
 * A non-memory source degenerates to a mov.  A memory EA with no symbol,
 * zero offset and a register-resident base is just a move of that base
 * register.  Everything else is a real lea.
 */
void
InsnLEA(RASParser *p, rinsn_t *rin)
{
	rea_t *rea1 = rin->arg1.rea;
	rea_t *rea2 = rin->arg2.rea;

	if (rea1->eamode != EA_MEMORY) {
		doInsnRM2NoReadDst(p, rin, "mov", 0);
	} else if (rea1->sym == NULL &&
		   rea1->direct->target_reg &&
		   rea1->offset == 0) {
		doMOVE(p, rin, rea1->direct, rea2);
	} else {
		doInsnMR2NoReadDst(p, rin, "lea", ExtSize);
	}
}

/*
 * BCOPY.align bytes,sea,dea	(non-overlapping guaranteed)
 *
 * Emitted as cld; rep movsb with the count in %rcx, source address in
 * %rsi and destination address in %rdi.
 */
void
InsnBCOPY(RASParser *p, rinsn_t *rin)
{
	static rea_t xrdi;
	static rea_t xrsi;
	static rea_t xrcx;

	xrdi.target_reg = X86_REG_RDI;
	xrdi.eamode = EA_DIRECT;
	xrsi.target_reg = X86_REG_RSI;
	xrsi.eamode = EA_DIRECT;
	xrcx.target_reg = X86_REG_RCX;
	xrcx.eamode = EA_DIRECT;

	printf("\tcld\n");
	doMOVEExt(p, rin, rin->arg1.rea, &xrcx, ExtSize);
	doLEA(p, rin->arg2.rea, &xrsi);
	doLEA(p, rin->arg3.rea, &xrdi);
	printf("\trep\n");
	printf("\tmovsb\n");
}

/*
 * BZERO.align bytes,dea
 *
 * Small immediate byte counts that are a multiple of the alignment
 * (rin->ext2) are unrolled into individual stores of a zeroed %rax or
 * %xmm0, stepping the destination offset by the store width each time.
 * Anything else -- including a byte count that is not an immediate --
 * uses the generic cld; rep stosb sequence.
 */
void
InsnBZERO(RASParser *p, rinsn_t *rin)
{
	static rea_t xrax __unused;
	static rea_t xrdi;
	static rea_t xrcx;
	static rea_t xmm0 __unused;
	rea_t rea2;
	runesize_t value __unused;
	int isimm;

	xrax.target_reg = X86_REG_RAX;
	xrax.eamode = EA_DIRECT;
	xrdi.target_reg = X86_REG_RDI;
	xrdi.eamode = EA_DIRECT;
	xrcx.target_reg = X86_REG_RCX;
	xrcx.eamode = EA_DIRECT;
	xmm0.target_reg = X86_REG_XMM0;
	xmm0.eamode = EA_DIRECT;

	/*
	 * The unrolled paths need a compile-time byte count.  A runtime
	 * count leaves value at 0, which would otherwise satisfy every
	 * size test below and emit no stores at all, so gate on isimm.
	 */
	isimm = (rin->arg1.rea->eamode == EA_IMMEDIATE);
	if (isimm)
		value = rin->arg1.rea->immlo;
	else
		value = 0;

	/* retain zerod register optimizations for mixed BZEROs */
	p->opt_flags |= p->opt_last & (RASOPT_RAX_ZERO | RASOPT_XMM0_ZERO);

	rea2 = *rin->arg2.rea;	/* local copy, offset is stepped below */

	if (isimm && rin->ext2 == REXT_I8 && value <= 4) {
		dassert(value == 1 || rea2.eamode == EA_MEMORY);
		if (p->opt_last & RASOPT_RAX_ZERO)
			printf("\t# xorl\t%%eax, %%eax\n");
		else
			printf("\txorl\t%%eax, %%eax\n");
		p->opt_flags |= RASOPT_RAX_ZERO;
		while (value > 0) {
			doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
			/* one byte per store (was 2, skipping odd bytes) */
			rea2.offset += 1;
			value -= 1;
		}
	} else if (isimm && rin->ext2 == REXT_I16 &&
		   (value & 1) == 0 && value <= 8) {
		dassert(value == 2 || rea2.eamode == EA_MEMORY);
		if (p->opt_last & RASOPT_RAX_ZERO)
			printf("\t# xorl\t%%eax, %%eax\n");
		else
			printf("\txorl\t%%eax, %%eax\n");
		p->opt_flags |= RASOPT_RAX_ZERO;
		while (value > 0) {
			doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
			rea2.offset += 2;
			value -= 2;
		}
	} else if (isimm && rin->ext2 == REXT_I32 &&
		   (value & 3) == 0 && value <= 16) {
		dassert(value == 4 || rea2.eamode == EA_MEMORY);
		if (p->opt_last & RASOPT_RAX_ZERO)
			printf("\t# xorl\t%%eax, %%eax\n");
		else
			printf("\txorl\t%%eax, %%eax\n");
		p->opt_flags |= RASOPT_RAX_ZERO;
		while (value > 0) {
			doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
			rea2.offset += 4;
			value -= 4;
		}
	} else if (isimm && rin->ext2 == REXT_I64 &&
		   (value & 7) == 0 && value <= 40 &&
		   ExtSize == REXT_I64) {
		dassert(value == 8 || rea2.eamode == EA_MEMORY);
		if (p->opt_last & RASOPT_RAX_ZERO)
			printf("\t# xorl\t%%eax, %%eax\n");
		else
			printf("\txorl\t%%eax, %%eax\n");
		p->opt_flags |= RASOPT_RAX_ZERO;
		while (value > 0) {
			doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
			rea2.offset += 8;
			value -= 8;
		}
	} else if (isimm && rin->ext2 == REXT_I128 &&
		   (value & 15) == 0 && value <= 80) {
		dassert(value == 16 || rea2.eamode == EA_MEMORY);
		if (p->opt_last & RASOPT_XMM0_ZERO)
			printf("\t# pxor\t%%xmm0, %%xmm0\n");
		else
			printf("\tpxor\t%%xmm0, %%xmm0\n");
		p->opt_flags |= RASOPT_XMM0_ZERO;
		while (value > 0) {
			doMOVEExt(p, rin, &xmm0, &rea2,
				  rin->ext2 | REXTF_FLOAT);
			rea2.offset += 16;
			value -= 16;
		}
	} else {
		printf("\tcld\n");
		printf("\txorl\t%%eax, %%eax\n");
		doMOVEExt(p, rin, rin->arg1.rea, &xrcx, ExtSize);
		doLEA(p, &rea2, &xrdi);
		printf("\trep\n");
		printf("\tstosb\n");
	}
}

/*
 * CMPTYPE: call RuneRunTime_SWCmpType with the addresses of arg1 and
 * arg2, then either branch on %rax != 0 or store the low byte of %rax
 * into arg3.
 */
void
InsnCMPTYPE(RASParser *p, rinsn_t *rin)
{
	static rea_t xarg1;
	static rea_t xarg2;
	static rea_t xrax;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	doLEA(p, rin->arg2.rea, &xarg2);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_SWCmpType\n");
	doSpecialRestore(rin);

	if (rin->flags & RINSF_BRANCH) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		printf("\tcmp%c\t$0, ", X86Size);
		printEA(&xrax, ExtSize);
		printf("\n");
		if (adjacentLabel(rin, rin->brtrue)) {
			/*
			 * next insn matches true path, invert and branch
			 * on-false.
			 */
			printf("\tje\t");
			printBRLabel(rin->brfalse);
		} else if (adjacentLabel(rin, rin->brfalse)) {
			/*
			 * next insn matches false path, do not invert
			 * and branch on-true.
			 */
			printf("\tjne\t");
			printBRLabel(rin->brtrue);
		} else {
			/*
			 * next insn does not match either path.
			 */
			printf("\tjne\t");
			printBRLabel(rin->brtrue);
			printf("\n");
			printf("\tjmp\t");
			printBRLabel(rin->brfalse);
		}
		printf("\n");
	} else {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, &xrax, rin->arg3.rea, REXT_I8);
	}
}

/*
 * Cast unsigned integer to size
 *
 * Widening uses movzb/movzw; a 32->64 widen goes through a 32-bit move
 * followed by a destination-sized mov.  Narrowing from memory stages
 * the value in %rcx first.
 */
void
InsnCASTU(RASParser *p, rinsn_t *rin)
{
	static rea_t xrea;

	if (rin->ext1 == rin->ext2) {
		/*doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea);*/
		InsnMOVE(p, rin);
	} else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
		   rin->arg1.rea->eamode == EA_IMMEDIATE16) {
		doMOVEExt(p, rin, rin->arg1.rea, rin->arg2.rea, REXTF_EA2);
	} else if (rin->ext1 < rin->ext2) {
		switch(rin->ext1) {
		case REXT_I8:
			doInsnMR2NoReadDst(p, rin, "movzb",
					   REXTF_SEA1 | REXTF_EA2);
			break;
		case REXT_I16:
			doInsnMR2NoReadDst(p, rin, "movzw",
					   REXTF_SEA1 | REXTF_EA2);
			break;
		case REXT_I32:
			if (rin->arg1.rea->eamode == EA_MEMORY) {
				xrea.target_reg = X86_REG_RCX;
				xrea.eamode = EA_DIRECT;
				doMOVEExt(p, rin, rin->arg1.rea, &xrea,
					  REXT_I32);
				printINSN(rin, "mov", &xrea, rin->arg2.rea,
					  REXTF_EA2);
			} else {
				doMOVEExt(p, rin, rin->arg1.rea,
					  rin->arg1.rea, REXT_I32);
				printINSN(rin, "mov", rin->arg1.rea,
					  rin->arg2.rea, REXTF_EA2);
			}
			break;
		default:
			dpanic("Unknown/unsupported REXT %d", rin->ext1);
			break;
		}
	} else {
		if (rin->arg1.rea->eamode == EA_MEMORY) {
			xrea.target_reg = X86_REG_RCX;
			xrea.eamode = EA_DIRECT;
			doMOVE(p, rin, rin->arg1.rea, &xrea);
			printINSN(rin, "mov", &xrea, rin->arg2.rea,
				  REXTF_EA2);
		} else {
			printINSN(rin, "mov", rin->arg1.rea, rin->arg2.rea,
				  REXTF_EA2);
		}
	}
}

/*
 * Cast signed integer to size
 *
 * Widening uses movsb/movsw/movsl.  Narrowing from memory stages the
 * value in %rcx first.
 */
void
InsnCASTS(RASParser *p, rinsn_t *rin)
{
	static rea_t xrea;

	if (rin->ext1 == rin->ext2) {
		/*doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea);*/
		InsnMOVE(p, rin);
	} else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
		   rin->arg1.rea->eamode == EA_IMMEDIATE16) {
		doMOVEExt(p, rin, rin->arg1.rea, rin->arg2.rea, REXTF_EA2);
	} else if (rin->ext1 < rin->ext2) {
		switch(rin->ext1) {
		case REXT_I8:
			doInsnMR2NoReadDst(p, rin, "movsb",
					   REXTF_SEA1 | REXTF_EA2);
			break;
		case REXT_I16:
			doInsnMR2NoReadDst(p, rin, "movsw",
					   REXTF_SEA1 | REXTF_EA2);
			break;
		case REXT_I32:
			doInsnMR2NoReadDst(p, rin, "movsl",
					   REXTF_SEA1 | REXTF_EA2);
			break;
		default:
			dpanic("Unknown/unsupported REXT %d", rin->ext1);
			break;
		}
	} else {
		if (rin->arg1.rea->eamode == EA_MEMORY) {
			xrea.target_reg = X86_REG_RCX;
			xrea.eamode = EA_DIRECT;
			doMOVE(p, rin, rin->arg1.rea, &xrea);
			printINSN(rin, "mov", &xrea, rin->arg2.rea,
				  REXTF_EA2);
		} else {
			printINSN(rin, "mov", rin->arg1.rea, rin->arg2.rea,
				  REXTF_EA2);
		}
	}
}

/*
 * CASTP - Cast a pointer to an integer.
 */
void
InsnCASTP(RASParser *p, rinsn_t *rin)
{
	InsnCASTS(p, rin);
}

/*
 * Helpers for calling RuneRunTime_<fname>.  Each brackets the call with
 * doSpecialSave()/doSpecialRestore().
 */

/* No arguments, no result. */
static void
insnZeroArgNoReturn(RASParser *p, rinsn_t *rin, const char *fname)
{
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
}

/*
 * No arguments, %rax is stored into arg1.
 *
 * NOTE(review): the liveness test reads arg2.flags although the result
 * goes to arg1.rea; insnOneArgRet() tests the flags of the operand it
 * stores to.  Left unchanged -- confirm which operand is intended.
 */
static void
insnZeroArgRet(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xrax;

	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
	if ((rin->arg2.flags & RAF_LIFE_END) == 0) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, &xrax, rin->arg1.rea, ExtSize);
	}
}

/* Address of arg1 in runtime ARG1, no result. */
static void
insnOneArgNoReturn(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xarg1;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
}

/* Value of arg1 (moved, not lea'd) in runtime ARG1, no result. */
static void
insnOneArg32NoReturn(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xarg1;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	doMOVE(p, rin, rin->arg1.rea, &xarg1);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
}

/*
 * Address of arg1 in runtime ARG1; %rax is stored into arg2 unless arg2
 * is flagged RAF_LIFE_END.
 */
static void
insnOneArgRet(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xarg1;
	static rea_t xrax;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
	if ((rin->arg2.flags & RAF_LIFE_END) == 0) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, &xrax, rin->arg2.rea, ExtSize);
	}
}

/* Addresses of arg1/arg2 in runtime ARG1/ARG2, no result. */
static void
insnTwoArgNoReturn(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xarg1;
	static rea_t xarg2;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	doLEA(p, rin->arg2.rea, &xarg2);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
}

#if 0
static void
insnTwoArgRet(RASParser *p, rinsn_t *rin, const char *fname)
{
	static rea_t xarg1;
	static rea_t xarg2;
	static rea_t xrax;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;
	doLEA(p, rin->arg2.rea, &xarg2);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_%s\n", fname);
	doSpecialRestore(rin);
	xrax.target_reg = X86_REG_RAX;
	xrax.eamode = EA_DIRECT;
	doMOVEExt(p, rin, &xrax, rin->arg3.rea, ExtSize);
}
#endif

void
InsnPCOPY(RASParser *p, rinsn_t *rin)
{
	insnTwoArgNoReturn(p, rin, "PCopy");
}

/*
 * P-routines
 */
void
InsnPGET(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PGet");
}

void
InsnPGETH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PGetH");
}

void
InsnPPUT(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PPut");
}

void
InsnPPUTH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PPutH");
}

void
InsnPREF(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PRef");
}

void
InsnPREL(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PRel");
}

void
InsnPLOCK(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PLock");
}

void
InsnPLOCKH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PLockH");
}

void
InsnPUNLOCK(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PUnlock");
}

void
InsnPUNLOCKH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "PUnlockH");
}

/*
 * Info adjustment and extraction routines
 */
void
InsnPATOM(RASParser *p, rinsn_t *rin)
{
	dassert(rin->arg2.rea);
	insnOneArgRet(p, rin, "PAtom");
}

void
InsnPIGET(RASParser *p, rinsn_t *rin)
{
	dassert(rin->arg2.rea);
	insnOneArgRet(p, rin, "PIGet");
}

void
InsnPIGETH(RASParser *p, rinsn_t *rin)
{
	dassert(rin->arg2.rea);
	insnOneArgRet(p, rin, "PIGetH");
}

void
InsnSTKIREF(RASParser *p, rinsn_t *rin)
{
	insnZeroArgRet(p, rin, "STKIRef");
}

void
InsnSTKIGET(RASParser *p, rinsn_t *rin)
{
	insnZeroArgRet(p, rin, "STKIGet");
}

void
InsnSTKIGETH(RASParser *p, rinsn_t *rin)
{
	insnZeroArgRet(p, rin, "STKIGetH");
}

void
InsnSRSGET(RASParser *p, rinsn_t *rin)
{
	insnZeroArgNoReturn(p, rin, "SRSGet");
}

void
InsnSRSGETH(RASParser *p, rinsn_t *rin)
{
	insnZeroArgNoReturn(p, rin, "SRSGetH");
}

void
InsnSRSPUT(RASParser *p, rinsn_t *rin)
{
	insnZeroArgNoReturn(p, rin, "SRSPut");
}

void
InsnSRSPUTH(RASParser *p, rinsn_t *rin)
{
	insnZeroArgNoReturn(p, rin, "SRSPutH");
}

void
InsnIGET(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IGet");
}

void
InsnIGETH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IGetH");
}

void
InsnIPUT(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IPut");
}

void
InsnIPUTH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IPutH");
}

void
InsnIREF(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IRef");
}

void
InsnIREL(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IRel");
}

void
InsnILOCK(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "ILock");
}

void
InsnILOCKH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "ILockH");
}

void
InsnIUNLOCK(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IUnlock");
}

void
InsnIUNLOCKH(RASParser *p, rinsn_t *rin)
{
	insnOneArgNoReturn(p, rin, "IUnlockH");
}

void
InsnINEW(RASParser *p, rinsn_t *rin)
{
	insnZeroArgRet(p, rin, "INew");
}

void
InsnINEWN(RASParser *p, rinsn_t *rin)
{
	insnZeroArgRet(p, rin, "INewN");
}

void
InsnIATOM(RASParser *p, rinsn_t *rin)
{
	dassert(rin->arg2.rea);
	insnOneArgRet(p, rin, "IAtom");
}

/*
 * LVALLOC: call RuneRunTime_LVAlloc with the addresses of arg1 and arg2;
 * the low byte of %rax is stored into arg3 unless arg3 is RAF_LIFE_END.
 */
void
InsnLVALLOC(RASParser *p, rinsn_t *rin)
{
	static rea_t xarg1;
	static rea_t xarg2;
	static rea_t xrax;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	doLEA(p, rin->arg1.rea, &xarg1);
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;
	doLEA(p, rin->arg2.rea, &xarg2);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_LVAlloc\n");
	doSpecialRestore(rin);
	if ((rin->arg3.flags & RAF_LIFE_END) == 0) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, &xrax, rin->arg3.rea, REXT_I8);
	}
}

/*
 * PCHECK: arg1 is passed by value, arg2 by address.
 */
void
InsnPCHECK(RASParser *p, rinsn_t *rin)
{
	static rea_t xarg1;
	static rea_t xarg2;

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;
	doMOVE(p, rin, rin->arg1.rea, &xarg1);
	doLEA(p, rin->arg2.rea, &xarg2);
	doSpecialSave(rin);
	printf("\tcall\tRuneRunTime_PCheck\n");
	doSpecialRestore(rin);
}

void
InsnBNDTRAP(RASParser *p, rinsn_t *rin)
{
	printf("\tcall\tRuneRunTime_BoundsTrap\n");
}

void
InsnCMPA(RASParser *p, rinsn_t *rin)
{
	InsnCMP(p, rin);
}

/*
 * Rune-Rune call
 *
 * Also services INSN_TCALL, in which case the call goes through
 * RuneRunTime_ThreadedCall with the target's address in RUNE ARG3.
 * A target that is register-resident is called indirectly via %rax.
 */
void
InsnCALL(RASParser *p, rinsn_t *rin)
{
	static rea_t xrax;
	static rea_t xarg1;
	static rea_t xarg2;
	static rea_t xarg3;
	int doind = 0;

	/* XXX eamode not really deterministic */
	if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
		doind = 1;
	} else if (rin->op == INSN_TCALL) {
		xarg3.target_reg = X86_RUNE_ARG3;
		xarg3.eamode = EA_DIRECT;
		doLEA(p, rin->arg1.rea, &xarg3);
	}

	xarg1.target_reg = X86_RUNE_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_RUNE_ARG2;
	xarg2.eamode = EA_DIRECT;

	if (rin->arg4.rea) {
		dassert(rin->op != INSN_TCALL);	/* XXX fixme */
		xarg3.target_reg = X86_RUNE_ARG3;
		xarg3.eamode = EA_DIRECT;
		if (rin->arg4.rea->eamode == EA_DIRECT ||
		    rin->arg4.rea->regno) {
			doMOVEExt(p, rin, rin->arg4.rea, &xarg3, ExtSize);
		} else {
			doLEA(p, rin->arg4.rea, &xarg3);
		}
	}
	if (rin->arg2.rea)
		doLEA(p, rin->arg2.rea, &xarg1);
	if (rin->arg3.rea)
		doLEA(p, rin->arg3.rea, &xarg2);

	doSpecialSave(rin);
	if (rin->op == INSN_TCALL) {
		printf("\tcall\tRuneRunTime_ThreadedCall\n");
	} else if (doind) {
		if (ExtSize == REXT_I32)
			printf("\tcall\t*%%eax\n");
		else
			printf("\tcall\t*%%rax\n");
	} else {
		printf("\tcall\t");
		printEA(rin->arg1.rea, ExtSize);
		printf("\n");
	}
	doSpecialRestore(rin);
}

/*
 * Rune-Rune threaded call
 */
void
InsnTCALL(RASParser *p, rinsn_t *rin)
{
	InsnCALL(p, rin);
}

void
InsnLCALL(RASParser *p, rinsn_t *rin)
{
	printf("\t#XXX\n");
}

/*
 * System interfacing call (e.g. native libc, system call, etc)
 */
void
InsnQCALL(RASParser *p, rinsn_t *rin)
{
	static rea_t xrax;
	static rea_t xarg1;
	static rea_t xarg2;
	int doind = 0;

	/* XXX eamode not really deterministic */
	if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
		doind = 1;
	}

	xarg1.target_reg = X86_STD_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_STD_ARG2;
	xarg2.eamode = EA_DIRECT;

	dassert(rin->arg4.rea == NULL);
	if (rin->arg2.rea)
		doLEA(p, rin->arg2.rea, &xarg1);
	if (rin->arg3.rea)
		doLEA(p, rin->arg3.rea, &xarg2);

	doSpecialSave(rin);
	if (doind) {
		if (ExtSize == REXT_I32)
			printf("\tcall\t*%%eax\n");
		else
			printf("\tcall\t*%%rax\n");
	} else {
		printf("\tcall\t");
		printEA(rin->arg1.rea, ExtSize);
		printf("\n");
	}
	doSpecialRestore(rin);
}

/*
 * RuneRunTime_*() call ABI
 */
void
InsnRCALL(RASParser *p, rinsn_t *rin)
{
	static rea_t xrax;
	static rea_t xarg1;
	static rea_t xarg2;
	int doind = 0;

	/* XXX eamode not really deterministic */
	if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
		xrax.target_reg = X86_REG_RAX;
		xrax.eamode = EA_DIRECT;
		doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
		doind = 1;
	}

	xarg1.target_reg = X86_RUNTIME_ARG1;
	xarg1.eamode = EA_DIRECT;
	xarg2.target_reg = X86_RUNTIME_ARG2;
	xarg2.eamode = EA_DIRECT;

	dassert(rin->arg4.rea == NULL);
	if (rin->arg2.rea)
		doLEA(p, rin->arg2.rea, &xarg1);
	if (rin->arg3.rea)
		doLEA(p, rin->arg3.rea, &xarg2);

	doSpecialSave(rin);
	if (doind) {
		if (ExtSize == REXT_I32)
			printf("\tcall\t*%%eax\n");
		else
			printf("\tcall\t*%%rax\n");
	} else {
		printf("\tcall\t");
		printEA(rin->arg1.rea, ExtSize);
		printf("\n");
	}
	doSpecialRestore(rin);
}

void
InsnRET(RASParser *p, rinsn_t *rin)
{
	rinsn_t *scan;

	/*
	 * If we are at the end of the procedure we can just fall through,
	 * otherwise jump to the return label.
	 */
	scan = RUNE_NEXT(rin, node);
	while (scan && scan->op == INSN_LABEL)
		scan = RUNE_NEXT(scan, node);
	if (scan)
		printf("\tjmp\t.LRET%d\n", ProcNo);
}

void
InsnDET(RASParser *p, rinsn_t *rin)
{
	insnZeroArgNoReturn(p, rin, "ThreadedDetach");
}

void
InsnLINK(RASParser *p, rinsn_t *rin)
{
	printf("\t#XXX\n");
}

void
InsnTSCHED(RASParser *p, rinsn_t *rin)
{
	insnOneArg32NoReturn(p, rin, "TSched");
}

/*
 * Unconditional jump, elided when the target label directly follows.
 */
void
InsnJMP(RASParser *p, rinsn_t *rin)
{
	if (!adjacentLabel(rin, rin->brtrue)) {
		printf("\tjmp\t");
		printBRLabel(rin->brtrue);
		printf("\n");
	}
}

/*
 * TEST: compare arg1 against zero, then either branch or store the
 * boolean into arg2 with setCC.
 */
void
InsnTEST(RASParser *p, rinsn_t *rin)
{
	const char *brtrue;
	const char *brfalse;

	/*
	 * For TEST_EQ this is 'e'
	 * For TEST_NE this is 'ne'
	 */
	brtrue = x86branch(rin->op, 0, 0);
	brfalse = x86branch(rin->op, 1, 0);

	printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
	printEA(rin->arg1.rea, rin->ext1);
	printf("\n");

	if (rin->flags & RINSF_BRANCH) {
		if (adjacentLabel(rin, rin->brtrue)) {
			/*
			 * next insn matches true path, invert and branch
			 * on-false.
			 */
			printf("\tj%s\t", brfalse);
			printBRLabel(rin->brfalse);
		} else if (adjacentLabel(rin, rin->brfalse)) {
			/*
			 * next insn matches false path, do not invert
			 * and branch on-true.
			 */
			printf("\tj%s\t", brtrue);
			printBRLabel(rin->brtrue);
		} else {
			/*
			 * next insn does not match either path.
			 */
			printf("\tj%s\t", brtrue);
			printBRLabel(rin->brtrue);
			printf("\n");
			printf("\tjmp\t");
			printBRLabel(rin->brfalse);
		}
	} else {
		printf("\tset%s\t", brtrue);
		printEA(rin->arg2.rea, REXT_I8);
	}
	printf("\n");
}

/*
 * Floating point wrappers
 */
void
InsnFMOVE(RASParser *p, rinsn_t *rin)
{
	doInsnRMMR3(p, rin, "mov");
}

void
InsnFADD(RASParser *p, rinsn_t *rin)
{
	doInsnFloating3(p, rin, "add", 0);
}

void
InsnFSUB(RASParser *p, rinsn_t *rin)
{
	doInsnFloating3(p, rin, "sub", 0);
}

void
InsnFMUL(RASParser *p, rinsn_t *rin)
{
	doInsnFloating3(p, rin, "mul", 0);
}

void
InsnFDIV(RASParser *p, rinsn_t *rin)
{
	doInsnFloating3(p, rin, "div", 0);
}

/*
 * Build an immediate EA holding the integer v converted to the FP format
 * selected by rin->ext1 (float, double or long double), returning a
 * pointer to a static rea_t that is overwritten by the next call.
 *
 * NOTE(review): for REXT_I128 the bytes of the long double that the
 * conversion does not write also end up in immhi -- confirm whether
 * they need to be zeroed for reproducible output.
 */
static rea_t *
getFPImm(RASParser *p, rinsn_t *rin, int v)
{
	static rea_t imm;
	union {
		float f;
		const int32_t v;
	} uu;
	union {
		double d;
		const int64_t v;
	} vv;
	union {
		long double x;
		const int64_t v[2];
	} ww;

	switch(rin->ext1) {
	case REXT_I32:
		uu.f = (float)v;
		imm.eamode = EA_IMMEDIATE;
		imm.immlo = uu.v;
		imm.immhi = 0;
		break;
	case REXT_I64:
		vv.d = (double)v;
		imm.eamode = EA_IMMEDIATE;
		imm.immlo = vv.v;
		imm.immhi = 0;
		break;
	case REXT_I128:
		imm.eamode = EA_IMMEDIATE16;
		ww.x = (long double)v;
#if _BYTE_ORDER == _LITTLE_ENDIAN
		imm.immlo = ww.v[0];
		imm.immhi = ww.v[1];
#else
		imm.immlo = ww.v[1];
		imm.immhi = ww.v[0];
#endif
		break;
	default:
		dpanic("Unknown/unsupported REXT %d", rin->ext1);
		break;
	}
	return &imm;
}

/*
 * FINC: synthesize "add 1.0, arg1, arg2" and hand it to the generic
 * floating point emitter.
 */
void
InsnFINC(RASParser *p, rinsn_t *rin)
{
	static rinsn_t srin;

	srin.operands = rin->operands + 1;
	srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT;
	srin.arg1.rea = getFPImm(p, rin, 1);
	srin.arg2.rea = rin->arg1.rea;
	srin.arg3.rea = rin->arg2.rea;
	srin.ext1 = rin->ext1;
	doInsnFloating3(p, &srin, "add", 0);
}

/*
 * FDEC: synthesize "sub 1.0, arg1, arg2" and hand it to the generic
 * floating point emitter.
 */
void
InsnFDEC(RASParser *p, rinsn_t *rin)
{
	static rinsn_t srin;

	srin.operands = rin->operands + 1;
	srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT;
	srin.arg1.rea = getFPImm(p, rin, 1);
	srin.arg2.rea = rin->arg1.rea;
	srin.arg3.rea = rin->arg2.rea;
	srin.ext1 = rin->ext1;
	doInsnFloating3(p, &srin, "sub", 0);
}

void
InsnFNEG(RASParser *p, rinsn_t *rin) { static rinsn_t srin; srin.operands = 3; srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT; if (rin->operands == 1) { srin.arg1.rea = rin->arg1.rea; srin.arg2.rea = getFPImm(p, rin, 0); srin.arg3.rea = rin->arg1.rea; } else { srin.arg1.rea = rin->arg1.rea; srin.arg2.rea = getFPImm(p, rin, 0); srin.arg3.rea = rin->arg2.rea; } srin.ext1 = rin->ext1; doInsnFloating3(p, &srin, "sub", 0); } void InsnFPOS(RASParser *p, rinsn_t *rin) { InsnPOS(p, rin); } void InsnFNOT(RASParser *p, rinsn_t *rin) { dassert(rin->operands == 2); dassert(rin->ext2 == REXT_I8); if (rin->ext1 != REXT_I128) { printf("\tpxor\t%%xmm0, %%xmm0\n"); printf("\tucomis%c\t", ((rin->ext1 == REXT_I32) ? 's' : 'd')); printEA(rin->arg1.rea, rin->ext1); printf(", %%xmm0\n"); printf("\tsete\t"); printEA(rin->arg2.rea, rin->ext2); printf("\n"); } else { static rea_t xrea; rea_t *sea; sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea, rin->ext1, -16, 1); printf("\tfldt\t"); printEA(sea, REXT_I128); printf("\n"); printf("\tfldz\n"); printf("\tfucomip\t%%st(1), %%st\n"); printf("\tfstp\t%%st(0)\n"); printf("\tsete\t"); printEA(rin->arg2.rea, rin->ext2); printf("\n"); } } void InsnUITOF(RASParser *p, rinsn_t *rin) { printf("\t#XXX\n"); InsnSITOF(p, rin); } void InsnSITOF(RASParser *p, rinsn_t *rin) { static rea_t xsea; static rea_t xdea; static rea_t xrcx; int isunsigned = (rin->op == INSN_UITOF); if (rin->ext2 != REXT_I128) { /* * f32 and f64 */ xsea.target_reg = X86_REG_RCX; xsea.eamode = EA_DIRECT; xsea.offset = 0; xdea.target_reg = X86_REG_XMM0; xdea.eamode = EA_DIRECT; doMOVE(p, rin, rin->arg1.rea, &xsea); if (rin->ext1 == REXT_I8) { if (isunsigned) printf("\tmovzbl\t%%cl, %%ecx\n"); else printf("\tmovsbl\t%%cl, %%ecx\n"); } if (rin->ext1 == REXT_I16) { if (isunsigned) printf("\tmovzwl\t%%cx, %%ecx\n"); else printf("\tmovswl\t%%cx, %%ecx\n"); } if (rin->ext1 == REXT_I64) { printf("\tcvtsi2%sq\t%%rcx, %%xmm0\n", x86ext(rin->ext2, RAF_FLOAT)); doMOVEExt(p, 
rin, &xdea, rin->arg2.rea, REXTF_EA2); } else { printf("\tcvtsi2%s\t%%ecx, %%xmm0\n", x86ext(rin->ext2, RAF_FLOAT)); doMOVEExt(p, rin, &xdea, rin->arg2.rea, REXTF_EA2); } } else { rea_t *sea; rea_t *dea; uint8_t sext; /* * SimplifyFP128 ensures that the requested argument is in * memory and not a register or immediate. We have to do * more work for signed 8-bit quantities. */ if (rin->ext1 == REXT_I8 || (isunsigned && rin->ext1 != REXT_I64)) { xsea.target_reg = X86_REG_RSP; xsea.eamode = EA_MEMORY; xsea.offset = -16; sea = &xsea; } else { sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xsea, rin->ext1, -16, 1); } dea = InsnSimplifyFP128(p, rin, rin->arg2.rea, &xdea, rin->ext2, -32, 0); switch(rin->ext1) { case REXT_I8: xrcx.target_reg = X86_REG_RCX; xrcx.eamode = EA_DIRECT; xrcx.offset = 0; doMOVEExt(p, rin, rin->arg1.rea, &xrcx, rin->ext1|REXTF_SEA1); if (isunsigned) printf("\tmovzbw\t%%cl, %%cx\n"); else printf("\tmovsbw\t%%cl, %%cx\n"); doMOVEExt(p, rin, &xrcx, sea, REXT_I16); sext = REXT_I16; printf("\tfilds\t"); break; case REXT_I16: if (isunsigned) { xrcx.target_reg = X86_REG_RCX; xrcx.eamode = EA_DIRECT; xrcx.offset = 0; doMOVEExt(p, rin, rin->arg1.rea, &xrcx, rin->ext1|REXTF_SEA1); printf("\tmovzwl\t%%cx, %%ecx\n"); doMOVEExt(p, rin, &xrcx, sea, REXT_I32); sext = REXT_I32; printf("\tfildl\t"); } else { sext = REXT_I16; printf("\tfilds\t"); } break; case REXT_I32: if (isunsigned) { xrcx.target_reg = X86_REG_RCX; xrcx.eamode = EA_DIRECT; xrcx.offset = 0; /* moves to %e* zero-ext to %r* */ doMOVEExt(p, rin, rin->arg1.rea, &xrcx, rin->ext1|REXTF_SEA1); doMOVEExt(p, rin, &xrcx, sea, REXT_I64); sext = REXT_I64; printf("\tfildq\t"); } else { sext = REXT_I32; printf("\tfildl\t"); } break; case REXT_I64: sext = REXT_I64; printf("\tfildq\t"); break; default: sext = 0; dpanic("Unknown/unsupported REXT %d", rin->ext1); break; } printEA(sea, sext); printf("\n"); /* * A bit of magic for unsigned 64-bits. 
*/ if (rin->ext1 == REXT_I64 && isunsigned) { static rea_t gimmin; static rea_t gimmout; static int ulabel = 100; gimmin.eamode = EA_IMMEDIATE; gimmin.immlo = 1602224128; CreateGlobalImmediate(&gimmin, &gimmout, rin->ext1); printf("\ttestq\t%%rcx, %%rcx\n"); printf("\tjns\t.LITOF%d\n", ulabel); printf("\tfadds\t"); printEA(&gimmout, REXT_I64); printf("\n"); printf(".LITOF%d:\n", ulabel); ++ulabel; } printf("\tfstpt\t"); printEA(dea, REXT_I128); printf("\n"); if (dea != rin->arg2.rea) doMOVEExt(p, rin, dea, rin->arg2.rea, rin->ext2 | REXTF_FLOAT | REXTF_EA2); } } void InsnFTOUI(RASParser *p, rinsn_t *rin) { printf("\t#XXX supposed to be unsigned\n"); InsnFTOSI(p, rin); } static int LastWasFTOI; static void InsnFTOIProbe(RASParser *p, rinsn_t *rin) { static rea_t x2ea; if (rin == NULL || (rin->op != INSN_FTOSI && rin->op != INSN_FTOUI)) { if (LastWasFTOI) { x2ea.target_reg = X86_REG_RSP; x2ea.offset = -18; x2ea.eamode = EA_MEMORY; printf("\tfldcw\t"); printEA(&x2ea, rin->ext2); printf("\n"); } LastWasFTOI = 0; ProbeFunc = NULL; } } void InsnFTOSI(RASParser *p, rinsn_t *rin) { static rea_t xrea; static rea_t x2ea; if (rin->ext1 != REXT_I128) { /* * f32 and f64 */ xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; x2ea.target_reg = X86_REG_RCX; x2ea.eamode = EA_DIRECT; x2ea.offset = 0; doMOVE(p, rin, rin->arg1.rea, &xrea); if (rin->ext2 == REXT_I64) { printf("\tcvt%s2siq\t%%xmm0, %%rcx\n", x86ext(rin->ext1, RAF_FLOAT)); doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2); } else { printf("\tcvt%s2si\t%%xmm0, %%ecx\n", x86ext(rin->ext1, RAF_FLOAT)); doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2); } } else { /* * f128 (i.e. 
FP80) */ rea_t *sea; sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea, rin->ext1, -16, 1); printf("\tfldt\t"); printEA(sea, REXT_I128); printf("\n"); xrea.target_reg = X86_REG_RCX; xrea.eamode = EA_DIRECT; if (LastWasFTOI == 0) { ProbeFunc = InsnFTOIProbe; x2ea.target_reg = X86_REG_RSP; x2ea.offset = -18; x2ea.eamode = EA_MEMORY; printf("\tfnstcw\t"); printEA(&x2ea, rin->ext2); printf("\n"); printf("\tmovzwl\t"); printEA(&x2ea, rin->ext2); printf(",%%ecx\n"); x2ea.target_reg = X86_REG_RSP; x2ea.offset = -20; x2ea.eamode = EA_MEMORY; printf("\torb\t$12, %%ch\n"); printf("\tmovw\t%%cx, "); printEA(&x2ea, rin->ext2); printf("\n"); printf("\tfldcw\t"); printEA(&x2ea, rin->ext2); printf("\n"); LastWasFTOI = 1; } printf("\tfistp%c\t", ((rin->ext2 == REXT_I8) ? 's' : ((rin->ext2 == REXT_I16) ? 's' : ((rin->ext2 == REXT_I32) ? 'l' : ((rin->ext2 == REXT_I64) ? 'q' : '?'))))); if (rin->arg2.rea->eamode != EA_MEMORY || rin->ext2 == REXT_I8) { x2ea.target_reg = X86_REG_RSP; x2ea.offset = -32; x2ea.eamode = EA_MEMORY; printEA(&x2ea, rin->ext2); printf("\n"); if (rin->arg2.rea->eamode == EA_MEMORY) { doMOVEExt(p, rin, &x2ea, &xrea, REXTF_EA2); doMOVEExt(p, rin, &xrea, rin->arg2.rea, REXTF_EA2); } else { doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2); } } else { printEA(rin->arg2.rea, rin->ext2); printf("\n"); } } } void InsnCASTF(RASParser *p, rinsn_t *rin) { static rea_t xrea; static rea_t x2ea; if (rin->ext1 != REXT_I128 && rin->ext2 != REXT_I128) { /* * f32/f64 <-> f32/f64 (SSE2) */ xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; doMOVE(p, rin, rin->arg1.rea, &xrea); printf("\tcvt%s2%s\t%%xmm0, %%xmm0\n", x86ext(rin->ext1, RAF_FLOAT), x86ext(rin->ext2, RAF_FLOAT)); doMOVEExt(p, rin, &xrea, rin->arg2.rea, REXTF_EA2); } else if (rin->ext1 == REXT_I128) { /* * f128 -> f32/f64 */ rea_t *sea; sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea, rin->ext1, -16, 1); printf("\tfldt\t"); printEA(sea, REXT_I128); printf("\n"); printf("\tfstp%c\t", (rin->ext2 == 
REXT_I32) ? 's' : 'l'); if (rin->arg2.rea->eamode != EA_MEMORY) { x2ea.target_reg = X86_REG_RSP; x2ea.offset = -32; x2ea.eamode = EA_MEMORY; printEA(&x2ea, rin->ext2); printf("\n"); doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2); } else { printEA(rin->arg2.rea, rin->ext2); printf("\n"); } } else { /* * f32/f64 -> f128 */ rea_t *sea; sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea, rin->ext1, -16, 1); printf("\tfld%c\t", (rin->ext1 == REXT_I32) ? 's' : 'l'); printEA(sea, REXT_I128); printf("\n"); printf("\tfstpt\t"); if (rin->arg2.rea->eamode != EA_MEMORY) { x2ea.target_reg = X86_REG_RSP; x2ea.offset = -32; x2ea.eamode = EA_MEMORY; printEA(&x2ea, rin->ext2); printf("\n"); doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2); } else { printEA(rin->arg2.rea, rin->ext2); printf("\n"); } } } void InsnFCMP(RASParser *p, rinsn_t *rin) { rea_t *sea1; rea_t *sea2; int rev; /* * Bleh, have to use FP stack, and 128-bit immediate values don't * work. */ if (rin->ext1 == REXT_I128) { rev = doInsnFloating3(p, rin, "fucomip", 1); } else { rev = InsnSimplifyMRFCMP(p, rin, &sea1, &sea2); /* * 128-bit is unfortunately different from 32-bit and 64-bit. */ printf("\tucomi%s\t", x86ext(rin->ext1, RAF_FLOAT)); printEA(sea1, rin->ext1); printf(", "); printEA(sea2, rin->ext1); printf("\n"); } if (rin->flags & RINSF_BRANCH) { /* * Compare and branch. Invert sense if we can optimize * the jmp. */ if (adjacentLabel(rin, rin->brtrue)) { /* * next insn matches true path, invert and branch * on-false. */ printf("\tj%s\t", x86branch(rin->op, 1, rev)); printBRLabel(rin->brfalse); printf("\n"); } else if (adjacentLabel(rin, rin->brfalse)) { /* * next insn matches false path, do not invert * and branch on-true. */ printf("\tj%s\t", x86branch(rin->op, 0, rev)); printBRLabel(rin->brtrue); printf("\n"); } else { /* * next insn does not match either path. 
*/ printf("\tj%s\t", x86branch(rin->op, 0, rev)); printBRLabel(rin->brtrue); printf("\n"); printf("\tjmp\t"); printBRLabel(rin->brfalse); printf("\n"); } } else { /* * Compare and set result */ printf("\tset%s\t", x86branch(rin->op, 0, rev)); printEA(rin->arg3.rea, REXT_I8); printf("\n"); } } /* * Complex helper functions */ /* * Binary (1-arg or 2-arg) (i.e. FMOVE, MOVE, LEA) * * RM2 - reg,mem * MR2 - mem,reg * * NOTE: For CAST* insns which pass SEA1|EA2, we need to use only EA2 for * the second instruction when we have to break the operation up into * two since the first instructions handles converting ext1 into ext2 * for the intermediate result. */ static void doInsnRM2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op, uint8_t extov) { static rea_t xrea; if (rin->operands == 2) { if (sameEA(rin->arg1.rea, rin->arg2.rea)) { ; } else if (rin->arg1.rea->eamode == EA_MEMORY) { if (rin->arg1.flags & RAF_FLOAT) { xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; } else { xrea.target_reg = X86_REG_RCX; xrea.eamode = EA_DIRECT; } doMOVE(p, rin, rin->arg1.rea, &xrea); printINSN(rin, x86op, &xrea, rin->arg2.rea, extov & ~REXTF_SEA1); } else { printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, extov); } } else { RasError(p, "Instruction must be 2-op only"); } } static void doInsnMR2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op, uint8_t extov) { static rea_t xrea; if (rin->operands == 2) { if (sameEA(rin->arg1.rea, rin->arg2.rea)) { ; } else if (rin->arg2.rea->eamode == EA_MEMORY) { if (rin->arg2.flags & RAF_FLOAT) { xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; } else { xrea.target_reg = X86_REG_RCX; xrea.eamode = EA_DIRECT; } printINSN(rin, x86op, rin->arg1.rea, &xrea, extov); doMOVEExt(p, rin, &xrea, rin->arg2.rea, extov & ~REXTF_SEA1); } else { printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, extov); } } else { RasError(p, "Instruction must be 2-op only"); } } /* * Binary (2-arg or 3-arg) * * RM3 - imm/reg,*,mem * MR3 - mem,*,reg 
(so far not needed)
 * RMMR3- can be either
 *
 * If x86op is "mov" then we can optimize the 2-op instruction which is
 * particularly useful when moving zero into a FP register.
 *
 * For instructions with INSNF_FLOAT set an immediate arg1 is never handed
 * to printINSN directly; it is first moved into a register via doMOVE().
 */
static void
doInsnRMMR3(RASParser *p, rinsn_t *rin, const char *x86op)
{
	static rea_t xrea;
	static rea_t x2ea;
	int noimm;

	if (rin->op & INSNF_FLOAT)
		noimm = 1;
	else
		noimm = 0;

	if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
		/*
		 * 2-op form (or 3-op whose target equals arg2): operate
		 * directly on arg2.
		 */
		if (rin->arg1.rea->eamode == EA_MEMORY ||
		    (noimm && (rin->arg1.rea->eamode == EA_IMMEDIATE ||
			       rin->arg1.rea->eamode == EA_IMMEDIATE16))) {
			if (rin->arg1.flags & RAF_FLOAT) {
				xrea.target_reg = X86_REG_XMM0;
				xrea.eamode = EA_DIRECT;
			} else {
				xrea.target_reg = X86_REG_RCX;
				xrea.eamode = EA_DIRECT;
			}
			if (rin->arg1.rea->eamode != EA_MEMORY &&
			    strcmp(x86op, "mov") == 0) {
				/* immediate "mov": doMOVE does it all */
				doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea);
			} else {
				/* stage the source in the scratch register */
				doMOVE(p, rin, rin->arg1.rea, &xrea);
				printINSN(rin, x86op, &xrea, rin->arg2.rea, 0);
			}
		} else {
			printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, 0);
		}
	} else if (rin->operands == 3) {
		rea_t *dea;

		if (rin->arg3.rea->eamode == EA_DIRECT &&
		    !sameEA(rin->arg1.rea, rin->arg3.rea)) {
			/*
			 * Optimize if target is a register.
			 */
			dea = rin->arg3.rea;
		} else if (rin->arg2.flags & RAF_FLOAT) {
			/* %xmm1, leaving %xmm0 free for the source below */
			xrea.target_reg = X86_REG_XMM1;
			xrea.eamode = EA_DIRECT;
			dea = &xrea;
		} else {
			xrea.target_reg = X86_REG_RCX;
			xrea.eamode = EA_DIRECT;
			dea = &xrea;
		}
		if (noimm && (rin->arg1.rea->eamode == EA_IMMEDIATE ||
			      rin->arg1.rea->eamode == EA_IMMEDIATE16)) {
			/* floating immediate source goes through %xmm0 */
			dassert(rin->arg1.flags & RAF_FLOAT);
			x2ea.target_reg = X86_REG_XMM0;
			x2ea.eamode = EA_DIRECT;
			doMOVE(p, rin, rin->arg1.rea, &x2ea);
			doMOVE(p, rin, rin->arg2.rea, dea);
			printINSN(rin, x86op, &x2ea, dea, 0);
			if (dea != rin->arg3.rea)
				doMOVE(p, rin, dea, rin->arg3.rea);
		} else {
			/* copy arg2 into the working target, then operate */
			doMOVE(p, rin, rin->arg2.rea, dea);
			printINSN(rin, x86op, rin->arg1.rea, dea, 0);
			if (dea != rin->arg3.rea)
				doMOVE(p, rin, dea, rin->arg3.rea);
		}
	} else {
		RasError(p, "Instruction must be 2-op or 3-op only");
	}
}

/*
 * Instruction requires mem,reg or reg,reg.  If this is a floating point
 * instruction an immediate source must be placed in a register or be
 * accessed from memory.
 *
 * If nodrd is non-zero it means the destination in a 2-op instruction is
 * NOT read, so we don't have to copy arg2.rea into the destination before
 * issuing the instruction.
 */
static void
doInsnMR3(RASParser *p, rinsn_t *rin, const char *x86op, int nodrd)
{
	static rea_t xrea;
	static rea_t x2ea;
	rea_t *sea;

	/*
	 * Floating point instructions can't have an immediate source.
	 * doMOVE() will handle optimizing the immediate value for us.
	 */
	if ((rin->op & INSNF_FLOAT) &&
	    (rin->arg1.rea->eamode == EA_IMMEDIATE ||
	     rin->arg1.rea->eamode == EA_IMMEDIATE16)) {
		/* replace the immediate with a global immediate EA */
		CreateGlobalImmediate(rin->arg1.rea, &x2ea, rin->ext1);
		sea = &x2ea;
	} else {
		sea = rin->arg1.rea;
	}

	/*
	 * Handle 2-arg and 3-arg mechanics.  A memory destination is not
	 * allowed.
	 */
	if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
		if (rin->arg2.rea->eamode == EA_MEMORY) {
			if (rin->arg2.flags & RAF_FLOAT) {
				xrea.target_reg = X86_REG_XMM0;
				xrea.eamode = EA_DIRECT;
			} else {
				xrea.target_reg = X86_REG_RCX;
				xrea.eamode = EA_DIRECT;
			}
			/* skip the load when the destination is not read */
			if (nodrd == 0)
				doMOVE(p, rin, rin->arg2.rea, &xrea);
			printINSN(rin, x86op, sea, &xrea, 0);
			doMOVE(p, rin, &xrea, rin->arg2.rea);
		} else {
			printINSN(rin, x86op, sea, rin->arg2.rea, 0);
		}
	} else if (rin->operands == 3) {
		rea_t *dea;

		if (rin->arg3.rea->eamode == EA_DIRECT &&
		    !sameEA(rin->arg1.rea, rin->arg3.rea)) {
			/*
			 * Optimize if target is a register.
			 */
			dea = rin->arg3.rea;
		} else if (rin->arg2.flags & RAF_FLOAT) {
			xrea.target_reg = X86_REG_XMM0;
			xrea.eamode = EA_DIRECT;
			dea = &xrea;
		} else {
			xrea.target_reg = X86_REG_RCX;
			xrea.eamode = EA_DIRECT;
			dea = &xrea;
		}
		/* copy arg2 into the working target, operate, store back */
		doMOVE(p, rin, rin->arg2.rea, dea);
		printINSN(rin, x86op, sea, dea, 0);
		if (dea != rin->arg3.rea)
			doMOVE(p, rin, dea, rin->arg3.rea);
	} else {
		RasError(p, "Instruction must be 2-op or 3-op only");
	}
}

/*
 * Unary (1-arg) with possible 2-arg optimization
 *
 * R - single argument to register, else 2-arg optimization to either
 *     register or memory.
*/
/*
 * x86op1 is the true unary opcode and is only ever issued on a register.
 * x86op2/x86opt2arg, when x86op2 is non-NULL, describe an equivalent
 * two-argument form ("x86op2 x86opt2arg, target") used for in-place
 * memory operands and for register targets of the 2-operand form.  In
 * every other case the value is routed through a scratch register
 * (%xmm0 for RAF_FLOAT operands, %rcx otherwise).
 */
static void
doInsn1R(RASParser *p, rinsn_t *rin,
	 const char *x86op1, const char *x86op2, const char *x86opt2arg)
{
	static rea_t xrea;
	rea_t *src = rin->arg1.rea;
	rea_t *dst;
	int inplace;
	int usealt;

	/*
	 * Work out where the result goes.
	 */
	if (rin->operands == 1 || sameEA(src, rin->arg2.rea)) {
		inplace = 1;
		dst = src;
	} else if (rin->operands == 2) {
		inplace = 0;
		dst = rin->arg2.rea;
	} else {
		RasError(p, "Instruction must be 1-op or 2-op only");
		return;
	}

	/*
	 * In-place on a register: the plain unary instruction does it.
	 */
	if (inplace && src->eamode == EA_DIRECT) {
		printINSN(rin, x86op1, src, NULL, 0);
		return;
	}

	/*
	 * The alternative form applies to any remaining in-place operand,
	 * but only to register targets in the 2-operand form.
	 */
	if (inplace)
		usealt = (x86op2 != NULL);
	else
		usealt = (x86op2 != NULL && dst->eamode == EA_DIRECT);

	if (usealt) {
		if (!inplace)
			doMOVE(p, rin, src, dst);
		printf("\t%s%s\t%s, ",
		       x86op2,
		       x86ext(rin->ext1, rin->arg1.flags),
		       x86opt2arg);
		printEA(dst, rin->ext1);
		printf("\n");
		return;
	}

	/*
	 * No alternative: load into scratch, operate, store to the target.
	 */
	xrea.target_reg = (rin->arg1.flags & RAF_FLOAT) ?
			  X86_REG_XMM0 : X86_REG_RCX;
	xrea.eamode = EA_DIRECT;
	doMOVE(p, rin, src, &xrea);
	printINSN(rin, x86op1, &xrea, NULL, 0);
	doMOVE(p, rin, &xrea, dst);
}

/*
 * The shift instruction has two size extensions.  ext1 applies to the shift
 * count, ext2 to the target.  The shift count must be either an immediate
 * value or loaded into %cl (%ecx or %rcx ok).
 *
 * NOTE: We may have to adjust ext1/ext2 if we call other helper functions
 *	 to present the correct operand size.
*
 * NOTE: Cannot shift floating point values (for now)
 */
/*
 * The count (arg1) is used as-is when it is an immediate or already lives
 * in %rcx, otherwise it is moved into %rcx first.  rin->ext1 is
 * temporarily replaced by rin->ext2 while the shift is emitted and put
 * back before returning.
 */
static void
doInsnSHIFT3(RASParser *p, rinsn_t *rin, const char *x86op)
{
	static rea_t xrea;
	static rea_t x2ea;
	rea_t *count = rin->arg1.rea;
	rea_t *target;
	uint8_t saved_ext1;

	printf("\t# SHIFT3\n");

	/*
	 * Anything that is neither an immediate nor %rcx itself has to be
	 * loaded into %cl/%cx/%ecx/%rcx.
	 */
	if (count->eamode != EA_IMMEDIATE &&
	    (count->eamode != EA_DIRECT ||
	     count->target_reg != X86_REG_RCX)) {
		xrea.target_reg = X86_REG_RCX;
		xrea.eamode = EA_DIRECT;
		doMOVE(p, rin, count, &xrea);
		count = &xrea;
	}

	saved_ext1 = rin->ext1;
	rin->ext1 = rin->ext2;

	if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
		/* shift arg2 in place */
		printINSN(rin, x86op, count, rin->arg2.rea,
			  REXTF_SEA1 | REXTF_EA2);
	} else if (rin->operands != 3) {
		RasError(p, "Instruction must be 2-op or 3-op only");
	} else {
		if (rin->arg3.rea->eamode == EA_DIRECT) {
			/*
			 * Register target, use it directly.  It cannot be
			 * the count by definition.
			 */
			target = rin->arg3.rea;
		} else {
			/* work in a scratch register and store afterwards */
			x2ea.target_reg = (rin->arg2.flags & RAF_FLOAT) ?
					  X86_REG_XMM0 : X86_REG_RAX;
			x2ea.eamode = EA_DIRECT;
			target = &x2ea;
		}
		doMOVEExt(p, rin, rin->arg2.rea, target, REXTF_EA2);
		printINSN(rin, x86op, count, target,
			  REXTF_SEA1 | REXTF_EA2);
		if (target != rin->arg3.rea)
			doMOVEExt(p, rin, target, rin->arg3.rea, REXTF_EA2);
	}

	rin->ext1 = saved_ext1;
}

/*
 * Binary (2-arg or 3-arg), destination is in %*DX:%*AX and result is in
 * either %*DX or %*AX.  Requires spewing a multitude of adjustment
 * instructions.
*/ static void doInsnDXAX3(RASParser *p, rinsn_t *rin, const char *x86op, int issigned, int dxresult) { static rea_t xrea; static rea_t x2ea; rea_t *dea; printf("\t# DXAX3 for %s\n", x86op); if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) { dea = rin->arg2.rea; } else if (rin->operands == 3) { dea = rin->arg3.rea; } else { RasError(p, "Instruction must be 2-op or 3-op only"); return; } x2ea.target_reg = X86_REG_RCX; x2ea.eamode = EA_DIRECT; doMOVE(p, rin, rin->arg1.rea, &x2ea); xrea.target_reg = X86_REG_RAX; xrea.eamode = EA_DIRECT; doMOVE(p, rin, rin->arg2.rea, &xrea); if (issigned) { switch(rin->ext1) { case REXT_I8: printf("\tcbtw\n"); break; case REXT_I16: printf("\tcwtd\n"); break; case REXT_I32: printf("\tcltd\n"); break; case REXT_I64: printf("\tcqto\n"); break; default: dpanic("Unknown/unsupported REXT %d", rin->ext1); break; } } else { switch(rin->ext1) { case REXT_I8: printf("\txorl\t%%edx, %%edx\n"); break; case REXT_I16: printf("\txorl\t%%edx, %%edx\n"); break; case REXT_I32: printf("\txorl\t%%edx, %%edx\n"); break; case REXT_I64: printf("\txorl\t%%edx, %%edx\n"); break; default: dpanic("Unknown/unsupported REXT %d", rin->ext1); break; } } printf("\t%s%s\t", x86op, x86ext(rin->ext1, rin->arg1.flags)); printEA(&x2ea, rin->ext1); printf("\n"); if (dxresult) xrea.target_reg = X86_REG_RDX; doMOVE(p, rin, &xrea, dea); } /* * 80-bit is messy to say the least */ static int doInsnFloating3(RASParser *p, rinsn_t *rin, const char *x86op, int isfcmp) { int rev = 0; if (rin->ext1 != REXT_I128) { /* * NOTE: InsnFCMP path never hits this (it wouldn't work * anyway) */ doInsnMR3(p, rin, x86op, 0); } else { rea_t xsea; rea_t xmea; rea_t xdea; rea_t *sea; rea_t *mea; rea_t *dea; int sea_sp; int mea_sp; #if 0 int dea_sp; int ttl_sp; #endif sea_sp = (rin->arg1.rea->eamode == EA_DIRECT); mea_sp = (rin->arg2.rea->eamode == EA_DIRECT); #if 0 dea_sp = (isfcmp == 0 && rin->operands != 2 && !sameEA(rin->arg2.rea, rin->arg3.rea)); ttl_sp = (sea_sp + mea_sp + 
dea_sp) * 16; #endif #if 0 /* XXX removed, using red zone instead */ if (ttl_sp) { printf("\tsub%c\t$%d, ", X86Size, ttl_sp); printREGTGT(X86_REG_RSP, 0, ExtSize); printf("\n"); } #endif sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xsea, rin->ext1, -16, 1); #if 0 sea = rin->arg1.rea; if (sea->eamode == EA_IMMEDIATE16) { CreateGlobalImmediate(sea, &xsea, rin->ext1); sea = &xsea; } else if (sea->eamode != EA_MEMORY) { bzero(&xsea, sizeof(xsea)); xsea.target_reg = X86_REG_RSP; xsea.eamode = EA_MEMORY; xsea.offset = -16; doMOVE(p, rin, sea, &xsea); sea = &xsea; } #endif mea = InsnSimplifyFP128(p, rin, rin->arg2.rea, &xmea, rin->ext1, -32, 1); #if 0 mea = rin->arg2.rea; if (mea->eamode == EA_IMMEDIATE16) { CreateGlobalImmediate(mea, &xmea, rin->ext1); mea = &xmea; } else if (mea->eamode != EA_MEMORY) { bzero(&xmea, sizeof(xmea)); xmea.target_reg = X86_REG_RSP; xmea.eamode = EA_MEMORY; xmea.offset = -16 - sea_sp * 16; doMOVE(p, rin, mea, &xmea); mea = &xmea; } #endif /* * FCMP doesn't store a floating point result */ if (isfcmp == 0) { if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) { dea = mea; } else { dea = rin->arg3.rea; if (dea->eamode != EA_MEMORY) { bzero(&xdea, sizeof(xdea)); xdea.target_reg = X86_REG_RSP; xdea.eamode = EA_MEMORY; xdea.offset = -16 - (sea_sp + mea_sp) * 16; dea = &xdea; } } } else { dea = NULL; } printf("\tfldt\t"); printEA(sea, REXT_I128); printf("\n"); printf("\tfldt\t"); printEA(mea, REXT_I128); printf("\n"); if (isfcmp) { #if 0 if (ttl_sp) { printf("\tadd%c\t$%d, ", X86Size, ttl_sp); printREGTGT(X86_REG_RSP, 0, ExtSize); printf("\n"); } #endif printf("\t%s\t%%st(1), %%st\n", x86op); printf("\tfstp\t%%st(0)\n"); /* looks like we should leave rev 0 here */ } else { printf("\tf%sp\t%%st, %%st(1)\n", x86op); printf("\tfstpt\t"); printEA(dea, REXT_I128); printf("\n"); } if (isfcmp == 0) { if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) { if (rin->arg2.rea->eamode != EA_MEMORY) doMOVE(p, rin, dea, rin->arg2.rea); 
} else { if (rin->arg3.rea->eamode != EA_MEMORY) doMOVE(p, rin, dea, rin->arg3.rea); } #if 0 if (ttl_sp) { printf("\tadd%c\t$%d, ", X86Size, ttl_sp); printREGTGT(X86_REG_RSP, 0, ExtSize); printf("\n"); } #endif } } return rev; } /* * This helper can handle the following MOVEs for integer and * floating point: * * imm/reg * imm/mem * reg/reg * reg/mem * mem/reg * * This function does not handle mem/mem. * * This function may eat XMM0 or RCX, but is guaranteed not to eat any * registers if either the source or target is a register (an immediate * source does not count so imm,reg CAN eat another register). */ static void doMOVE(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea) { static rea_t xrea; if ((rin->arg1.flags & RAF_FLOAT) && ((sea->eamode == EA_IMMEDIATE && sea->immlo == 0) || (sea->eamode == EA_IMMEDIATE16 && sea->immlo == 0 && sea->immhi == 0))) { /* * MOVE $0,dea */ if (dea->eamode == EA_DIRECT) { printf("\tpxor\t"); printEA(dea, rin->ext1); printf(", "); printEA(dea, rin->ext1); printf("\n"); } else { if (p->opt_last & RASOPT_XMM0_ZERO) { printf("\t# pxor %%xmm0, %%xmm0\n"); } else { xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; printf("\tpxor\t"); printEA(&xrea, rin->ext1); printf(", "); printEA(&xrea, rin->ext1); printf("\n"); } p->opt_flags |= RASOPT_XMM0_ZERO; doMOVE(p, rin, &xrea, dea); } } else if ((rin->arg1.flags & RAF_FLOAT) && (sea->eamode == EA_IMMEDIATE || sea->eamode == EA_IMMEDIATE16)) { rea_t irea; CreateGlobalImmediate(sea, &irea, rin->ext1); printf("\tmov%s\t", x86ext(rin->ext1, rin->arg1.flags)); printEA(&irea, rin->ext1); printf(", "); if (dea->eamode == EA_DIRECT) { printEA(dea, rin->ext1); printf("\n"); } else { xrea.target_reg = X86_REG_XMM0; xrea.eamode = EA_DIRECT; printEA(&xrea, rin->ext1); printf("\n"); doMOVE(p, rin, &xrea, dea); } } else if (sea->eamode == EA_IMMEDIATE && sea->immlo == 0 && dea->eamode == EA_DIRECT) { /* * MOVE $0,IREG */ printf("\txorl\t"); /* avoid REX */ printEA(dea, REXT_I32); printf(", "); 
printEA(dea, REXT_I32); printf("\n"); } else { /* * MOVE generic (to/from register) */ printf("\tmov%s\t", x86ext(rin->ext1, rin->arg1.flags)); printEA(sea, rin->ext1); printf(", "); printEA(dea, rin->ext1); printf("\n"); } } static void doMOVEExt(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea, uint8_t extov) { uint8_t sext = rin->ext1; uint8_t dext = rin->ext1; uint8_t sflags = rin->arg1.flags; uint8_t dflags = rin->arg1.flags; if (extov & REXTF_MASK) { sext = extov & REXTF_MASK; dext = sext; } if (extov & REXTF_EA2) { sext = rin->ext2; dext = rin->ext2; sflags = rin->arg2.flags; dflags = rin->arg2.flags; } if (extov & REXTF_SEA1) { sext = rin->ext1; sflags = rin->arg1.flags; } if (sflags & RAF_FLOAT) sext |= REXTF_FLOAT; if (dflags & RAF_FLOAT) dext |= REXTF_FLOAT; if (sea->eamode == EA_IMMEDIATE && sea->immlo == 0 && dea->eamode == EA_DIRECT) { printf("\txorl\t"); /* avoid REX */ printEA(dea, REXT_I32); printf(", "); printEA(dea, REXT_I32); } else { printf("\tmov%s\t", x86ext(dext, dflags)); printEA(sea, sext); printf(", "); printEA(dea, dext); } printf("\n"); } /* * Load effective-address instead of contents. Note that some originally * EA_DIRECT EAs may have been converted to stack-temporary storage. In * this situation the effective-address is, in fact, the contents of the * storage. 
*/ static void doLEA(RASParser *p, rea_t *sea, rea_t *dea) { if (sea->orig_eamode == EA_DIRECT) { printf("\tmov%c\t", X86Size); printEA(sea, ExtSize); printf(", "); printEA(dea, ExtSize); } else if (sea->eamode == EA_MEMORY && sea->sym == NULL && sea->regno == 0) { if (sea->offset == 0 && dea->eamode == EA_DIRECT) { printf("\txorl\t"); /* avoid REX */ printEA(dea, REXT_I32); printf(", "); printEA(dea, REXT_I32); } else { printf("\tmov%c\t$%jd, ", X86Size, sea->offset); printEA(dea, ExtSize); } } else { printf("\tlea%c\t", X86Size); printEA(sea, ExtSize); printf(", "); printEA(dea, ExtSize); } printf("\n"); } static void doSpecialSave(rinsn_t *rin) { uint64_t mask; size_t fpcount; size_t count; int i; mask = rin->special_save; if (mask == 0) return; printf("\t# SPECIAL SAVE\n"); fpcount = 0; count = 0; for (i = 0; mask; ++i) { if ((mask & (1LLU << i)) == 0) continue; if (i < X86_REG_FBASE) { mask &= ~(1LLU << i); printf("\tpush%c\t", X86Size); printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize); printf("\n"); count += sizeof(void *); } else { fpcount += 16; } } count = ((count + 15) & ~(size_t)15) - count; /* 16-byte align */ if (fpcount + count) { printf("\tsub%c\t$%zd, ", X86Size, (fpcount + count)); printREGTGT(X86_REG_RSP, 0, ExtSize); printf("\n"); } if (fpcount == 0) return; for (i = X86_REG_FBASE; mask; ++i) { if ((mask & (1LLU << i)) == 0) continue; mask &= ~(1LLU << i); printf("\tmovaps\t%%xmm%d, %zd(", i - X86_REG_FBASE, fpcount); printREGTGT(X86_REG_RSP, 0, ExtSize); printf(")\n"); fpcount += 16; } } static void doSpecialRestore(rinsn_t *rin) { uint64_t mask; size_t fpcount; size_t count; int i; mask = rin->special_save; if (mask == 0) return; printf("\t# SPECIAL RESTORE\n"); fpcount = 0; for (i = X86_REG_FBASE; mask & 0xFFFFFFFF00000000LLU; ++i) { if ((mask & (1LLU << i)) == 0) continue; mask &= ~(1LLU << i); printf("\tmovaps\t%zd(", fpcount); printREGTGT(X86_REG_RSP, 0, ExtSize); printf("), %%xmm%d\n", i - X86_REG_FBASE); fpcount += 16; } count = 0; for (i = 
31; i >= 0; --i) {
        /* count the bytes occupied by the pushed (non-FP) registers */
        if ((mask & (1LLU << i)) == 0)
            continue;
        count += sizeof(void *);
    }
    count = ((count + 15) & ~(size_t)15) - count;   /* 16-byte align */
    if (fpcount + count) {
        printf("\tadd%c\t$%jd, ", X86Size, (intmax_t)(fpcount + count));
        printREGTGT(X86_REG_RSP, 0, ExtSize);
        printf("\n");
    }

    /* pop in descending register order (doSpecialSave pushes ascending) */
    for (i = 31; mask; --i) {
        if ((mask & (1LLU << i)) == 0)
            continue;
        mask &= ~(1LLU << i);
        printf("\tpop%c\t", X86Size);
        printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
        printf("\n");
    }
}

/*
 * Used for CMP instructions where arg1 and arg2.rea are both source arguments.
 * Allow either reg,mem or $imm,mem or mem,reg.
 *
 * We have an optimization to reverse the comparison and return the reverse
 * status to the caller (allows immediate arg2.rea to be optimized).
 *
 * The operands to emit are returned through *sea1p and *sea2p.  The
 * return value is 1 if they are swapped relative to arg1/arg2, else 0.
 * A function-static scratch rea (xrea) is used when an operand has to
 * be staged in a register, so the returned pointers are only valid
 * until the next call.
 */
static int
InsnSimplifyRMMRCMP(RASParser *p, rinsn_t *rin, rea_t **sea1p, rea_t **sea2p)
{
    static rea_t xrea;
    int rev;

    if (rin->arg2.rea->eamode == EA_IMMEDIATE ||
        rin->arg2.rea->eamode == EA_IMMEDIATE16) {
        if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
            rin->arg1.rea->eamode == EA_IMMEDIATE16) {
            /*
             * Bleh, a constant expression didn't get collapsed.
             * Make it work.  We don't interpret here (yet).
             */
            if (rin->arg2.flags & RAF_FLOAT) {
                xrea.target_reg = X86_REG_XMM0;
                xrea.eamode = EA_DIRECT;
            } else {
                xrea.target_reg = X86_REG_RCX;
                xrea.eamode = EA_DIRECT;
            }
            *sea1p = rin->arg1.rea;
            *sea2p = &xrea;
            doMOVE(p, rin, rin->arg2.rea, &xrea);
            rev = 0;
        } else {
            /*
             * $imm,reg/mem - We are good.
             */
            *sea1p = rin->arg2.rea;
            *sea2p = rin->arg1.rea;
            rev = 1;
        }
    } else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
               rin->arg1.rea->eamode == EA_IMMEDIATE16) {
        /*
         * $imm,reg/mem - We are good.
         */
        *sea1p = rin->arg1.rea;
        *sea2p = rin->arg2.rea;
        rev = 0;
    } else if (rin->arg1.rea->eamode != EA_DIRECT &&
               rin->arg2.rea->eamode != EA_DIRECT) {
        /*
         * Usually better to put arg1 in register (but we could put
         * arg2.rea in the register).
*/
        if (rin->arg1.flags & RAF_FLOAT) {
            xrea.target_reg = X86_REG_XMM0;
            xrea.eamode = EA_DIRECT;
        } else {
            xrea.target_reg = X86_REG_RCX;
            xrea.eamode = EA_DIRECT;
        }
        *sea1p = &xrea;
        *sea2p = rin->arg2.rea;
        doMOVE(p, rin, rin->arg1.rea, &xrea);
        rev = 0;
    } else {
        /*
         * Otherwise we are good
         */
        *sea1p = rin->arg1.rea;
        *sea2p = rin->arg2.rea;
        rev = 0;
    }
    return rev;
}

/*
 * Floating point version allows a memory or register source and register
 * destination.  Unlike normal instructions, an immediate source is not
 * allowed.
 *
 * If possible, reverse the sense of the comparison if it will generate more
 * optimal code.
 *
 * The operands to emit are returned through *sea1p and *sea2p.  The
 * return value is 1 when they are swapped relative to arg1/arg2, else 0.
 * Two function-static scratch reas (xrea, x2ea) hold staged operands,
 * so the returned pointers are only valid until the next call.
 */
static int
InsnSimplifyMRFCMP(RASParser *p, rinsn_t *rin, rea_t **sea1p, rea_t **sea2p)
{
    static rea_t xrea;
    static rea_t x2ea;
    int rev;

    if (rin->arg2.rea->eamode == EA_DIRECT) {
        /*
         * ARG2 is direct, which is optimal.  ARG1 can be direct
         * or memory, but cannot be immediate.  Don't reverse.
         */
        rev = 0;
        if (rin->arg1.rea->eamode == EA_DIRECT ||
            rin->arg1.rea->eamode == EA_MEMORY) {
            *sea1p = rin->arg1.rea;
            *sea2p = rin->arg2.rea;
        } else {
            /* stage the remaining (immediate) forms of arg1 in %xmm0 */
            xrea.target_reg = X86_REG_XMM0;
            xrea.eamode = EA_DIRECT;
            doMOVE(p, rin, rin->arg1.rea, &xrea);
            *sea1p = &xrea;
            *sea2p = rin->arg2.rea;
        }
    } else if (rin->arg1.rea->eamode == EA_DIRECT &&
               (rin->arg2.rea->eamode == EA_IMMEDIATE ||
                rin->arg2.rea->eamode == EA_IMMEDIATE16)) {
        /*
         * ARG1 is direct and ARG2 is immediate.  Since we need a
         * memory-immediate, reverse it and optimize the immediate.
         */
        rev = 1;
        CreateGlobalImmediate(rin->arg2.rea, &xrea, rin->ext1);
        *sea1p = &xrea;
        *sea2p = rin->arg1.rea;
    } else if (rin->arg1.rea->eamode == EA_DIRECT) {
        /*
         * ARG1 is direct so lets reverse it in case ARG2 is memory.
         * Plus this is optimal if ARG2 happens to be immediate.
*/
        rev = 1;
        /* arg2 is loaded into %xmm0 and becomes the first operand */
        xrea.target_reg = X86_REG_XMM0;
        xrea.eamode = EA_DIRECT;
        doMOVE(p, rin, rin->arg2.rea, &xrea);
        *sea1p = &xrea;
        *sea2p = rin->arg1.rea;
    } else if ((rin->arg1.rea->eamode == EA_IMMEDIATE ||
                rin->arg1.rea->eamode == EA_IMMEDIATE16) &&
               (rin->arg2.rea->eamode == EA_IMMEDIATE ||
                rin->arg2.rea->eamode == EA_IMMEDIATE16)) {
        /*
         * Sigh, both ARG1 and ARG2 are immediate.  We can use
         * CreateGlobalImmediate() to get a memory-immediate for
         * arg1.rea.
         */
        rev = 0;
        CreateGlobalImmediate(rin->arg1.rea, &xrea, rin->ext1);
        /* arg2 is loaded into %xmm1 */
        x2ea.target_reg = X86_REG_XMM1;
        x2ea.eamode = EA_DIRECT;
        doMOVE(p, rin, rin->arg2.rea, &x2ea);
        *sea1p = &xrea;
        *sea2p = &x2ea;
    } else if (rin->arg2.rea->eamode == EA_IMMEDIATE ||
               rin->arg2.rea->eamode == EA_IMMEDIATE16) {
        /*
         * ARG1 is memory and arg2.rea is immediate.  We have to
         * throw arg2.rea into a register so don't reverse.
         */
        rev = 0;
        x2ea.target_reg = X86_REG_XMM1;
        x2ea.eamode = EA_DIRECT;
        doMOVE(p, rin, rin->arg2.rea, &x2ea);
        *sea1p = rin->arg1.rea;
        *sea2p = &x2ea;
    } else {
        /*
         * arg1 is immediate or memory, arg2 is memory.  Since
         * immediate values aren't allowed both sides are effectively
         * memory.  Reverse it (no reason, just feel like it).
*/
        rev = 1;
        /* arg1 is loaded into %xmm0 and becomes the second operand */
        x2ea.target_reg = X86_REG_XMM0;
        x2ea.eamode = EA_DIRECT;
        doMOVE(p, rin, rin->arg1.rea, &x2ea);
        *sea1p = rin->arg2.rea;
        *sea2p = &x2ea;
    }
    return rev;
}

/*
 * Simplify an EA into a memory operand for float128's (FP80)
 *
 * Returns the EA the caller should use:
 *  - EA_IMMEDIATE16: tea is filled in by CreateGlobalImmediate() and
 *    returned.
 *  - EA_MEMORY: sea is returned unchanged.
 *  - anything else: tea is zeroed and set up as a stack-pointer relative
 *    memory operand at offset rspoff; if loadme is non-zero sea is first
 *    moved into that slot.  tea is returned.
 */
static rea_t *
InsnSimplifyFP128(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *tea,
                  uint8_t ext, int rspoff, int loadme)
{
    if (sea->eamode == EA_IMMEDIATE16) {
        CreateGlobalImmediate(sea, tea, ext);
        sea = tea;
    } else if (sea->eamode != EA_MEMORY) {
        bzero(tea, sizeof(*tea));
        tea->target_reg = X86_REG_RSP;
        tea->eamode = EA_MEMORY;
        tea->offset = rspoff;
        if (loadme)
            doMOVE(p, rin, sea, tea);
        sea = tea;
    }
    return sea;
}

/*
 * Low level helper functions
 */

/*
 * Print a register operand.
 *
 * If target_reg is non-zero it names a machine register: a base name is
 * looked up (or synthesized as "rats<n>" / "ratsF<n>" when the value is
 * not a known register) and then decorated according to the size in
 * ext.  Otherwise the virtual register regno is printed symbolically.
 * ext must be non-zero.
 */
static void
printREGTGT(uint16_t target_reg, uint32_t regno, uint8_t ext)
{
    dassert(ext);

    if (target_reg) {
        const char *rid;
        char buf[16];
        int mr;

        mr = target_reg;
        switch(mr) {
        case X86_REG_RAX:
            rid = "ax";
            break;
        case X86_REG_RCX:
            rid = "cx";
            break;
        case X86_REG_RDX:
            rid = "dx";
            break;
        case X86_REG_RBX:
            rid = "bx";
            break;
        case X86_REG_RSP:
            rid = "sp";
            break;
        case X86_REG_RBP:
            rid = "bp";
            break;
        case X86_REG_RSI:
            rid = "si";
            break;
        case X86_REG_RDI:
            rid = "di";
            break;
        case X86_REG_R8:
            rid = "r8";
            break;
        case X86_REG_R9:
            rid = "r9";
            break;
        case X86_REG_R10:
            rid = "r10";
            break;
        case X86_REG_R11:
            rid = "r11";
            break;
        case X86_REG_R12:
            rid = "r12";
            break;
        case X86_REG_R13:
            rid = "r13";
            break;
        case X86_REG_R14:
            rid = "r14";
            break;
        case X86_REG_R15:
            rid = "r15";
            break;
        case X86_REG_RIP:
            rid = "ip";
            break;
        case X86_REG_XMM0:
            rid = "xmm0";
            break;
        case X86_REG_XMM1:
            rid = "xmm1";
            break;
        case X86_REG_XMM2:
            rid = "xmm2";
            break;
        case X86_REG_XMM3:
            rid = "xmm3";
            break;
        case X86_REG_XMM4:
            rid = "xmm4";
            break;
        case X86_REG_XMM5:
            rid = "xmm5";
            break;
        case X86_REG_XMM6:
            rid = "xmm6";
            break;
        case X86_REG_XMM7:
            rid = "xmm7";
            break;
        case X86_REG_XMM8:
            rid = "xmm8";
            break;
        case X86_REG_XMM9:
            rid = "xmm9";
            break;
        case X86_REG_XMM10:
            rid = "xmm10";
            break;
        case X86_REG_XMM11:
            rid = "xmm11";
            break;
        case X86_REG_XMM12:
rid = "xmm12";
            break;
        case X86_REG_XMM13:
            rid = "xmm13";
            break;
        case X86_REG_XMM14:
            rid = "xmm14";
            break;
        case X86_REG_XMM15:
            rid = "xmm15";
            break;
        default:
            /*
             * Not a known machine register: synthesize a placeholder
             * name so the output still identifies the register number.
             */
            if ((mr & X86_REG_MASK) < 32)
                snprintf(buf, sizeof(buf), "rats%d", mr & 255);
            else
                snprintf(buf, sizeof(buf), "ratsF%d", (mr - 32) & 255);
            rid = buf;
            break;
        }
        mr &= X86_REG_MASK;

        /*
         * Decorate the base name for the operand size.  Register
         * numbers below 8 take the legacy prefixes/suffixes, numbers
         * 8..31 take the b/w/d suffixes, and numbers 32 and up are
         * printed undecorated.
         */
        switch(ext & REXTF_MASK) {
        case REXT_I8:
            dassert(mr < 32);
            if (mr < 4) {
                printf("%%%cl", rid[0]);
            } else if (mr < 8) {
                printf("%%%sl", rid);
            } else {
                printf("%%%sb", rid);
            }
            break;
#if 0
        case X86_SIZE_HIBYTE:
            dassert(mr < 4);
            printf("%%%ch", rid[0]);
            break;
#endif
        case REXT_I16:
            dassert(mr < 32);
            if (mr < 8)
                printf("%%%s", rid);
            else
                printf("%%%sw", rid);
            break;
        case REXT_I32:
            if (mr < 8)
                printf("%%e%s", rid);
            else if (mr < 32)
                printf("%%%sd", rid);
            else
                printf("%%%s", rid);
            break;
        case REXT_I64:
            if (mr < 8)
                printf("%%r%s", rid);
            else if (mr < 32)
                printf("%%%s", rid);
            else
                printf("%%%s", rid);
            break;
        case REXT_I128:
            dassert(mr >= 32);
            printf("%%%s", rid);
            break;
        case REXT_F32:
            dassert(mr >= 32);
            printf("%%%s", rid);
            break;
        case REXT_F64:
            dassert(mr >= 32);
            printf("%%%s", rid);
            break;
        case REXT_F128:
            /*
             * Suitable for e.g. FMOVE, not suitable for
             * fp stack ops.  FP insns will do those manually.
*/
            dassert(mr >= 32);
            printf("%%%s", rid);
            break;
        default:
            printf("[REXT%d]", ext);
            break;
        }
    } else {
        /*
         * No machine register assigned, print the virtual register.
         */
        if (regno & REGF_ADHOC) {
            if (regno & REGF_PTR)
                printf("%%q%d", regno & REGF_MASK);
            else
                printf("%%v%d", regno & REGF_MASK);
        } else if (regno & REGF_PTR) {
            switch(regno) {
            case REG_SG:
                printf("%%sg");
                break;
            case REG_RP:
                printf("%%rp");
                break;
            case REG_DB:
                printf("%%db");
                break;
            case REG_TP:
                printf("%%tp");
                break;
            case REG_AP:
                printf("%%ap");
                break;
            case REG_FP:
                printf("%%fp");
                break;
            case REG_PC:
                printf("%%pc");
                break;
            default:
                printf("%%p%d", regno & REGF_MASK);
                break;
            }
        } else {
            printf("%%r%d", regno & REGF_MASK);
        }
    }
}

/*
 * Print the register held by an rea (wrapper around printREGTGT()).
 */
static void
printREG(rea_t *rea, uint8_t ext)
{
    printREGTGT(rea->target_reg, rea->regno, ext);
}

/*
 * Externally visible helper: print machine register target_reg at size
 * ext.  Only the target_reg and regno fields of the temporary rea are
 * initialized, which is all printREG() reads.
 */
void
InsnDebugREG(uint16_t target_reg, uint8_t ext)
{
    rea_t rea;

    rea.target_reg = target_reg;
    rea.regno = 0;
    printREG(&rea, ext);
}

/*
 * Print one instruction line: tab, x86op plus a size suffix, then the
 * operand(s) and a newline.  Either sea or dea may be NULL, in which
 * case only the other operand is printed.
 *
 * extov selects the operand sizes: REXTF_EA2 takes the suffix and both
 * sizes from arg2 instead of arg1, REXTF_SEA1 forces the source size
 * back to ext1, and non-zero REXTF_MASK bits override both operand
 * sizes (but not the mnemonic suffix, which is printed beforehand).
 */
static void
printINSN(rinsn_t *rin, const char *x86op, rea_t *sea, rea_t *dea,
          uint8_t extov)
{
    int sext;
    int dext;

    if (extov & REXTF_EA2) {
        printf("\t%s%s", x86op, x86ext(rin->ext2, rin->arg2.flags));
        sext = rin->ext2;
        dext = rin->ext2;
    } else {
        printf("\t%s%s", x86op, x86ext(rin->ext1, rin->arg1.flags));
        sext = rin->ext1;
        dext = rin->ext1;
    }
    if (extov & REXTF_SEA1)
        sext = rin->ext1;
    if (extov & REXTF_MASK) {
        sext = extov & REXTF_MASK;
        dext = extov & REXTF_MASK;
    }
    if (sea && dea) {
        printf("\t");
        printEA(sea, sext);
        printf(", ");
        printEA(dea, dext);
    } else if (sea) {
        printf("\t");
        printEA(sea, sext);
    } else if (dea) {
        printf("\t");
        printEA(dea, dext);
    }
    printf("\n");
}

/*
 * Print a single operand.  ext gives the register size for EA_DIRECT
 * operands; base registers of memory operands always print at ExtSize.
 */
static void
printEA(rea_t *rea, uint8_t ext)
{
    rea_t *regea;

    /*
     * Print the EA
     */
    switch(rea->eamode) {
    case EA_DIRECT:
        printREG(rea, ext);
        break;
    case EA_MEMORY:
        /* regea is the rea supplying the base register */
        regea = rea;
        if (rea->direct) {
            regea = rea->direct;
            /*
             * This can occur if the register allocator is unable
             * to allocate a mandatory register.
*/
            dassert(regea->eamode == EA_DIRECT);
        }
        if (rea->sym) {
            /*
             * A leading '@' marks a symbol printed verbatim (minus
             * the '@'), otherwise the symbol becomes a
             * per-procedure local label.
             */
            if (rea->sym->id[0] == '@')
                printf("%s", rea->sym->id + 1);
            else
                printf(".L%d%s", ProcNo, rea->sym->id);
            if (rea->offset > 0)
                printf("+");
        }
        /*
         * Print the displacement when non-zero, or when there is
         * neither a symbol nor a base register number to print.
         */
        if (rea->offset ||
            (rea->sym == NULL && (regea->regno & ~REGF_PTR) == 0)) {
            printf("%jd", (intmax_t)rea->offset);
        }
        if (regea->target_reg || (regea->regno & ~REGF_PTR)) {
            printf("(");
            printREG(regea, ExtSize);
            printf(")");
        }

        /*
         * Catch broken stack-relative accesses.
         */
        if (regea->target_reg == X86_REG_RSP &&
            rea->offset >= ProcStackSize) {
            dpanic("RAS Assembly used an out-of-bounds frame "
                   "offset (%zd/%zd)",
                   rea->offset, ProcStackSize);
        }
        break;
    case EA_IMMEDIATE:
        if (rea->sym) {
            printf("%s", rea->sym->id);
            if (rea->immlo > 0)
                printf("+");
        }
        if (rea->immlo || rea->sym == NULL)
            printf("$%jd", (intmax_t)rea->immlo);
        break;
    case EA_IMMEDIATE16:
        /* 128-bit immediate printed as 32 hex digits, high half first */
        printf("$0x%016jx%016jx",
               (intmax_t)rea->immhi, (intmax_t)rea->immlo);
        break;
    default:
        printf("?ea=%d", rea->eamode);
        break;
    }
}

/*
 * Print a branch target as a per-procedure local label.
 */
static void
printBRLabel(rsym_t *label)
{
    printf(".L%d%s", ProcNo, label->id);
}

/************************************************************************
 *                          REGISTER ALLOCATOR                          *
 ************************************************************************
 *
 */
static void regAllocEA(RASParser *p, rinsn_t *rin,
                       rspan_t *arg, rspan_t *argd, int pass);
static uint64_t regAllocSpan(rinsn_t *rin, rspan_t *span,
                             uint16_t target_reg, int special_case);
static void regClearEA(RASParser *p, rspan_t *span, rspan_t *spand);

/*
 * Top level register allocator entry point.
 *
 * First tries an allocation with RegAllocWeight 0.  If that uses any
 * register outside X86_REGF_GOOD, binary-searches RegAllocWeight in
 * [0, 65536) for a threshold whose allocation fits, clearing and
 * rescanning on each step.  bytes is the base frame size used to
 * compute ExtraStackBase.
 */
static void
RegAllocatorX86(RASParser *p, rblock_t *rblock, runesize_t bytes)
{
    int wlow = 0;
    int whigh = 65536;
    int wmid;

    /*
     * Try maximum registerization first.
     */
    RegAllocWeight = 0;
    ExtraStackSpace = 0;
    ExtraStackBase = (bytes + CPOINTER_ALIGN) & ~CPOINTER_ALIGN;
    SaveMask = 0;
    RegAllocatorScan(p, rblock);
    if ((SaveMask & ~X86_REGF_GOOD) == 0)
        return;

    /*
     * Didn't work, search the weighting.
*/
    while (wlow < whigh - 1) {
        RegAllocatorClear(p, rblock);
        wmid = wlow + (whigh - wlow) / 2;
        RegAllocWeight = wmid;
        SaveMask = 0;
        ExtraStackSpace = 0;
        ExtraStackBase = (bytes + CPOINTER_ALIGN) & ~CPOINTER_ALIGN;
        RegAllocatorScan(p, rblock);
        if (SaveMask & ~X86_REGF_GOOD) {
            /*
             * Too many registers were allocated, increase
             * weight requirement.
             */
            wlow = wmid;
        } else {
            /*
             * The allocation fit within the good registers.
             * Lower the upper bound so smaller weight
             * requirements are tried next.
             */
            whigh = wmid;
        }
    }

    /*
     * If the last probe overflowed, redo the scan one weight higher.
     */
    if (SaveMask & ~X86_REGF_GOOD) {
        RegAllocatorClear(p, rblock);
        ++RegAllocWeight;
        SaveMask = 0;
        ExtraStackSpace = 0;
        ExtraStackBase = (bytes + CPOINTER_ALIGN) & ~CPOINTER_ALIGN;
        RegAllocatorScan(p, rblock);
    }
    if (SaveMask & ~X86_REGF_GOOD)
        fprintf(stderr, "UseWeight %d - FAILED (complexity %d)\n",
                RegAllocWeight, p->pcomplexity);
    else
        fprintf(stderr, "UseWeight %d - Success\n", RegAllocWeight);
    fflush(stderr);
}

/*
 * Run regAllocEA() over the four argument slots of every non-label
 * instruction in rblock, then recurse into rblock->btrue and
 * rblock->bfalse.  RBLKF_REGALLOCATOR marks blocks already visited so
 * each block is scanned once.  The regused_init masks of the scanned
 * instructions are accumulated into SaveMask.
 */
static void
RegAllocatorScan(RASParser *p, rblock_t *rblock)
{
    rinsn_t *rin;
    uint64_t incmask = 0;
    int pass;

    if (rblock->flags & RBLKF_REGALLOCATOR)
        return;
    rblock->flags |= RBLKF_REGALLOCATOR;

    /* the loop bounds give exactly one pass (pass == 1) */
    for (pass = 1; pass < 2; ++pass) {
        RUNE_FOREACH(rin, &rblock->rinsn_list, node) {
            if (rin->op == INSN_LABEL)
                continue;

            /*
             * Note that as we are post-merge, some of these EAs
             * may be the same.
             */
            incmask |= rin->regused_init;
            regAllocEA(p, rin, &rin->arg1, &rin->arg1d, pass);
            regAllocEA(p, rin, &rin->arg2, &rin->arg2d, pass);
            regAllocEA(p, rin, &rin->arg3, &rin->arg3d, pass);
            regAllocEA(p, rin, &rin->arg4, &rin->arg4d, pass);
        }
        if (rblock->btrue) {
            RegAllocatorScan(p, rblock->btrue);
            if (rblock->bfalse)
                RegAllocatorScan(p, rblock->bfalse);
        }
    }
    SaveMask |= incmask;
}

/*
 * Allocate a register for an rea, adjust the rea's span to account for
 * the register.
 *
 * Generally speaking we have a 64-bit mask of which, really, only 16
 * general registers are mapped.  Remaining bits are used to help
 * calculate overflow.  If we fill up all 64-bits we will start allocating
 * register %rats64.
*/
static void
regAllocEA(RASParser *p, rinsn_t *rin, rspan_t *arg, rspan_t *argd, int pass)
{
    int special_case;
    uint64_t agg;
    rea_t *rea;

    rea = arg->rea;
    if (rea == NULL)
        return;
    /* handle the companion (direct) span first when one is attached */
    if (rea->direct)
        regAllocEA(p, rin, argd, NULL, pass);

    dassert(rin == arg->rin);
    if ((rea->flags & REAF_CACHEABLE) == 0)
        return;
    dassert(rea->refs == 1 || arg->root);

    /*
     * Already allocated (SaveMask also already set).  Happens for many
     * reasons including rea's representing the same object being shared.
     */
    if (rea->flags & REAF_REGALLOCD)
        return;

    /*
     * If the instruction is being dropped adjust the masks just so the
     * deallocator is happy (the bit is most likely just going to get
     * cleared again), but do NOT set the register in SaveMask yet as
     * we are not actually going to use it in this insn.
     *
     * This allows someone else to use this register and still span
     * this instruction.
     *
     * XXX It would be even better if the BlockReach*() code could
     *     chain the drops upward and potentially drop even more
     *     instructions, but DROPME is set later in the BlockCollapse()
     *     so at the moment this is the best we can do.  Regardless,
     *     this is still 100% efficient IF this was the only instruction
     *     using this cacheable EA.
     *
     * NOTE(review): the code below only returns; no masks are adjusted
     *     here.  Confirm whether the first paragraph is stale.
     */
    if (rin->flags & RINSF_DROPME)
        return;

    /*
     * We don't need to reallocate if we already allocated it or
     * if the register was statically assigned.
     */
    if (rea->target_reg)        /* statically allocated */
        return;

    /*
     * Check EA weight, do not allocate register if too low.
     *
     * Distinguish between direct and indirect registers.  This is a bit
     * confusing.  For EA_DIRECT, if REAF_DIRECT is set we are pushed
     * into a rea->direct rea and working on an indirect register.  If
     * not set, then we are working on a direct register.
     *
     * Generally speaking, we must allocate a real register for indirect
     * registers.
*/
    special_case = 0;
    if (rea->regweight < RegAllocWeight || (rea->flags & REAF_ADDRUSED)) {
        /* originally-memory EAs simply stay in memory */
        if (rea->orig_eamode == EA_MEMORY)
            return;
        if (rea->orig_eamode == EA_DIRECT &&
            (rea->flags & REAF_DIRECT) == 0) {
            /*
             * Direct register that does not earn a machine
             * register: spill it to a stack slot in the extra
             * stack area, sized from the register type.
             */
            runesize_t bytes;

            if (rea->regno & REGF_PTR) {
                bytes = CPOINTER_SIZE;
            } else {
                switch(rin->ext1) {
                case REXT_I8:
                    bytes = 1;
                    break;
                case REXT_I16:
                    bytes = 2;
                    break;
                case REXT_I32:
                    bytes = 4;
                    break;
                case REXT_I64:
                    bytes = 8;
                    break;
                case REXT_I128:
                    bytes = 16;
                    break;
                case REXT_F32:
                    bytes = 4;
                    break;
                case REXT_F64:
                    bytes = 8;
                    break;
                case REXT_F128:
                    bytes = 16;
                    break;
                default:
                    dpanic("Unknown REXT %d", rin->ext1);
                    bytes = 1;
                    break;
                }
            }
            /* align the slot to its own (power of 2) size */
            ExtraStackSpace = (ExtraStackSpace + bytes - 1) &
                              ~(bytes - 1);
            rea->eamode = EA_MEMORY;
            rea->offset = ExtraStackBase + ExtraStackSpace;
            rea->orig_offset = rea->offset;
            rea->target_reg = X86_REG_RSP;
            ExtraStackSpace += bytes;
            rea->flags |= REAF_REGALLOCD;
            return;
        }
        /*
         * Remaining low-weight EAs still get a register.  special_case
         * lets regAllocSpan() ignore X86_REGF_SPECIAL_SAVE registers in
         * regused_init and record them in special_save instead.
         */
        special_case = 1;
    }

    /*
     * If allocating.  Once allocated the target_reg remains intact
     * through potentially many shared references and passes.  Keep
     * in mind that many instructions may share this rea.
*/
    switch(rea->eamode) {
    case EA_IMMEDIATE:
        break;
    case EA_MEMORY:
        if (rea->regno == 0)
            break;
        /* first call (target_reg 0) only collects the in-use mask */
        agg = regAllocSpan(rin, arg, 0, special_case);
        agg |= X86_REGF_RSP | X86_REGF_RIP;
        /* exclude the register class that does not match the operand */
        if (arg->flags & RAF_FLOAT)
            agg |= X86_REGF_IMASK;
        else
            agg |= X86_REGF_FMASK;
        rea->target_reg = findfreebit(0, agg);
#if 0
        if ((1LLU << rea->target_reg) & ~X86_REGF_GOOD)
            printf("MEMALLOCFAIL-%d %016jx [%016jx] @%s[%p]\n",
                   special_case,
                   agg,
                   regAllocSpan(rin, arg, 0, special_case),
                   rin->opname, rea);
#endif
        dassert(rea->regno == REG_FP ||
                rea->target_reg != (X86_REG_RSP & X86_REG_MASK));
        SaveMask |= 1LLU << rea->target_reg;
        rea->target_reg |= X86_SIZE_UNSPEC;
        /* second call marks the register used across the span */
        regAllocSpan(rin, arg, rea->target_reg, special_case);
        rea->eamode = EA_DIRECT;
        rea->flags |= REAF_REGALLOCD;
        break;
    case EA_DIRECT:
        dassert(rea->regno);
        agg = regAllocSpan(rin, arg, 0, special_case);
        agg |= X86_REGF_RSP | X86_REGF_RIP;
        if (arg->flags & RAF_FLOAT)
            agg |= X86_REGF_IMASK;
        else
            agg |= X86_REGF_FMASK;
        /* passing regno lets findfreebit() apply register preferences */
        rea->target_reg = findfreebit(rea->regno, agg);
#if 0
        if ((1LLU << rea->target_reg) & ~X86_REGF_GOOD)
            printf("DIRALLOCFAIL-%d %016jx [%016jx] @%s[%p]\n",
                   special_case,
                   agg,
                   regAllocSpan(rin, arg, 0, special_case),
                   rin->opname, rea);
#endif
        dassert(rea->regno == REG_FP ||
                rea->target_reg != (X86_REG_RSP & X86_REG_MASK));
        SaveMask |= 1LLU << rea->target_reg;
        rea->target_reg |= X86_SIZE_UNSPEC;
        regAllocSpan(rin, arg, rea->target_reg, special_case);
        rea->eamode = EA_DIRECT;
        rea->flags |= REAF_REGALLOCD;
        break;
    }
}

static uint64_t regAllocSpanRecurse(rspan_t *skel, uint64_t mask,
                                    uint64_t bit, uint16_t target_reg,
                                    int special_case);

/*
 * Walk the span of an argument, starting with rin itself.
 *
 * With target_reg == 0 this only returns the union of the regused_init
 * and regused_agg masks over the span (minus X86_REGF_SPECIAL_SAVE bits
 * of regused_init when special_case is set).  With a non-zero
 * target_reg it additionally sets that register's bit in regused_agg
 * of every instruction visited, and in special_save where the bit
 * collides with an instruction's special-save registers.
 */
static uint64_t
regAllocSpan(rinsn_t *rin, rspan_t *span, uint16_t target_reg,
             int special_case)
{
    uint64_t mask;
    uint64_t bit;

    bit = 1LLU << (target_reg & X86_REG_MASK);

    if (special_case)
        mask = rin->regused_init & ~X86_REGF_SPECIAL_SAVE;
    else
        mask = rin->regused_init;
    mask |= rin->regused_agg;
    if (target_reg) {
        rin->regused_agg |= bit;
        if (special_case &&
            (bit & rin->regused_init &
X86_REGF_SPECIAL_SAVE)) {
            rin->special_save |= bit;
        }
    }
    if (span->root) {
        mask = regAllocSpanRecurse(span->root, mask, bit,
                                   target_reg, special_case);
        /* SpanClear() is called after every walk of the skeleton */
        SpanClear(span->root);
    }
    return mask;
}

/*
 * Recursive worker for regAllocSpan().  Visits the instructions covered
 * by one span skeleton node and then its strue/sfalse successors,
 * accumulating used-register bits into mask and, when target_reg is
 * non-zero, marking bit in each visited instruction.  RAF_SPAN is set
 * on visited nodes so each node is processed once per walk.
 */
static uint64_t
regAllocSpanRecurse(rspan_t *skel, uint64_t mask, uint64_t bit,
                    uint16_t target_reg, int special_case)
{
    rinsn_t *rin;
    rspan_t *span;

    if (skel->flags & RAF_SPAN)         /* already scanned */
        return (mask);
    skel->flags |= RAF_SPAN;

    /*
     * For a RAF_SKELETON node start at the first linked span's
     * instruction (nothing to scan if there is none), otherwise start
     * at the first instruction of the node's block.
     */
    span = skel->link;
    if (skel->flags & RAF_SKELETON) {
        if (span)
            rin = span->rin;
        else
            rin = NULL;
    } else {
        rin = RUNE_FIRST(&skel->block->rinsn_list);
    }

    while (rin) {
        /*
         * Incorporate bit
         */
        if (special_case)
            mask |= rin->regused_init & ~X86_REGF_SPECIAL_SAVE;
        else
            mask |= rin->regused_init;
        mask |= rin->regused_agg;
        if (target_reg) {
            rin->regused_agg |= bit;
            if (special_case &&
                (bit & rin->regused_init & X86_REGF_SPECIAL_SAVE)) {
                rin->special_save |= bit;
            }
        }

        /*
         * Check span for rin iteration.
         *
         * If there are no more instructions in this block using our
         * variable, check our block linkages.  If all block linkages
         * set RAF_SKELETON then the variable is not used any more and
         * we can stop.
*/
        while (span && span->rin == rin)
            span = span->link;
        if (span == NULL) {
            if ((skel->strue == NULL ||
                 (skel->strue->flags & RAF_SKELETON)) &&
                (skel->sfalse == NULL ||
                 (skel->sfalse->flags & RAF_SKELETON))) {
                break;
            }
        }
        rin = RUNE_NEXT(rin, node);
    }

    /*
     * Recurse skeleton
     */
    if (skel->strue)
        mask = regAllocSpanRecurse(skel->strue, mask, bit,
                                   target_reg, special_case);
    if (skel->sfalse)
        mask = regAllocSpanRecurse(skel->sfalse, mask, bit,
                                   target_reg, special_case);
    return mask;
}

/*
 * Undo a previous RegAllocatorScan(): clear the per-instruction
 * regused_agg / special_save masks and reset every allocated rea, then
 * recurse into the blocks targeted by the branch instruction.  Only
 * blocks flagged RBLKF_REGALLOCATOR are processed (and the flag is
 * cleared), so each block is visited once.
 */
static void
RegAllocatorClear(RASParser *p, rblock_t *rblock)
{
    rinsn_t *rin;

    if ((rblock->flags & RBLKF_REGALLOCATOR) == 0)
        return;
    rblock->flags &= ~RBLKF_REGALLOCATOR;

    RUNE_FOREACH(rin, &rblock->rinsn_list, node) {
        rin->regused_agg = 0;
        if (rin->op == INSN_LABEL)
            continue;
        rin->special_save = 0;
        if (rin->arg1.rea)
            regClearEA(p, &rin->arg1, &rin->arg1d);
        if (rin->arg2.rea)
            regClearEA(p, &rin->arg2, &rin->arg2d);
        if (rin->arg3.rea)
            regClearEA(p, &rin->arg3, &rin->arg3d);
        if (rin->arg4.rea)
            regClearEA(p, &rin->arg4, &rin->arg4d);
        if (rin->brtrue) {
            /* a branch must be the last instruction of its block */
            dassert(RUNE_NEXT(rin, node) == NULL);
            RegAllocatorClear(p, rin->brtrue->label_block);
            if (rin->brfalse)
                RegAllocatorClear(p, rin->brfalse->label_block);
        }
    }
}

/*
 * Reset one argument's rea (and its companion direct rea, if any) to
 * its pre-allocation state.  Only reas flagged REAF_REGALLOCD are
 * touched: target_reg is zeroed and eamode/regno/offset are restored
 * from their orig_* copies.
 */
static void
regClearEA(RASParser *p, rspan_t *span, rspan_t *spand)
{
    rea_t *rea = span->rea;

    if (rea->direct) {
        dassert(rea->direct == spand->rea);
        regClearEA(p, spand, NULL);
    }
    if (rea->flags & REAF_REGALLOCD) {
        rea->target_reg = 0;
        rea->eamode = rea->orig_eamode;
        rea->regno = rea->orig_regno;
        rea->offset = rea->orig_offset;
        rea->flags &= ~REAF_REGALLOCD;
    }
}

/************************************************************************
 *                          HELPER FUNCTIONS                            *
 ************************************************************************
 *
 */

/*
 * Return the index of the lowest clear bit in mask, subject to the
 * fixed/preferred assignments for certain virtual registers (regno).
 * If all 64 bits are set the result is 64.
 */
static int
findfreebit(uint32_t regno, uint64_t mask)
{
    /* lookup[n] is the index of the lowest clear bit in nibble n */
    static int lookup[16] = {
        /* 0 */ 0,
        /* 1 */ 1,
        /* 2 */ 0,
        /* 3 */ 2,
        /* 4 */ 0,
        /* 5 */ 1,
        /* 6 */ 0,
        /* 7 */ 3,
        /* 8 */ 0,
        /* 9 */ 1,
        /* A */ 0,
        /* B */ 2,
        /* C */ 0,
        /* D */ 1,
        /* E */ 0,
        /* F */ 4
    };

    /*
     *
Prefer certain registers, disallow others.
     *
     * REG_PC and REG_FP are always mapped to the instruction and stack
     * pointer.  REG_AP, REG_RP and REG_SG take their preferred register
     * if it is free in mask, otherwise they fall through to the general
     * search.
     */
    if (regno) {
        switch(regno) {
        case REG_PC:
            return (X86_REG_RIP & X86_REG_MASK);
        case REG_FP:
            return (X86_REG_RSP & X86_REG_MASK);
        case REG_AP:
            if (mask & (1LLU << (X86_REG_RSI & X86_REG_MASK)))
                break;
            return (X86_REG_RSI & X86_REG_MASK);
            break;
        case REG_RP:
            if (mask & (1LLU << (X86_REG_RDI & X86_REG_MASK)))
                break;
            return (X86_REG_RDI & X86_REG_MASK);
            break;
        case REG_SG:
            if (mask & (1LLU << (X86_REG_RDX & X86_REG_MASK)))
                break;
            return (X86_REG_RDX & X86_REG_MASK);
            break;
        }
    }

    /*
     * General bit find
     *
     * Skip fully-occupied low halves (32, 16, 8 then 4 bits) by
     * recursing on the shifted mask, then resolve the final nibble
     * with the lookup table.
     */
    if ((mask & 0x00000000FFFFFFFFLLU) == 0x00000000FFFFFFFFLLU)
        return (findfreebit(0, mask >> 32) + 32);
    if ((mask & 0x000000000000FFFFLLU) == 0x000000000000FFFFLLU)
        return (findfreebit(0, mask >> 16) + 16);
    if ((mask & 0x00000000000000FFLLU) == 0x00000000000000FFLLU)
        return (findfreebit(0, mask >> 8) + 8);
    if ((mask & 0x000000000000000FLLU) == 0x000000000000000FLLU)
        return (findfreebit(0, mask >> 4) + 4);
    return (lookup[(int)mask & 15]);
}

#if 0
static void
clearregbit(uint64_t *mask, rea_t *rea)
{
    if (rea->target_reg)
        *mask &= ~(1LLU << (rea->target_reg & X86_REG_MASK));
}
#endif

/*
 * Return the mnemonic size suffix for extension c.  RAF_FLOAT in
 * argflags adds REXTF_FLOAT to c before the lookup.  Extensions not
 * listed yield "?".
 */
static const char *
x86ext(char c, uint8_t argflags)
{
    if (argflags & RAF_FLOAT)
        c |= REXTF_FLOAT;

    switch(c) {
    case REXT_I8:
        return("b");
    case REXT_I16:
        return("w");
    case REXT_I32:
        return("l");
    case REXT_I64:
        return("q");
    case REXT_F32:
        return("ss");
    case REXT_F64:
        return("sd");
    case REXT_F128:
        return("aps");
    default:
        return("?");
    }
}

/*
 * Return the x86 condition-code suffix for the comparison encoded in
 * bits 8-11 of op ("??" if the encoding is unknown).
 *
 * With reverse set the condition maps directly (e.g. UGT -> "a").
 * With reverse clear the operand-swapped condition is returned (e.g.
 * UGT -> "b").  A non-zero invert selects the logical negation of
 * whichever condition would otherwise be returned.
 */
static const char *
x86branch(uint32_t op, int invert, int reverse)
{
    if (invert == 0) {
        if (reverse) {
            switch(op & 0x0F00) {
            case 0x0000:        /* EQ */
                return("e");
            case 0x0100:        /* NE */
                return("ne");
            case 0x0200:        /* UGT */
                return("a");
            case 0x0300:        /* UGE */
                return("nb");
            case 0x0400:        /* ULT */
                return("b");
            case 0x0500:        /* ULE */
                return("be");
            case 0x0600:        /* SGT */
                return("g");
            case 0x0700:        /* SGE */
                return("ge");
            case 0x0800:        /* SLT */
                return("l");
            case 0x0900:        /* SLE */
                return("le");
            default:
                return("??");
            }
        } else {
switch(op & 0x0F00) {
            case 0x0000:        /* EQ */
                return("e");
            case 0x0100:        /* NE */
                return("ne");
            case 0x0200:        /* UGT */
                return("b");
            case 0x0300:        /* UGE */
                return("be");
            case 0x0400:        /* ULT */
                return("a");
            case 0x0500:        /* ULE */
                return("nb");
            case 0x0600:        /* SGT */
                return("l");
            case 0x0700:        /* SGE */
                return("le");
            case 0x0800:        /* SLT */
                return("g");
            case 0x0900:        /* SLE */
                return("ge");
            default:
                return("??");
            }
        }
    } else {
        /*
         * Inverted sense: each entry is the negation of the matching
         * entry in the non-inverted tables.
         */
        if (reverse) {
            switch(op & 0x0F00) {
            case 0x0000:        /* EQ */
                return("ne");
            case 0x0100:        /* NE */
                return("e");
            case 0x0200:        /* UGT */
                return("be");
            case 0x0300:        /* UGE */
                return("b");
            case 0x0400:        /* ULT */
                return("nb");
            case 0x0500:        /* ULE */
                return("a");
            case 0x0600:        /* SGT */
                return("le");
            case 0x0700:        /* SGE */
                return("l");
            case 0x0800:        /* SLT */
                return("ge");
            case 0x0900:        /* SLE */
                return("g");
            default:
                return("??");
            }
        } else {
            switch(op & 0x0F00) {
            case 0x0000:        /* EQ */
                return("ne");
            case 0x0100:        /* NE */
                return("e");
            case 0x0200:        /* UGT */
                return("nb");
            case 0x0300:        /* UGE */
                return("a");
            case 0x0400:        /* ULT */
                return("be");
            case 0x0500:        /* ULE */
                return("b");
            case 0x0600:        /* SGT */
                return("ge");
            case 0x0700:        /* SGE */
                return("g");
            case 0x0800:        /* SLT */
                return("le");
            case 0x0900:        /* SLE */
                return("l");
            default:
                return("??");
            }
        }
    }
}

/*
 * Return 1 if the two reas match in register number, target register,
 * offset, symbol and both immediate halves, else 0.  eamode itself is
 * not compared.  When both are EA_DIRECT they must already have
 * machine registers assigned.
 */
static int
sameEA(rea_t *rea1, rea_t *rea2)
{
    if (rea1->eamode == EA_DIRECT && rea2->eamode == EA_DIRECT) {
        dassert(rea1->target_reg);
        dassert(rea2->target_reg);
    }

    /*
     * NOTE: No need to test cache_id at this point.
*/ if (rea1->regno == rea2->regno && rea1->target_reg == rea2->target_reg && rea1->offset == rea2->offset && rea1->sym == rea2->sym && rea1->immlo == rea2->immlo && rea1->immhi == rea2->immhi) return 1; else return 0; } static int adjacentLabel(rinsn_t *rin, rsym_t *sym) { rinsn_t *scan; rblock_t *block; scan = RUNE_NEXT(rin, node); for (;;) { while (scan) { if (scan->op != INSN_LABEL) return 0; if (sym == scan->label) return 1; scan = RUNE_NEXT(scan, node); } block = RUNE_NEXT(rin->block, node); if (block == NULL) break; rin = scan = RUNE_FIRST(&block->rinsn_list); } return 0; } static rinsn_t * allocInsnBlock(RASParser *p, rblock_t *rblock, uint32_t op, uint8_t ext, int args) { rinsn_t *rin; rea_t *rea; int i; rin = zalloc(sizeof(*rin)); rin->op = op; rin->operands = args; rin->ext1 = ext; for (i = 0; i < args; ++i) { rea = zalloc(sizeof(*rea)); rea->refs = 1; switch(i) { case 0: rin->arg1.rea = rea; break; case 1: rin->arg2.rea = rea; break; case 2: rin->arg3.rea = rea; break; case 3: rin->arg4.rea = rea; break; default: dpanic("Too many EAs for allocInsnBlock()"); break; } } switch(op) { case INSN_MOVE: rin->opname = "MOVE"; rin->func = InsnMOVE; break; default: dpanic("Unsupported insn for allocInsnBlock()"); break; } /* rin->block = rblock; handled by block scan */ InsnTargetAdjust(p, rin); RasBlockAdd(rblock, rin); return rin; } /* * Initialize an rea, make it cacheable if EA_DIRECT+reg or EA_IMMEDIATE* */ static void initEA(rea_t *rea, uint8_t eamode, uint32_t regno, uint16_t target_reg) { rea->eamode = eamode; rea->regno = regno; rea->target_reg = target_reg; rea->orig_eamode = eamode; rea->orig_regno = regno; if (target_reg == 0 && regno && eamode == EA_DIRECT) rea->flags |= REAF_CACHEABLE; if (eamode == EA_IMMEDIATE || eamode == EA_IMMEDIATE16) rea->flags |= REAF_CACHEABLE; }