4 * (c)Copyright 2016, Matthew Dillon, All Rights Reserved. See the COPYRIGHT
5 * file at the base of the distribution.
7 * Emit the instruction as x86 assembly. If the EA/argument form is not
8 * compatible we make it compatible.
/*
 * Forward declarations for the file-local (static) helpers used by the
 * instruction emitters in this file: operand-form fixers (doInsn*),
 * move/lea helpers, save/restore helpers, compare simplifiers, assembly
 * printers (print*), small utilities, and the register allocator entry
 * points.
 */
14 static int doHandleImmediate(rea_t *srea, rea_t *drea, uint8_t extov);
15 static void doInsnRM2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op,
17 static void doInsnMR2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op,
19 static void doInsnRMMR3(RASParser *p, rinsn_t *rin, const char *x86op);
20 static void doInsnMR3(RASParser *p, rinsn_t *rin, const char *x86op, int nodrd);
21 static void doInsn1R(RASParser *p, rinsn_t *rin, const char *x86op1,
22 const char *x86op2, const char *x86opt2arg);
23 static void doInsnSHIFT3(RASParser *p, rinsn_t *rin, const char *x86op);
24 static void doInsnDXAX3(RASParser *p, rinsn_t *rin, const char *x86op,
25 int issigned, int dxresult);
26 static int doInsnFloating3(RASParser *p, rinsn_t *rin, const char *x86op,
28 static void doMOVE(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea);
29 static void doMOVEExt(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea,
31 static void doLEA(RASParser *p, rea_t *sea, rea_t *dea);
32 static void doSpecialSave(rinsn_t *rin);
33 static void doSpecialRestore(rinsn_t *rin);
34 static int InsnSimplifyRMMRCMP(RASParser *p, rinsn_t *rin,
35 rea_t **sea1p, rea_t **sea2p);
36 static int InsnSimplifyMRFCMP(RASParser *p, rinsn_t *rin,
37 rea_t **sea1p, rea_t **sea2p);
38 static rea_t *InsnSimplifyFP128(RASParser *p, rinsn_t *rin,
39 rea_t *sea, rea_t *tea, uint8_t ext,
40 int rspoff, int loadme);
42 static void printINSN(rinsn_t *rin, const char *x86op, rea_t *sea,
43 rea_t *dea, uint8_t extov);
44 static void printEA(rea_t *rea, uint8_t extov);
45 static void printREGTGT(uint16_t target_reg, uint32_t regno, uint8_t ext);
46 static void printREG(rea_t *rea, uint8_t ext);
47 static void printBRLabel(rsym_t *label);
48 static const char *x86ext(char c, uint8_t argflags);
49 static int findfreebit(uint32_t regno, uint64_t mask);
51 static void clearregbit(uint64_t *mask, rea_t *rea);
53 static const char *x86branch(uint32_t op, int invert, int reverse);
54 static int sameEA(rea_t *rea1, rea_t *rea2);
55 static int adjacentLabel(rinsn_t *rin, rsym_t *sym);
56 static rinsn_t *allocInsnBlock(RASParser *p, rblock_t *rblock,
57 uint32_t op, uint8_t ext, int args);
58 static void initEA(rea_t *rea, uint8_t eamode,
59 uint32_t regno, uint16_t target_reg);
60 static void RegAllocatorX86(RASParser *p, rblock_t *rblock,
61 urunesize_t stacksize);
62 static void RegAllocatorScan(RASParser *p, rblock_t *rblock);
63 static void RegAllocatorClear(RASParser *p, rblock_t *rblock);
/*
 * File-scope emitter state.
 *
 * ExtSize         - native word extension; set during global init to
 *                   REXT_I32 or REXT_I64 from sizeof(urunesize_t).
 * SaveMask        - bitmask of registers tested by the procedure
 *                   prologue/epilogue to decide what to save/restore.
 * ExtraStackSpace - additional bytes added to the frame in the prologue
 *                   when non-zero.
 */
65 static uint8_t ExtSize;
67 static uint64_t SaveMask;
68 static int RegAllocWeight;
69 static urunesize_t ExtraStackBase;
70 static urunesize_t ExtraStackSpace;
76 if (sea->eamode == EA_IMMEDIATE &&
77 (sea->immlo < (int)0x80000000 ||
78 sea->immlo > (int)0x7FFFFFFF)) {
86 * Global initialization (occurs after initial symbol load)
91 ExtSize = (sizeof(urunesize_t) == 4) ? REXT_I32 : REXT_I64;
92 X86Size = x86ext(ExtSize, 0)[0];
93 RegAllocator = RegAllocatorX86;
97 * Target-specific adjustments for this assembly emitter. Called during
98 * initial parsing, prior to any characterization or optimization.
100 * Specify registers which get clobbered by certain instructions. Note that
101 * most procedure calls will clobber a *lot* of registers, including all the
102 * %xmm* registers (if we want to be compatible with C anyhow).
104 * Our register optimizer is capable of saving/restoring some scratch
105 * registers around procedure calls, see X86_REGF_SPECIAL_SAVE.
/*
 * Record, per instruction, which x86 registers the emitter may need or
 * clobber.  The register flags are accumulated in 'raf' and finally
 * OR'd into rin->regused_init.
 *
 * For INSNF_FLOAT instructions raf starts as XMM0|XMM1, and for 128-bit
 * (REXT_I128) operands the operand EAs are additionally flagged
 * REAF_ADDRUSED.  Other instructions add integer scratch registers
 * and/or the call-scratch sets (X86_REGF_CALLSCR_*) and argument
 * registers for the system, runtime and Rune call ABIs.
 */
108 InsnTargetAdjust(RASParser *p __unused, rinsn_t *rin)
117 * This is a bit of a brute-force approach, but the registerizer has to
118 * know before we actually start emitting instructions.
120 if (rin->op & INSNF_FLOAT) {
122 * FP instructions may need to use XMM0 and/or XMM1 as temporaries to
123 * fix incompatible EAs. Generally speaking all operands for 128-bit
124 * FP instructions set ADDRUSED to try to prevent caching in %xmm
125 * registers because we have to use memory ops for 80/128-bit FP
128 * We have a bit of sophistication here to allow conditionals with
129 * boolean results to cache the boolean result.
131 raf = X86_REGF_XMM0 | X86_REGF_XMM1;
132 if (rin->ext1 == REXT_I128) {
133 rea1 = rin->arg1.rea;
134 rea2 = rin->arg2.rea;
135 rea3 = rin->arg3.rea;
137 rea1->flags |= REAF_ADDRUSED;
138 /* this messes up matching */
139 /* rea1->flags &= ~REAF_CACHEABLE; */
141 if (rin->op & INSNF_COND) {
142 if (rin->operands > 2 ||
143 (rin->operands == 2 &&
144 (rin->flags & RINSF_BRANCH))) {
145 rea2->flags |= REAF_ADDRUSED;
146 /* this messes up matching */
147 /* rea2->flags &= ~REAF_CACHEABLE; */
149 if (rin->operands > 3 ||
150 (rin->operands == 3 &&
151 (rin->flags & RINSF_BRANCH))) {
152 rea3->flags |= REAF_ADDRUSED;
153 /* this messes up matching */
154 /* rea3->flags &= ~REAF_CACHEABLE; */
158 rea2->flags |= REAF_ADDRUSED;
159 /* this messes up matching */
160 /* rea2->flags &= ~REAF_CACHEABLE; */
163 rea3->flags |= REAF_ADDRUSED;
164 /* this messes up matching */
165 /* rea3->flags &= ~REAF_CACHEABLE; */
168 rea4 = rin->arg4.rea;
170 rea4->flags |= REAF_ADDRUSED;
171 /* rea4->flags &= ~REAF_CACHEABLE; */
176 * Integer instructions might need to use RCX as a temporary to fix
177 * incompatible EAs. Other requirements are switched on below.
188 raf |= X86_REGF_RCX | X86_REGF_RDI;
194 raf |= X86_REGF_RCX | X86_REGF_RSI | X86_REGF_RDI;
203 * We may need/clobber these regs, depending.
205 raf |= X86_REGF_RAX | X86_REGF_RDX;
209 * System interface call
212 raf |= X86_SYSF_ARG1 | X86_SYSF_ARG2 | X86_SYSF_ARG3;
213 raf |= X86_REGF_CALLSCR_SYS;
217 raf |= X86_RUNTIMEF_ARG1 | X86_RUNTIMEF_ARG2 |
219 raf |= X86_REGF_CALLSCR_RUNTIME;
224 * Call arguments %ap:RSI %rp:RDI [%sg:RDX] (linux/bsd) Call
225 * arguments %ap:RCX %rp:RDX [%sg:R8] (ms)
228 raf |= X86_RUNEF_ARG1 | X86_RUNEF_ARG2 | X86_RUNEF_ARG3;
229 raf |= X86_REGF_CALLSCR_RUNE;
234 raf |= X86_REGF_RAX | X86_REGF_RCX;
238 * BCHECK only calls the BoundsTrap on compare failure, which does
239 * not return. Do not save/restore registers.
243 raf |= X86_REGF_CALLSCR_RUNTIME;
266 raf |= X86_REGF_CALLSCR_RUNTIME;
271 raf |= X86_REGF_CALLSCR_RUNTIME;
279 * Misc FP conversions might use these three registers.
281 raf = X86_REGF_XMM0 | X86_REGF_XMM1 | X86_REGF_RCX;
286 rin->regused_init |= raf;
290 * We need to formally add some instructions to the basic block to access the
291 * procedure arguments (%ap, %rp, and potentially %sg). This allows us to
292 * use the register allocator to optimize which regs they go into.
294 * These instructions are set up to make sure that the register allocator
295 * does not blow away the arguments before we've had a chance to load them
296 * into the proper registers. We must also ensure that the source and
297 * destination EAs never match which we do by setting cache_id.
/*
 * Append the argument-load moves to the procedure's basic block.
 *
 * Three INSN_MOVE instructions of width ExtSize are allocated, one each
 * for REG_AP, REG_RP and REG_SG.  In every move the source EA carries
 * the incoming ABI register (X86_RUNE_ARG1..ARG3) as its target_reg,
 * the destination EA has target_reg 0 and is flagged RAF_WRITE, and the
 * source cache_id is set to 1 so the two EAs never compare equal.
 *
 * regused_init on the earlier moves reserves the argument registers
 * that have not been consumed yet (ARG2|ARG3 for the first move, ARG3
 * for the second).
 */
300 InsnProcedureBasicBlock(RASParser * p, rblock_t * rblock,
301 urunesize_t bytes __unused, urunesize_t align __unused)
305 rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
306 initEA(rin->arg1.rea, EA_DIRECT, REG_AP, X86_RUNE_ARG1);
307 initEA(rin->arg2.rea, EA_DIRECT, REG_AP, 0);
308 rin->arg2.flags |= RAF_WRITE;
309 rin->arg1.rea->cache_id = 1;
310 rin->regused_init |= X86_RUNEF_ARG2;
311 rin->regused_init |= X86_RUNEF_ARG3;
313 rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
314 initEA(rin->arg1.rea, EA_DIRECT, REG_RP, X86_RUNE_ARG2);
315 initEA(rin->arg2.rea, EA_DIRECT, REG_RP, 0);
316 rin->arg2.flags |= RAF_WRITE;
317 rin->arg1.rea->cache_id = 1;
318 rin->regused_init |= X86_RUNEF_ARG3;
320 rin = allocInsnBlock(p, rblock, INSN_MOVE, ExtSize, 2);
321 initEA(rin->arg1.rea, EA_DIRECT, REG_SG, X86_RUNE_ARG3);
322 initEA(rin->arg2.rea, EA_DIRECT, REG_SG, 0);
323 rin->arg2.flags |= RAF_WRITE;
324 rin->arg1.rea->cache_id = 1;
/*
 * Emit the procedure prologue.
 *
 * Prints the "# PROC" banner, .globl/.type directives, the entry label
 * (psym->id without its leading '@') and .cfi_startproc.  Registers
 * below the XMM0 index that are set in SaveMask and belong to
 * X86_REGF_CALLSAVE_RUNE are pushed; 'count' tracks the pushed bytes.
 * XMM registers selected the same way are counted in 'fcount' and given
 * 16 bytes each in the frame.  ExtraStackSpace, when non-zero, is added
 * after rounding the frame to 16 bytes.  The frame is then aligned,
 * subtracted from the stack pointer, the XMM registers are stored with
 * movaps, and .cfi_def_cfa_offset is emitted.
 *
 * On exit ProcStackSize holds 'bytes' with the pad and FP save area
 * removed again.
 *
 * NOTE(review): the movaps offset is printed with %zd and no cast,
 * whereas the sub immediate uses %jd with an (intmax_t) cast.  Confirm
 * that urunesize_t matches %zd on every supported target.
 */
331 InsnProcedureStart(RASParser *p, urunesize_t bytes, urunesize_t align)
334 rsym_t *psym = p->psym;
335 int count; /* total bytes pushed on stack */
336 int fcount; /* floating point subsection */
340 printf("\t# PROC %s, %jd, %jd\n",
346 printf("\t.globl\t%s\n", psym->id + 1); /* skip the '@' */
347 printf("\t.type\t%s, @function\n", psym->id + 1);
348 printf("%s:\n", psym->id + 1);
349 printf("\t.cfi_startproc\n");
352 for (i = 0; i < (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); ++i) {
353 if ((SaveMask & (1LLU << i)) == 0)
355 if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
356 printf("\tpush%c\t", X86Size);
357 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
359 count += sizeof(void *);
360 /* do not bump bytes, not included */
365 for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); i < 64; ++i) {
366 if ((SaveMask & (1LLU << i)) == 0)
368 if (X86_REGF_CALLSAVE_RUNE & (1LLU << i))
371 bytes += fcount * 16; /* (temporary) */
372 if (fcount && align < 16)
376 * We may need extra stack space for ABI calls
378 if (ExtraStackSpace) {
381 bytes = (bytes + 15) & ~15;
382 bytes += ExtraStackSpace;
392 bytes = (bytes + align - 1) & ~(align - 1);
395 * 16-byte align our pushes + %rip to ensure that the resulting stack
396 * pointer is fully aligned.
398 count += sizeof(void *); /* include %rip */
399 pad = ((count + 15) & ~15) - count; /* 16-byte align */
402 xrsp.target_reg = X86_REG_RSP;
403 xrsp.eamode = EA_DIRECT;
404 printf("\tsub%c\t$%jd, ", X86Size, (intmax_t) bytes);
405 printEA(&xrsp, ExtSize);
408 bytes -= pad; /* undo */
415 * Save FP registers past the nominal stack and extra space.
417 bytes -= fcount * 16; /* undo */
419 for (i = (X86_REG_XMM0 & ~X86_SIZE_UNSPEC); i < 64; ++i) {
420 if ((SaveMask & (1LLU << i)) == 0)
422 if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
423 printf("\tmovaps\t");
424 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
425 printf(", %zd(%%rsp)\n", bytes + fcount *16);
431 * count is the whole frame
433 * ProcStackSize is just the base variable and extra space, and does not
434 * count the register save area, pad, or %rip.
436 printf("\t.cfi_def_cfa_offset\t%jd\n", (intmax_t) (count));
437 ProcStackSize = (ssize_t)bytes;
/*
 * Emit the procedure epilogue.
 *
 * Reloads the saved XMM registers with movaps from offsets based on
 * ProcStackSize, adds ProcStackSize + FP save area + ProcStackPad back
 * to the stack pointer, pops the saved integer registers in descending
 * index order, and emits ret followed by .cfi_endproc.
 *
 * NOTE(review): the loops here split integer/XMM registers at the
 * literal index 32 (and 31), while the prologue uses
 * (X86_REG_XMM0 & ~X86_SIZE_UNSPEC).  Confirm these are the same value.
 * NOTE(review): 'bytes' (urunesize_t) is printed with %zd; confirm the
 * type matches on all targets.
 */
441 InsnProcedureEnd(RASParser *p __unused,
442 urunesize_t bytes, urunesize_t align __unused)
449 * Restore FP regs after the variable space
452 for (i = 32; i < 64; ++i) {
453 if ((SaveMask & (1LLU << i)) == 0)
455 if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
456 printf("\tmovaps\t");
457 printf("%zd(%%rsp), ", ProcStackSize + fcount *16);
458 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
465 * Include the FP and %rip save space in the %rsp restore
467 bytes = ProcStackSize + fcount * 16 + ProcStackPad;
469 xrsp.target_reg = X86_REG_RSP;
470 xrsp.eamode = EA_DIRECT;
471 printf("\tadd%c\t$%zd, ", X86Size, bytes);
472 printEA(&xrsp, ExtSize);
477 * Use pop for normal registers
479 for (i = 31; i >= 0; --i) {
480 if ((SaveMask & (1LLU << i)) == 0)
482 if (X86_REGF_CALLSAVE_RUNE & (1LLU << i)) {
483 printf("\tpop%c\t", X86Size);
484 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
489 printf("\tret%c\n", X86Size);
490 printf("\t.cfi_endproc\n");
/*
 * Emit a local label of the form ".L<ProcNo><label-id>:" so labels are
 * unique per procedure.
 */
494 InsnLABEL(RASParser *p __unused, rinsn_t *rin)
496 printf(".L%d%s:\n", ProcNo, rin->label->id);
/*
 * Emit a move from arg1 to arg2.
 *
 * A 128-bit (REXT_I128) memory-to-memory move first flags both operands
 * RAF_FLOAT.  A memory source is emitted via doInsnMR2NoReadDst; the
 * other visible call is doInsnRM2NoReadDst.  Both use the "mov"
 * mnemonic with no extension override.
 */
500 InsnMOVE(RASParser *p, rinsn_t *rin)
502 if (rin->arg1.rea->eamode == EA_MEMORY &&
503 rin->arg2.rea->eamode == EA_MEMORY &&
504 rin->ext1 == REXT_I128) {
505 rin->arg1.flags |= RAF_FLOAT;
506 rin->arg2.flags |= RAF_FLOAT;
508 if (rin->arg1.rea->eamode == EA_MEMORY)
509 doInsnMR2NoReadDst(p, rin, "mov", 0);
511 doInsnRM2NoReadDst(p, rin, "mov", 0);
/* Emit an integer ADD through the generic RM/MR 3-operand helper. */
515 InsnADD(RASParser *p, rinsn_t *rin)
517 doInsnRMMR3(p, rin, "add");
/* Emit an integer SUB through the generic RM/MR 3-operand helper. */
521 InsnSUB(RASParser *p, rinsn_t *rin)
523 doInsnRMMR3(p, rin, "sub");
/* Emit a bitwise AND through the generic RM/MR 3-operand helper. */
527 InsnAND(RASParser *p, rinsn_t *rin)
529 doInsnRMMR3(p, rin, "and");
/* Emit a bitwise OR through the generic RM/MR 3-operand helper. */
533 InsnOR(RASParser *p, rinsn_t *rin)
535 doInsnRMMR3(p, rin, "or");
/* Emit a bitwise XOR through the generic RM/MR 3-operand helper. */
539 InsnXOR(RASParser *p, rinsn_t *rin)
541 doInsnRMMR3(p, rin, "xor");
/*
 * Emit a logical NOT: arg1 is compared against $0 and the comparison
 * result is stored in the destination.
 *
 * With one operand, or when source and destination sizes match, an
 * 8-bit operand is written directly to the destination EA (arg1 when
 * there is a single operand, otherwise arg2).  Wider operands build the
 * result in %ecx (cleared with xorl, then "sete %cl") and move it to
 * the destination with doMOVE.  When the sizes differ the instruction
 * must have exactly two operands (asserted), and the destination is
 * printed using ext2.
 */
545 InsnNOT(RASParser *p, rinsn_t *rin)
549 if (rin->operands == 1 || rin->ext1 == rin->ext2) {
550 if (rin->ext1 == REXT_I8) {
551 printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
552 printEA(rin->arg1.rea, rin->ext1);
555 if (rin->operands == 1)
556 printEA(rin->arg1.rea, rin->ext1);
558 printEA(rin->arg2.rea, rin->ext1);
560 xrcx.target_reg = X86_REG_RCX;
561 xrcx.eamode = EA_DIRECT;
562 printf("\txorl\t%%ecx, %%ecx\n");
563 printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
564 printEA(rin->arg1.rea, rin->ext1);
566 printf("\tsete\t%%cl\n");
567 if (rin->operands == 1)
568 doMOVE(p, rin, &xrcx, rin->arg1.rea);
570 doMOVE(p, rin, &xrcx, rin->arg2.rea);
574 dassert(rin->operands == 2);
575 printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
576 printEA(rin->arg1.rea, rin->ext1);
579 printEA(rin->arg2.rea, rin->ext2);
/*
 * Emit a bitwise complement.  doInsn1R is given "not" as the first
 * mnemonic and "xor" with the argument "$-1" as the alternate form.
 */
585 InsnCOM(RASParser *p, rinsn_t *rin)
587 doInsn1R(p, rin, "not", "xor", "$-1");
/*
 * Emit an arithmetic negate.  Only the "neg" mnemonic is supplied to
 * doInsn1R; no alternate mnemonic or argument is passed (NULL, NULL).
 */
591 InsnNEG(RASParser *p, rinsn_t *rin)
593 doInsn1R(p, rin, "neg", NULL, NULL);
597 InsnPOS(RASParser *p, rinsn_t *rin)
/* Emit an arithmetic shift left using the x86 "sal" mnemonic. */
603 InsnASL(RASParser *p, rinsn_t *rin)
605 doInsnSHIFT3(p, rin, "sal");
/* Emit an arithmetic shift right using the x86 "sar" mnemonic. */
609 InsnASR(RASParser *p, rinsn_t *rin)
611 doInsnSHIFT3(p, rin, "sar");
/* Emit a logical shift right using the x86 "shr" mnemonic. */
615 InsnLSR(RASParser *p, rinsn_t *rin)
617 doInsnSHIFT3(p, rin, "shr");
/* ADDC is not implemented by this emitter; reaching it panics. */
621 InsnADDC(RASParser *p __unused, rinsn_t *rin __unused)
623 dpanic("ADDC not supported");
/* SUBC is not implemented by this emitter; reaching it panics. */
627 InsnSUBC(RASParser *p __unused, rinsn_t *rin __unused)
629 dpanic("SUBC not supported");
/*
 * Emit an unsigned multiply with "mul" through the DX:AX helper
 * (issigned = 0, dxresult = 0).
 */
634 InsnMULU(RASParser *p, rinsn_t *rin)
636 doInsnDXAX3(p, rin, "mul", 0, 0);
637 /* doInsnMR3(p, rin, "imul"); *//* imul work for signed? */
/* Emit a signed multiply with "imul" through doInsnMR3 (nodrd = 0). */
641 InsnMULS(RASParser *p, rinsn_t *rin)
643 doInsnMR3(p, rin, "imul", 0);
647 * WARNING: Intermediate value in %rdx:%rax
/*
 * Emit an unsigned divide with "div" through the DX:AX helper
 * (issigned = 0, dxresult = 0).
 */
650 InsnDIVU(RASParser *p, rinsn_t *rin)
652 doInsnDXAX3(p, rin, "div", 0, 0);
656 * WARNING: Intermediate value in %rdx:%rax
/*
 * Emit a signed divide with "idiv" through the DX:AX helper
 * (issigned = 1, dxresult = 0).
 */
659 InsnDIVS(RASParser *p, rinsn_t *rin)
661 doInsnDXAX3(p, rin, "idiv", 1, 0);
/*
 * Emit an unsigned modulo: same "div" as InsnDIVU but with dxresult = 1
 * so the helper takes the result from the dx register.
 */
665 InsnMODU(RASParser *p, rinsn_t *rin)
667 doInsnDXAX3(p, rin, "div", 0, 1); /* result in %*dx */
/*
 * Emit a signed modulo: same "idiv" as InsnDIVS but with dxresult = 1
 * so the helper takes the result from the dx register.
 */
671 InsnMODS(RASParser *p, rinsn_t *rin)
673 doInsnDXAX3(p, rin, "idiv", 1, 1); /* result in %*dx */
/*
 * Emit an increment.  doInsn1R is given "inc" as the first mnemonic and
 * "add" with the argument "$1" as the alternate form.
 */
678 InsnINC(RASParser *p, rinsn_t *rin)
680 doInsn1R(p, rin, "inc", "add", "$1");
/*
 * Emit a decrement.  doInsn1R is given "dec" as the first mnemonic and
 * "sub" with the argument "$1" as the alternate form.
 */
684 InsnDEC(RASParser *p, rinsn_t *rin)
686 doInsn1R(p, rin, "dec", "sub", "$1");
/*
 * Emit an integer compare, followed by either a conditional branch or
 * a setcc.
 *
 * InsnSimplifyRMMRCMP selects the two EAs to compare and returns 'rev',
 * which is forwarded to x86branch as its 'reverse' argument.  For
 * branching instructions (RINSF_BRANCH) the condition is inverted when
 * the true target is the adjacent label, so only one jump is needed;
 * when neither target is adjacent both labels are printed.  Without
 * RINSF_BRANCH the condition is materialized into arg3 as an 8-bit
 * value using set<cc>.
 */
691 InsnCMP(RASParser *p, rinsn_t *rin)
700 rev = InsnSimplifyRMMRCMP(p, rin, &sea1, &sea2);
701 printf("\tcmp%s\t", x86ext(rin->ext1, 0));
702 printEA(sea1, rin->ext1);
704 printEA(sea2, rin->ext1);
708 if (rin->flags & RINSF_BRANCH) {
710 * Compare and branch. Invert sense if we can optimize the jmp.
712 if (adjacentLabel(rin, rin->brtrue)) {
714 * next insn matches true path, invert and branch on-false.
716 printf("\tj%s\t", x86branch(rin->op, 1, rev));
717 printBRLabel(rin->brfalse);
719 } else if (adjacentLabel(rin, rin->brfalse)) {
721 * next insn matches false path, do not invert and branch
724 printf("\tj%s\t", x86branch(rin->op, 0, rev));
725 printBRLabel(rin->brtrue);
729 * next insn does not match either path.
731 printf("\tj%s\t", x86branch(rin->op, 0, rev));
732 printBRLabel(rin->brtrue);
735 printBRLabel(rin->brfalse);
740 * Compare and set result
742 printf("\tset%s\t", x86branch(rin->op, 0, rev));
743 printEA(rin->arg3.rea, REXT_I8);
750 InsnMOVEA(RASParser *p, rinsn_t *rin)
756 InsnADDA(RASParser *p, rinsn_t *rin)
762 InsnSUBA(RASParser *p, rinsn_t *rin)
768 InsnADDAU(RASParser *p, rinsn_t *rin)
774 InsnSUBAU(RASParser *p, rinsn_t *rin)
780 InsnSUBAA(RASParser *p, rinsn_t *rin)
/*
 * Emit a load-effective-address of arg1 into arg2.
 *
 * A non-memory source degenerates to a plain "mov".  A memory source
 * with no symbol, a zero offset and a base EA that has a target
 * register is just a copy of that base (doMOVE of rea1->direct).  The
 * remaining call emits a real "lea" at the native size (ExtSize).
 */
786 InsnLEA(RASParser *p, rinsn_t *rin)
788 rea_t *rea1 = rin->arg1.rea;
789 rea_t *rea2 = rin->arg2.rea;
791 if (rea1->eamode != EA_MEMORY) {
792 doInsnRM2NoReadDst(p, rin, "mov", 0);
793 } else if (rea1->eamode == EA_MEMORY && rea1->sym == NULL &&
794 rea1->direct->target_reg && rea1->offset == 0) {
795 doMOVE(p, rin, rea1->direct, rea2);
797 doInsnMR2NoReadDst(p, rin, "lea", ExtSize);
803 * BCOPY.align bytes,sea,dea (non-overlapping guaranteed)
/*
 * Set up a block copy: the byte count (arg1) is moved into RCX at the
 * native size, the address of arg2 is loaded into RSI and the address
 * of arg3 into RDI.
 */
806 InsnBCOPY(RASParser *p, rinsn_t *rin)
812 xrdi.target_reg = X86_REG_RDI;
813 xrdi.eamode = EA_DIRECT;
814 xrsi.target_reg = X86_REG_RSI;
815 xrsi.eamode = EA_DIRECT;
816 xrcx.target_reg = X86_REG_RCX;
817 xrcx.eamode = EA_DIRECT;
820 doMOVEExt(p, rin, rin->arg1.rea, &xrcx, ExtSize);
821 doLEA(p, rin->arg2.rea, &xrsi);
822 doLEA(p, rin->arg3.rea, &xrdi);
828 * BZERO.align bytes,dea
/*
 * Zero a block of memory (or a register-sized destination).
 *
 * When the byte count is an immediate and is a small multiple of the
 * alignment size (ext2), the zeroing is done with stores from a zeroed
 * register: %eax for 8/16/32/64-bit alignment (64-bit only when the
 * native size is 64-bit), %xmm0 for 128-bit alignment.  The
 * RASOPT_RAX_ZERO / RASOPT_XMM0_ZERO flags record that the register is
 * already zero; if the previous instruction left it zero, the
 * xorl/pxor is printed as a comment ("# ...") instead of being
 * re-issued.  Each such branch asserts that a destination receiving
 * more than one element is a memory EA.
 *
 * The final visible sequence is the general case: %eax is zeroed, the
 * count is loaded into RCX and the destination address into RDI.
 */
831 InsnBZERO(RASParser *p, rinsn_t *rin)
833 static rea_t xrax __unused;
836 static rea_t xmm0 __unused;
838 urunesize_t value __unused;
840 xrax.target_reg = X86_REG_RAX;
841 xrax.eamode = EA_DIRECT;
842 xrdi.target_reg = X86_REG_RDI;
843 xrdi.eamode = EA_DIRECT;
844 xrcx.target_reg = X86_REG_RCX;
845 xrcx.eamode = EA_DIRECT;
846 xmm0.target_reg = X86_REG_XMM0;
847 xmm0.eamode = EA_DIRECT;
849 if (rin->arg1.rea->eamode == EA_IMMEDIATE)
850 value = rin->arg1.rea->immlo;
854 /* retain zeroed register optimizations for mixed BZEROs */
855 p->opt_flags |= p->opt_last & (RASOPT_RAX_ZERO | RASOPT_XMM0_ZERO);
857 rea2 = *rin->arg2.rea;
858 if (rin->ext2 == REXT_I8 && value <= 4) {
859 dassert(value == 1 || rea2.eamode == EA_MEMORY);
860 if (p->opt_last & RASOPT_RAX_ZERO)
861 printf("\t# xorl\t%%eax, %%eax\n");
863 printf("\txorl\t%%eax, %%eax\n");
864 p->opt_flags |= RASOPT_RAX_ZERO;
866 doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
870 } else if (rin->ext2 == REXT_I16 && (value & 1) == 0 && value <= 8) {
871 dassert(value == 2 || rea2.eamode == EA_MEMORY);
872 if (p->opt_last & RASOPT_RAX_ZERO)
873 printf("\t# xorl\t%%eax, %%eax\n");
875 printf("\txorl\t%%eax, %%eax\n");
876 p->opt_flags |= RASOPT_RAX_ZERO;
878 doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
882 } else if (rin->ext2 == REXT_I32 && (value & 3) == 0 && value <= 16) {
883 dassert(value == 4 || rea2.eamode == EA_MEMORY);
884 if (p->opt_last & RASOPT_RAX_ZERO)
885 printf("\t# xorl\t%%eax, %%eax\n");
887 printf("\txorl\t%%eax, %%eax\n");
888 p->opt_flags |= RASOPT_RAX_ZERO;
890 doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
894 } else if (rin->ext2 == REXT_I64 && (value & 7) == 0 && value <= 40 &&
895 ExtSize == REXT_I64) {
896 dassert(value == 8 || rea2.eamode == EA_MEMORY);
897 if (p->opt_last & RASOPT_RAX_ZERO)
898 printf("\t# xorl\t%%eax, %%eax\n");
900 printf("\txorl\t%%eax, %%eax\n");
901 p->opt_flags |= RASOPT_RAX_ZERO;
903 doMOVEExt(p, rin, &xrax, &rea2, rin->ext2);
907 } else if (rin->ext2 == REXT_I128 && (value & 15) == 0 && value <= 80) {
908 dassert(value == 16 || rea2.eamode == EA_MEMORY);
909 if (p->opt_last & RASOPT_XMM0_ZERO)
910 printf("\t# pxor\t%%xmm0, %%xmm0\n");
912 printf("\tpxor\t%%xmm0, %%xmm0\n");
913 p->opt_flags |= RASOPT_XMM0_ZERO;
915 doMOVEExt(p, rin, &xmm0, &rea2,
916 rin->ext2 | REXTF_FLOAT);
922 printf("\txorl\t%%eax, %%eax\n");
923 doMOVEExt(p, rin, rin->arg1.rea, &xrcx, ExtSize);
924 doLEA(p, &rea2, &xrdi);
/*
 * Emit a runtime type comparison via RuneRunTime_SWCmpType.
 *
 * The addresses of arg1 and arg2 are loaded into the first two runtime
 * ABI argument registers, the runtime routine is called, and the
 * special-save registers are restored.  For branching instructions the
 * returned value in RAX is compared against 0 and a branch is chosen
 * relative to the adjacent label.  In the non-branching form the low
 * 8 bits of RAX are stored into arg3.
 */
932 InsnCMPTYPE(RASParser *p, rinsn_t *rin)
938 xarg1.target_reg = X86_RUNTIME_ARG1;
939 xarg1.eamode = EA_DIRECT;
940 xarg2.target_reg = X86_RUNTIME_ARG2;
941 xarg2.eamode = EA_DIRECT;
943 doLEA(p, rin->arg1.rea, &xarg1);
944 doLEA(p, rin->arg2.rea, &xarg2);
946 printf("\tcall\tRuneRunTime_SWCmpType\n");
947 doSpecialRestore(rin);
948 if (rin->flags & RINSF_BRANCH) {
949 xrax.target_reg = X86_REG_RAX;
950 xrax.eamode = EA_DIRECT;
951 printf("\tcmp%c\t$0, ", X86Size);
952 printEA(&xrax, ExtSize);
954 if (adjacentLabel(rin, rin->brtrue)) {
956 * next insn matches true path, invert and branch on-false.
959 printBRLabel(rin->brfalse);
960 } else if (adjacentLabel(rin, rin->brfalse)) {
962 * next insn matches false path, do not invert and branch
966 printBRLabel(rin->brtrue);
969 * next insn does not match either path.
972 printBRLabel(rin->brtrue);
975 printBRLabel(rin->brfalse);
979 xrax.target_reg = X86_REG_RAX;
980 xrax.eamode = EA_DIRECT;
981 doMOVEExt(p, rin, &xrax, rin->arg3.rea, REXT_I8);
987 * Cast unsigned integer to size
/*
 * Emit an unsigned integer size conversion from ext1 (arg1) to ext2
 * (arg2).
 *
 * Immediate sources are moved directly at the destination size.
 * Widening uses movzb/movzw for 8/16-bit sources; another widening
 * case stages a memory source through RCX and then issues a "mov".  An
 * unrecognized source size panics.  In the remaining (non-widening)
 * path a memory source is first loaded into RCX and then stored with a
 * "mov" at the destination size (REXTF_EA2).
 */
990 InsnCASTU(RASParser *p, rinsn_t *rin)
994 if (rin->ext1 == rin->ext2) {
995 /* doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea); */
997 } else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
998 rin->arg1.rea->eamode == EA_IMMEDIATE16) {
999 doMOVEExt(p, rin, rin->arg1.rea, rin->arg2.rea, REXTF_EA2);
1000 } else if (rin->ext1 < rin->ext2) {
1001 switch (rin->ext1) {
1003 doInsnMR2NoReadDst(p, rin, "movzb",
1004 REXTF_SEA1 | REXTF_EA2);
1007 doInsnMR2NoReadDst(p, rin, "movzw",
1008 REXTF_SEA1 | REXTF_EA2);
1011 if (rin->arg1.rea->eamode == EA_MEMORY) {
1012 xrea.target_reg = X86_REG_RCX;
1013 xrea.eamode = EA_DIRECT;
1014 doMOVEExt(p, rin, rin->arg1.rea, &xrea,
1016 printINSN(rin, "mov", &xrea, rin->arg2.rea,
1019 doMOVEExt(p, rin, rin->arg1.rea, rin->arg1.rea,
1021 printINSN(rin, "mov",
1022 rin->arg1.rea, rin->arg2.rea,
1027 dpanic("Unknown/unsupported REXT %d", rin->ext1);
1031 if (rin->arg1.rea->eamode == EA_MEMORY) {
1032 xrea.target_reg = X86_REG_RCX;
1033 xrea.eamode = EA_DIRECT;
1034 doMOVE(p, rin, rin->arg1.rea, &xrea);
1035 printINSN(rin, "mov", &xrea, rin->arg2.rea, REXTF_EA2);
1037 printINSN(rin, "mov", rin->arg1.rea, rin->arg2.rea,
1044 * Cast signed integer to size
/*
 * Emit a signed integer size conversion from ext1 (arg1) to ext2
 * (arg2).
 *
 * Immediate sources are moved directly at the destination size.
 * Widening uses the sign-extending movsb/movsw/movsl forms selected by
 * the source size; an unrecognized source size panics.  In the
 * remaining (non-widening) path a memory source is first loaded into
 * RCX and then stored with a "mov" at the destination size (REXTF_EA2).
 */
1047 InsnCASTS(RASParser *p, rinsn_t *rin)
1051 if (rin->ext1 == rin->ext2) {
1052 /* doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea); */
1054 } else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
1055 rin->arg1.rea->eamode == EA_IMMEDIATE16) {
1056 doMOVEExt(p, rin, rin->arg1.rea, rin->arg2.rea, REXTF_EA2);
1057 } else if (rin->ext1 < rin->ext2) {
1058 switch (rin->ext1) {
1060 doInsnMR2NoReadDst(p, rin, "movsb",
1061 REXTF_SEA1 | REXTF_EA2);
1064 doInsnMR2NoReadDst(p, rin, "movsw",
1065 REXTF_SEA1 | REXTF_EA2);
1068 doInsnMR2NoReadDst(p, rin, "movsl",
1069 REXTF_SEA1 | REXTF_EA2);
1072 dpanic("Unknown/unsupported REXT %d", rin->ext1);
1076 if (rin->arg1.rea->eamode == EA_MEMORY) {
1077 xrea.target_reg = X86_REG_RCX;
1078 xrea.eamode = EA_DIRECT;
1079 doMOVE(p, rin, rin->arg1.rea, &xrea);
1080 printINSN(rin, "mov", &xrea, rin->arg2.rea, REXTF_EA2);
1082 printINSN(rin, "mov", rin->arg1.rea, rin->arg2.rea,
1089 * CASTP - Cast a pointer to an integer.
1092 InsnCASTP(RASParser *p, rinsn_t *rin)
/*
 * Emit a call to the runtime routine RuneRunTime_<fname> that takes no
 * arguments and whose result is ignored, then restore the special-save
 * registers.
 */
1099 insnZeroArgNoReturn(RASParser *p __unused, rinsn_t *rin, const char *fname)
1102 printf("\tcall\tRuneRunTime_%s\n", fname);
1103 doSpecialRestore(rin);
1108 * Used to be used to obtain implicit context, e.g. STKIGET. We may have
1109 * a use for this later so keep it around.
/*
 * Emit a call to the zero-argument runtime routine RuneRunTime_<fname>
 * and store its result (RAX) into arg1 at the native size.
 *
 * NOTE(review): the liveness test reads rin->arg2.flags, but the result
 * is stored into rin->arg1.rea.  The sibling helpers (insnOneArgRet,
 * InsnLVALLOC) test the flags of the same argument they write; confirm
 * whether arg1.flags was intended here.
 * NOTE(review): xrax is initialised twice with the same values.
 */
1113 insnZeroArgRet(RASParser *p, rinsn_t *rin, const char *fname)
1117 xrax.target_reg = X86_REG_RAX;
1118 xrax.eamode = EA_DIRECT;
1121 printf("\tcall\tRuneRunTime_%s\n", fname);
1122 doSpecialRestore(rin);
1123 if ((rin->arg2.flags & RAF_LIFE_END) == 0) {
1124 xrax.target_reg = X86_REG_RAX;
1125 xrax.eamode = EA_DIRECT;
1126 doMOVEExt(p, rin, &xrax, rin->arg1.rea, ExtSize);
/*
 * Emit a call to RuneRunTime_<fname> with one argument: the address of
 * arg1 is loaded into the first runtime ABI argument register.  The
 * result is ignored and the special-save registers are restored after
 * the call.
 */
1133 insnOneArgNoReturn(RASParser *p, rinsn_t *rin, const char *fname)
1137 xarg1.target_reg = X86_RUNTIME_ARG1;
1138 xarg1.eamode = EA_DIRECT;
1140 doLEA(p, rin->arg1.rea, &xarg1);
1142 printf("\tcall\tRuneRunTime_%s\n", fname);
1143 doSpecialRestore(rin);
/*
 * Emit a call to RuneRunTime_<fname> with one argument passed by value:
 * arg1 is moved (doMOVE, not doLEA) into the first runtime ABI argument
 * register.  The result is ignored and the special-save registers are
 * restored after the call.
 */
1148 insnOneArg32NoReturn(RASParser *p, rinsn_t *rin, const char *fname)
1152 xarg1.target_reg = X86_RUNTIME_ARG1;
1153 xarg1.eamode = EA_DIRECT;
1155 doMOVE(p, rin, rin->arg1.rea, &xarg1);
1157 printf("\tcall\tRuneRunTime_%s\n", fname);
1158 doSpecialRestore(rin);
1163 * Used to be used for e.g. PIGET but we may have a use for it again later,
1164 * so keep it around.
/*
 * Emit a call to RuneRunTime_<fname> with the address of arg1 as its
 * single argument.  Unless arg2 is flagged RAF_LIFE_END, the returned
 * value in RAX is stored into arg2 at the native size.
 */
1168 insnOneArgRet(RASParser *p, rinsn_t *rin, const char *fname)
1173 xarg1.target_reg = X86_RUNTIME_ARG1;
1174 xarg1.eamode = EA_DIRECT;
1175 doLEA(p, rin->arg1.rea, &xarg1);
1178 printf("\tcall\tRuneRunTime_%s\n", fname);
1179 doSpecialRestore(rin);
1181 if ((rin->arg2.flags & RAF_LIFE_END) == 0) {
1182 xrax.target_reg = X86_REG_RAX;
1183 xrax.eamode = EA_DIRECT;
1184 doMOVEExt(p, rin, &xrax, rin->arg2.rea, ExtSize);
1191 * Used to be used for PCOPY but we may have a use for it again later,
1192 * so keep it around.
/*
 * Emit a call to RuneRunTime_<fname> with two arguments: the addresses
 * of arg1 and arg2 are loaded into the first and second runtime ABI
 * argument registers.  The result is ignored and the special-save
 * registers are restored after the call.
 */
1196 insnTwoArgNoReturn(RASParser *p, rinsn_t *rin, const char *fname)
1201 xarg1.target_reg = X86_RUNTIME_ARG1;
1202 xarg1.eamode = EA_DIRECT;
1203 xarg2.target_reg = X86_RUNTIME_ARG2;
1204 xarg2.eamode = EA_DIRECT;
1206 doLEA(p, rin->arg1.rea, &xarg1);
1207 doLEA(p, rin->arg2.rea, &xarg2);
1209 printf("\tcall\tRuneRunTime_%s\n", fname);
1210 doSpecialRestore(rin);
/*
 * Emit a call to RuneRunTime_<fname> with the addresses of arg1 and
 * arg2 as its two arguments, then store the returned value (RAX) into
 * arg3 at the native size.
 */
1217 insnTwoArgRet(RASParser *p, rinsn_t *rin, const char *fname)
1223 xarg1.target_reg = X86_RUNTIME_ARG1;
1224 xarg1.eamode = EA_DIRECT;
1225 doLEA(p, rin->arg1.rea, &xarg1);
1227 xarg2.target_reg = X86_RUNTIME_ARG2;
1228 xarg2.eamode = EA_DIRECT;
1229 doLEA(p, rin->arg2.rea, &xarg2);
1232 printf("\tcall\tRuneRunTime_%s\n", fname);
1233 doSpecialRestore(rin);
1235 xrax.target_reg = X86_REG_RAX;
1236 xrax.eamode = EA_DIRECT;
1237 doMOVEExt(p, rin, &xrax, rin->arg3.rea, ExtSize);
/* PCOPY: two-argument runtime call to RuneRunTime_PCopy, no result. */
1243 InsnPCOPY(RASParser *p, rinsn_t *rin)
1245 insnTwoArgNoReturn(p, rin, "PCopy");
1250 * Reference context routines
/* PGET: one-argument runtime call to RuneRunTime_PGet, no result. */
1253 InsnPGET(RASParser *p, rinsn_t *rin)
1255 insnOneArgNoReturn(p, rin, "PGet");
/* PGETH: one-argument runtime call to RuneRunTime_PGetH, no result. */
1259 InsnPGETH(RASParser *p, rinsn_t *rin)
1261 insnOneArgNoReturn(p, rin, "PGetH");
/* PPUT: one-argument runtime call to RuneRunTime_PPut, no result. */
1265 InsnPPUT(RASParser *p, rinsn_t *rin)
1267 insnOneArgNoReturn(p, rin, "PPut");
/* PPUTH: one-argument runtime call to RuneRunTime_PPutH, no result. */
1271 InsnPPUTH(RASParser *p, rinsn_t *rin)
1273 insnOneArgNoReturn(p, rin, "PPutH");
/* PREF: one-argument runtime call to RuneRunTime_PRef, no result. */
1277 InsnPREF(RASParser *p, rinsn_t *rin)
1279 insnOneArgNoReturn(p, rin, "PRef");
/* PREL: one-argument runtime call to RuneRunTime_PRel, no result. */
1283 InsnPREL(RASParser *p, rinsn_t *rin)
1285 insnOneArgNoReturn(p, rin, "PRel");
/* PLOCK: one-argument runtime call to RuneRunTime_PLock, no result. */
1289 InsnPLOCK(RASParser *p, rinsn_t *rin)
1291 insnOneArgNoReturn(p, rin, "PLock");
/* PLOCKH: one-argument runtime call to RuneRunTime_PLockH, no result. */
1295 InsnPLOCKH(RASParser *p, rinsn_t *rin)
1297 insnOneArgNoReturn(p, rin, "PLockH");
/* PUNLOCK: one-argument runtime call to RuneRunTime_PUnlock, no result. */
1301 InsnPUNLOCK(RASParser *p, rinsn_t *rin)
1303 insnOneArgNoReturn(p, rin, "PUnlock");
/* PUNLOCKH: one-argument runtime call to RuneRunTime_PUnlockH, no result. */
1307 InsnPUNLOCKH(RASParser *p, rinsn_t *rin)
1309 insnOneArgNoReturn(p, rin, "PUnlockH");
1313 * Object context routines
/* IGET: one-argument runtime call to RuneRunTime_IGet, no result. */
1316 InsnIGET(RASParser *p, rinsn_t *rin)
1318 insnOneArgNoReturn(p, rin, "IGet");
/* IGETH: one-argument runtime call to RuneRunTime_IGetH, no result. */
1322 InsnIGETH(RASParser *p, rinsn_t *rin)
1324 insnOneArgNoReturn(p, rin, "IGetH");
/* IPUT: one-argument runtime call to RuneRunTime_IPut, no result. */
1328 InsnIPUT(RASParser *p, rinsn_t *rin)
1330 insnOneArgNoReturn(p, rin, "IPut");
/* IPUTH: one-argument runtime call to RuneRunTime_IPutH, no result. */
1334 InsnIPUTH(RASParser *p, rinsn_t *rin)
1336 insnOneArgNoReturn(p, rin, "IPutH");
/* IREF: one-argument runtime call to RuneRunTime_IRef, no result. */
1340 InsnIREF(RASParser *p, rinsn_t *rin)
1342 insnOneArgNoReturn(p, rin, "IRef");
/* IREL: one-argument runtime call to RuneRunTime_IRel, no result. */
1346 InsnIREL(RASParser *p, rinsn_t *rin)
1348 insnOneArgNoReturn(p, rin, "IRel");
/* ILOCK: one-argument runtime call to RuneRunTime_ILock, no result. */
1352 InsnILOCK(RASParser *p, rinsn_t *rin)
1354 insnOneArgNoReturn(p, rin, "ILock");
/* ILOCKH: one-argument runtime call to RuneRunTime_ILockH, no result. */
1358 InsnILOCKH(RASParser *p, rinsn_t *rin)
1360 insnOneArgNoReturn(p, rin, "ILockH");
/* IUNLOCK: one-argument runtime call to RuneRunTime_IUnlock, no result. */
1364 InsnIUNLOCK(RASParser *p, rinsn_t *rin)
1366 insnOneArgNoReturn(p, rin, "IUnlock");
/* IUNLOCKH: one-argument runtime call to RuneRunTime_IUnlockH, no result. */
1370 InsnIUNLOCKH(RASParser *p, rinsn_t *rin)
1372 insnOneArgNoReturn(p, rin, "IUnlockH");
/*
 * Emit a call to RuneRunTime_LVAlloc with the addresses of arg1 and
 * arg2 as arguments.  Unless arg3 is flagged RAF_LIFE_END, the low
 * 8 bits of the returned value (RAX) are stored into arg3.
 */
1376 InsnLVALLOC(RASParser *p, rinsn_t *rin)
1382 xarg1.target_reg = X86_RUNTIME_ARG1;
1383 xarg1.eamode = EA_DIRECT;
1384 doLEA(p, rin->arg1.rea, &xarg1);
1386 xarg2.target_reg = X86_RUNTIME_ARG2;
1387 xarg2.eamode = EA_DIRECT;
1388 doLEA(p, rin->arg2.rea, &xarg2);
1391 printf("\tcall\tRuneRunTime_LVAlloc\n");
1392 doSpecialRestore(rin);
1394 if ((rin->arg3.flags & RAF_LIFE_END) == 0) {
1395 xrax.target_reg = X86_REG_RAX;
1396 xrax.eamode = EA_DIRECT;
1397 doMOVEExt(p, rin, &xrax, rin->arg3.rea, REXT_I8);
/*
 * Emit a bounds check: compare the two operands chosen by
 * InsnSimplifyRMMRCMP, jump forward to local label "1" when the check
 * passes (jb or jae; both forms are present, presumably selected by the
 * returned 'rev'), and otherwise call RuneRunTime_BoundsTrap.
 */
1402 InsnBCHECK(RASParser *p, rinsn_t *rin)
1409 * Issue comparison, branch past bounds trap if valid.
1411 * WARNING! BCHECK instruction regs assume that BoundsTrap never
1412 * returns, does not save/restore registers.
1414 rev = InsnSimplifyRMMRCMP(p, rin, &sea1, &sea2);
1415 printf("\tcmp%s\t", x86ext(rin->ext1, 0));
1416 printEA(sea1, rin->ext1);
1418 printEA(sea2, rin->ext1);
1421 printf("\tjb\t1f\n");
1423 printf("\tjae\t1f\n");
1424 printf("\tcall\tRuneRunTime_BoundsTrap\n");
/* Emit an unconditional call to RuneRunTime_BoundsTrap. */
1429 InsnBNDTRAP(RASParser *p __unused, rinsn_t *rin __unused)
1431 printf("\tcall\tRuneRunTime_BoundsTrap\n");
1435 InsnCMPA(RASParser *p, rinsn_t *rin)
/*
 * Emit a Rune procedure call (also used for INSN_TCALL).
 *
 * A call target that is EA_DIRECT or has a register number is first
 * moved into RAX.  For INSN_TCALL the target address is instead loaded
 * into the third Rune argument register.  The addresses of arg2 and
 * arg3 go into the first and second Rune argument registers.  An
 * optional arg4 (asserted not to occur for INSN_TCALL) goes into the
 * third, by value when it is direct or in a register and by address
 * otherwise.
 *
 * The call itself is RuneRunTime_ThreadedCall for INSN_TCALL, an
 * indirect call through %eax/%rax (chosen by the native size), or a
 * call printed from arg1's EA.  Special-save registers are restored
 * afterwards.
 */
1444 InsnCALL(RASParser *p, rinsn_t *rin)
1452 /* XXX eamode not really deterministic */
1453 if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
1454 xrax.target_reg = X86_REG_RAX;
1455 xrax.eamode = EA_DIRECT;
1456 doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
1458 } else if (rin->op == INSN_TCALL) {
1459 xarg3.target_reg = X86_RUNE_ARG3;
1460 xarg3.eamode = EA_DIRECT;
1461 doLEA(p, rin->arg1.rea, &xarg3);
1464 xarg1.target_reg = X86_RUNE_ARG1;
1465 xarg1.eamode = EA_DIRECT;
1466 xarg2.target_reg = X86_RUNE_ARG2;
1467 xarg2.eamode = EA_DIRECT;
1468 if (rin->arg4.rea) {
1469 dassert(rin->op != INSN_TCALL); /* XXX fixme */
1470 xarg3.target_reg = X86_RUNE_ARG3;
1471 xarg3.eamode = EA_DIRECT;
1472 if (rin->arg4.rea->eamode == EA_DIRECT ||
1473 rin->arg4.rea->regno) {
1474 doMOVEExt(p, rin, rin->arg4.rea, &xarg3, ExtSize);
1476 doLEA(p, rin->arg4.rea, &xarg3);
1480 doLEA(p, rin->arg2.rea, &xarg1);
1482 doLEA(p, rin->arg3.rea, &xarg2);
1484 if (rin->op == INSN_TCALL) {
1485 printf("\tcall\tRuneRunTime_ThreadedCall\n");
1487 if (ExtSize == REXT_I32)
1488 printf("\tcall\t*%%eax\n");
1490 printf("\tcall\t*%%rax\n");
1493 printEA(rin->arg1.rea, ExtSize);
1496 doSpecialRestore(rin);
1500 * Rune-Rune threaded call
1503 InsnTCALL(RASParser *p, rinsn_t *rin)
1509 InsnLCALL(RASParser *p __unused, rinsn_t *rin __unused)
1515 * System interfacing call (e.g. native libc, system call, etc)
/*
 * Emit a call using the system ABI argument registers.
 *
 * A target that is EA_DIRECT or has a register number is first moved
 * into RAX and called indirectly through %eax/%rax; the other visible
 * form prints arg1's EA as the call operand.  The addresses of arg2 and
 * arg3 are loaded into X86_SYS_ARG1 and X86_SYS_ARG2.  A fourth
 * argument is not supported (asserted NULL).  Special-save registers
 * are restored after the call.
 */
1518 InsnQCALL(RASParser *p, rinsn_t *rin)
1525 /* XXX eamode not really deterministic */
1526 if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
1527 xrax.target_reg = X86_REG_RAX;
1528 xrax.eamode = EA_DIRECT;
1529 doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
1533 xarg1.target_reg = X86_SYS_ARG1;
1534 xarg1.eamode = EA_DIRECT;
1535 xarg2.target_reg = X86_SYS_ARG2;
1536 xarg2.eamode = EA_DIRECT;
1537 dassert(rin->arg4.rea == NULL);
1539 doLEA(p, rin->arg2.rea, &xarg1);
1541 doLEA(p, rin->arg3.rea, &xarg2);
1544 if (ExtSize == REXT_I32)
1545 printf("\tcall\t*%%eax\n");
1547 printf("\tcall\t*%%rax\n");
1550 printEA(rin->arg1.rea, ExtSize);
1553 doSpecialRestore(rin);
1557 * RuneRunTime_*() call ABI
/*
 * Emit a call using the runtime ABI argument registers.
 *
 * Same structure as InsnQCALL, but the addresses of arg2 and arg3 are
 * loaded into X86_RUNTIME_ARG1 and X86_RUNTIME_ARG2.  A fourth argument
 * is not supported (asserted NULL).  Special-save registers are
 * restored after the call.
 */
1560 InsnRCALL(RASParser *p, rinsn_t *rin)
1567 /* XXX eamode not really deterministic */
1568 if (rin->arg1.rea->eamode == EA_DIRECT || rin->arg1.rea->regno) {
1569 xrax.target_reg = X86_REG_RAX;
1570 xrax.eamode = EA_DIRECT;
1571 doMOVEExt(p, rin, rin->arg1.rea, &xrax, ExtSize);
1575 xarg1.target_reg = X86_RUNTIME_ARG1;
1576 xarg1.eamode = EA_DIRECT;
1577 xarg2.target_reg = X86_RUNTIME_ARG2;
1578 xarg2.eamode = EA_DIRECT;
1579 dassert(rin->arg4.rea == NULL);
1581 doLEA(p, rin->arg2.rea, &xarg1);
1583 doLEA(p, rin->arg3.rea, &xarg2);
1586 if (ExtSize == REXT_I32)
1587 printf("\tcall\t*%%eax\n");
1589 printf("\tcall\t*%%rax\n");
1592 printEA(rin->arg1.rea, ExtSize);
1595 doSpecialRestore(rin);
/*
 * Emit a return.  The instructions following rin are scanned, skipping
 * INSN_LABEL entries, to find out whether this is the last real
 * instruction of the procedure.  The emitted form is a jump to the
 * per-procedure return label ".LRET<ProcNo>".
 */
1599 InsnRET(RASParser *p __unused, rinsn_t *rin)
1604 * If we are at the end of the procedure we can just fall through,
1605 * otherwise jump to the return label.
1607 scan = RUNE_NEXT(rin, node);
1608 while (scan && scan->op == INSN_LABEL)
1609 scan = RUNE_NEXT(scan, node);
1611 printf("\tjmp\t.LRET%d\n", ProcNo);
/* DET: zero-argument runtime call to RuneRunTime_ThreadedDetach. */
1615 InsnDET(RASParser *p, rinsn_t *rin)
1617 insnZeroArgNoReturn(p, rin, "ThreadedDetach");
1621 InsnLINK(RASParser *p __unused, rinsn_t *rin __unused)
/* TSCHED: runtime call to RuneRunTime_TSched with arg1 passed by value. */
1627 InsnTSCHED(RASParser *p, rinsn_t *rin)
1629 insnOneArg32NoReturn(p, rin, "TSched");
/*
 * Emit an unconditional jump to rin->brtrue.  Nothing is emitted when
 * the target label is adjacent to this instruction (fall through).
 */
1633 InsnJMP(RASParser *p __unused, rinsn_t *rin)
1635 if (!adjacentLabel(rin, rin->brtrue)) {
1637 printBRLabel(rin->brtrue);
/*
 * Emit a test of arg1 against zero, followed by a conditional branch or
 * a setcc.
 *
 * 'brtrue' and 'brfalse' are the normal and inverted x86 condition
 * suffixes for this opcode.  For branching instructions the inverted
 * condition is used when the true target is the adjacent label;
 * otherwise the normal condition branches to the true target, and when
 * neither target is adjacent the false label is printed as well.
 * Without RINSF_BRANCH the condition is written to arg2 as an 8-bit
 * value with set<cc>.
 */
1643 InsnTEST(RASParser *p __unused, rinsn_t *rin)
1646 const char *brfalse;
1649 * For TEST_EQ this is 'e' For TEST_NE this is 'ne'
1651 brtrue = x86branch(rin->op, 0, 0);
1652 brfalse = x86branch(rin->op, 1, 0);
1654 printf("\tcmp%s\t$0, ", x86ext(rin->ext1, 0));
1655 printEA(rin->arg1.rea, rin->ext1);
1657 if (rin->flags & RINSF_BRANCH) {
1658 if (adjacentLabel(rin, rin->brtrue)) {
1660 * next insn matches true path, invert and branch on-false.
1662 printf("\tj%s\t", brfalse);
1663 printBRLabel(rin->brfalse);
1664 } else if (adjacentLabel(rin, rin->brfalse)) {
1666 * next insn matches false path, do not invert and branch
1669 printf("\tj%s\t", brtrue);
1670 printBRLabel(rin->brtrue);
1673 * next insn does not match either path.
1675 printf("\tj%s\t", brtrue);
1676 printBRLabel(rin->brtrue);
1679 printBRLabel(rin->brfalse);
1682 printf("\tset%s\t", brtrue);
1683 printEA(rin->arg2.rea, REXT_I8);
/*
 * InsnFMOVE - floating-point move, emitted through doInsnRMMR3() with the
 * "mov" mnemonic.
 */
1690 InsnFMOVE(RASParser *p, rinsn_t *rin)
1692 doInsnRMMR3(p, rin, "mov");
/*
 * InsnFADD - floating-point add, emitted through doInsnFloating3() with
 * isfcmp = 0.  The returned value is not used.
 */
1696 InsnFADD(RASParser *p, rinsn_t *rin)
1698 doInsnFloating3(p, rin, "add", 0);
/*
 * InsnFSUB - floating-point subtract, emitted through doInsnFloating3()
 * with isfcmp = 0.  The returned value is not used.
 */
1702 InsnFSUB(RASParser *p, rinsn_t *rin)
1704 doInsnFloating3(p, rin, "sub", 0);
/*
 * InsnFMUL - floating-point multiply, emitted through doInsnFloating3()
 * with isfcmp = 0.  The returned value is not used.
 */
1708 InsnFMUL(RASParser *p, rinsn_t *rin)
1710 doInsnFloating3(p, rin, "mul", 0);
/*
 * InsnFDIV - floating-point divide, emitted through doInsnFloating3()
 * with isfcmp = 0.  The returned value is not used.
 */
1714 InsnFDIV(RASParser *p, rinsn_t *rin)
1716 doInsnFloating3(p, rin, "div", 0);
/*
 * getFPImm - fill in an immediate EA ("imm") for the integer value v, in
 * the form selected by rin->ext1.
 *
 * Two of the visible cases produce EA_IMMEDIATE.  A third produces
 * EA_IMMEDIATE16: v is converted to long double and its two words are
 * stored in immlo/immhi, in an order chosen by _BYTE_ORDER.  Any other
 * ext1 reaches dpanic().
 * NOTE(review): the case labels, the declarations of imm and ww, and the
 * return statement are not visible in this listing -- confirm which REXT
 * value selects which case and the lifetime of the returned EA.
 */
1721 getFPImm(RASParser *p __unused, rinsn_t *rin, int v)
1737 switch (rin->ext1) {
1740 imm.eamode = EA_IMMEDIATE;
1746 imm.eamode = EA_IMMEDIATE;
1751 imm.eamode = EA_IMMEDIATE16;
1752 ww.x = (long double)v;
1753 #if _BYTE_ORDER == _LITTLE_ENDIAN
1754 imm.immlo = ww.v[0];
1755 imm.immhi = ww.v[1];
1757 imm.immlo = ww.v[1];
1758 imm.immhi = ww.v[0];
1762 dpanic("Unknown/unsupported REXT %d", rin->ext1);
/*
 * InsnFINC - floating-point increment, emitted as an "add" of the
 * immediate 1.
 *
 * Builds a scratch instruction with one more operand than rin:
 * arg1 = getFPImm(p, rin, 1), arg2 = rin's arg1, arg3 = rin's arg2, all
 * flagged RAF_FLOAT and using rin's ext1, then passes it to
 * doInsnFloating3().
 */
1769 InsnFINC(RASParser *p, rinsn_t *rin)
/* static: the same scratch instruction is reused on every call */
1771 static rinsn_t srin;
1773 srin.operands = rin->operands + 1;
1774 srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT;
1775 srin.arg1.rea = getFPImm(p, rin, 1);
1776 srin.arg2.rea = rin->arg1.rea;
1777 srin.arg3.rea = rin->arg2.rea;
1778 srin.ext1 = rin->ext1;
1780 doInsnFloating3(p, &srin, "add", 0);
/*
 * InsnFDEC - floating-point decrement, emitted as a "sub" of the
 * immediate 1.
 *
 * Builds a scratch instruction with one more operand than rin:
 * arg1 = getFPImm(p, rin, 1), arg2 = rin's arg1, arg3 = rin's arg2, all
 * flagged RAF_FLOAT and using rin's ext1, then passes it to
 * doInsnFloating3().
 */
1784 InsnFDEC(RASParser *p, rinsn_t *rin)
/* static: the same scratch instruction is reused on every call */
1786 static rinsn_t srin;
1788 srin.operands = rin->operands + 1;
1789 srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT;
1790 srin.arg1.rea = getFPImm(p, rin, 1);
1791 srin.arg2.rea = rin->arg1.rea;
1792 srin.arg3.rea = rin->arg2.rea;
1793 srin.ext1 = rin->ext1;
1795 doInsnFloating3(p, &srin, "sub", 0);
/*
 * InsnFNEG - floating-point negate, emitted as a "sub" involving the
 * immediate 0.
 *
 * The scratch instruction always gets arg1 = rin's arg1 and
 * arg2 = getFPImm(p, rin, 0).  The destination (arg3) is rin's arg1 in
 * the one-operand form and rin's arg2 otherwise.
 * NOTE(review): the assignment to srin.operands (original line 1803) is
 * not visible in this listing; presumably the result is 0 - arg1 --
 * confirm against doInsnFloating3()'s operand order.
 */
1799 InsnFNEG(RASParser *p, rinsn_t *rin)
/* static: the same scratch instruction is reused on every call */
1801 static rinsn_t srin;
1804 srin.arg1.flags = srin.arg2.flags = srin.arg3.flags = RAF_FLOAT;
1805 if (rin->operands == 1) {
1806 srin.arg1.rea = rin->arg1.rea;
1807 srin.arg2.rea = getFPImm(p, rin, 0);
1808 srin.arg3.rea = rin->arg1.rea;
1810 srin.arg1.rea = rin->arg1.rea;
1811 srin.arg2.rea = getFPImm(p, rin, 0);
1812 srin.arg3.rea = rin->arg2.rea;
1814 srin.ext1 = rin->ext1;
1816 doInsnFloating3(p, &srin, "sub", 0);
/*
 * InsnFPOS - handler for the floating-point POS instruction.
 * NOTE(review): the body is not visible in this listing -- confirm what,
 * if anything, it emits.
 */
1820 InsnFPOS(RASParser *p, rinsn_t *rin)
/*
 * InsnFNOT - logical NOT of a floating-point value, producing an 8-bit
 * result in arg2.
 *
 * Requires exactly two operands and ext2 == REXT_I8 (both asserted).
 *
 * Non-128-bit source: %xmm0 is zeroed with pxor and arg1 is compared
 * against it with ucomiss (ext1 == REXT_I32) or ucomisd (any other
 * size).
 *
 * 128-bit source: arg1 is put in memory form with InsnSimplifyFP128()
 * and compared on the x87 stack with fucomip; the remaining stack entry
 * is popped with fstp.
 *
 * NOTE(review): the mnemonics printed immediately before each
 * printEA(rin->arg2.rea, ...) call, and the x87 loads, are not visible in
 * this listing.
 */
1826 InsnFNOT(RASParser *p, rinsn_t *rin)
1829 dassert(rin->operands == 2);
1830 dassert(rin->ext2 == REXT_I8);
1832 if (rin->ext1 != REXT_I128) {
1833 printf("\tpxor\t%%xmm0, %%xmm0\n");
1834 printf("\tucomis%c\t",
1835 ((rin->ext1 == REXT_I32) ? 's' : 'd'));
1836 printEA(rin->arg1.rea, rin->ext1);
1837 printf(", %%xmm0\n");
1840 printEA(rin->arg2.rea, rin->ext2);
1846 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea,
1849 printEA(sea, REXT_I128);
1853 printf("\tfucomip\t%%st(1), %%st\n");
1854 printf("\tfstp\t%%st(0)\n");
1857 printEA(rin->arg2.rea, rin->ext2);
/*
 * InsnUITOF - unsigned integer to floating point.
 * NOTE(review): the body is not visible in this listing; InsnSITOF tests
 * for INSN_UITOF, so this presumably forwards to it -- confirm.
 */
1864 InsnUITOF(RASParser *p, rinsn_t *rin)
/*
 * InsnSITOF - integer to floating point conversion.  isunsigned is set
 * when rin->op is INSN_UITOF, so the unsigned form is handled here too.
 *
 * Destination other than 128-bit (ext2 != REXT_I128): the source is moved
 * into %rcx, 8- and 16-bit sources are widened to 32 bits with movz or
 * movs, cvtsi2<ext> converts into %xmm0 (the q form for 64-bit sources),
 * and the result is moved to arg2.
 *
 * 128-bit destination: operands are put in memory form with
 * InsnSimplifyFP128(), the integer is loaded with fild (after widening
 * through %rcx where required), an unsigned 64-bit source gets a
 * correction add when its sign bit is set, and the result is stored with
 * fstpt and copied to arg2 if dea is not arg2 itself.
 *
 * NOTE(review): the case labels and the signed/unsigned if/else lines are
 * not visible in this listing, so which fild/movz/movs line belongs to
 * which branch must be confirmed against the full source.
 */
1871 InsnSITOF(RASParser *p, rinsn_t *rin)
1876 int isunsigned = (rin->op == INSN_UITOF);
1878 if (rin->ext2 != REXT_I128) {
1882 xsea.target_reg = X86_REG_RCX;
1883 xsea.eamode = EA_DIRECT;
1885 xdea.target_reg = X86_REG_XMM0;
1886 xdea.eamode = EA_DIRECT;
1888 doMOVE(p, rin, rin->arg1.rea, &xsea);
1889 if (rin->ext1 == REXT_I8) {
1891 printf("\tmovzbl\t%%cl, %%ecx\n");
1893 printf("\tmovsbl\t%%cl, %%ecx\n");
1895 if (rin->ext1 == REXT_I16) {
1897 printf("\tmovzwl\t%%cx, %%ecx\n");
1899 printf("\tmovswl\t%%cx, %%ecx\n");
1902 if (rin->ext1 == REXT_I64) {
1903 printf("\tcvtsi2%sq\t%%rcx, %%xmm0\n",
1904 x86ext(rin->ext2, RAF_FLOAT));
1905 doMOVEExt(p, rin, &xdea, rin->arg2.rea, REXTF_EA2);
1907 printf("\tcvtsi2%s\t%%ecx, %%xmm0\n",
1908 x86ext(rin->ext2, RAF_FLOAT));
1909 doMOVEExt(p, rin, &xdea, rin->arg2.rea, REXTF_EA2);
1917 * SimplifyFP128 ensures that the requested argument is in memory and
1918 * not a register or immediate. We have to do more work for signed
1921 if (rin->ext1 == REXT_I8 ||
1922 (isunsigned && rin->ext1 != REXT_I64)) {
1923 xsea.target_reg = X86_REG_RSP;
1924 xsea.eamode = EA_MEMORY;
1928 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xsea,
1932 dea = InsnSimplifyFP128(p, rin, rin->arg2.rea, &xdea,
1935 switch (rin->ext1) {
1937 xrcx.target_reg = X86_REG_RCX;
1938 xrcx.eamode = EA_DIRECT;
1940 doMOVEExt(p, rin, rin->arg1.rea, &xrcx,
1941 rin->ext1 | REXTF_SEA1);
1943 printf("\tmovzbw\t%%cl, %%cx\n");
1945 printf("\tmovsbw\t%%cl, %%cx\n");
1946 doMOVEExt(p, rin, &xrcx, sea, REXT_I16);
1948 printf("\tfilds\t");
1952 xrcx.target_reg = X86_REG_RCX;
1953 xrcx.eamode = EA_DIRECT;
1955 doMOVEExt(p, rin, rin->arg1.rea, &xrcx,
1956 rin->ext1 | REXTF_SEA1);
1957 printf("\tmovzwl\t%%cx, %%ecx\n");
1958 doMOVEExt(p, rin, &xrcx, sea, REXT_I32);
1960 printf("\tfildl\t");
1963 printf("\tfilds\t");
1968 xrcx.target_reg = X86_REG_RCX;
1969 xrcx.eamode = EA_DIRECT;
1971 /* moves to %e* zero-ext to %r* */
1972 doMOVEExt(p, rin, rin->arg1.rea, &xrcx,
1973 rin->ext1 | REXTF_SEA1);
1974 doMOVEExt(p, rin, &xrcx, sea, REXT_I64);
1976 printf("\tfildq\t");
1979 printf("\tfildl\t");
1984 printf("\tfildq\t");
1988 dpanic("Unknown/unsupported REXT %d", rin->ext1);
1995 * A bit of magic for unsigned 64-bits.
1997 if (rin->ext1 == REXT_I64 && isunsigned) {
1998 static rea_t gimmin;
1999 static rea_t gimmout;
2000 static int ulabel = 100;
2002 gimmin.eamode = EA_IMMEDIATE;
/* 1602224128 == 0x5F800000, the single-precision bit pattern of 2^64 */
2003 gimmin.immlo = 1602224128;
2005 CreateGlobalImmediate(&gimmin, &gimmout, rin->ext1);
/* jns skips the fadds when %rcx has its sign bit clear */
2006 printf("\ttestq\t%%rcx, %%rcx\n");
2007 printf("\tjns\t.LITOF%d\n", ulabel);
2009 printf("\tfadds\t");
2010 printEA(&gimmout, REXT_I64);
2013 printf(".LITOF%d:\n", ulabel);
2017 printf("\tfstpt\t");
2018 printEA(dea, REXT_I128);
2021 if (dea != rin->arg2.rea)
2022 doMOVEExt(p, rin, dea, rin->arg2.rea,
2023 rin->ext2 | REXTF_FLOAT | REXTF_EA2);
/*
 * InsnFTOUI - floating point to unsigned integer.
 *
 * Prints an assembly comment flagging that unsigned handling is not
 * implemented.
 * NOTE(review): the rest of the body is not visible in this listing;
 * presumably it falls back to the signed conversion -- confirm.
 */
2028 InsnFTOUI(RASParser *p, rinsn_t *rin)
2030 printf("\t#XXX supposed to be unsigned\n");
/*
 * File-scope flag read by InsnFTOSI (it tests LastWasFTOI == 0).
 * NOTE(review): the lines that set and clear it are not visible in this
 * listing.
 */
2035 static int LastWasFTOI;
/*
 * InsnFTOIProbe - installed as ProbeFunc by InsnFTOSI.
 *
 * When the instruction is neither INSN_FTOSI nor INSN_FTOUI (together
 * with a leading condition not visible in this listing), prints an fldcw
 * from an %rsp-relative memory operand.
 * NOTE(review): the offset assigned to x2ea (original line 2046) is not
 * visible; presumably this reloads the control word saved by InsnFTOSI --
 * confirm.
 */
2038 InsnFTOIProbe(RASParser *p __unused, rinsn_t *rin)
2043 (rin->op != INSN_FTOSI && rin->op != INSN_FTOUI)) {
2045 x2ea.target_reg = X86_REG_RSP;
2047 x2ea.eamode = EA_MEMORY;
2048 printf("\tfldcw\t");
2049 printEA(&x2ea, rin->ext2);
/*
 * InsnFTOSI - floating point to signed integer conversion.
 *
 * Source other than 128-bit: arg1 is moved into %xmm0, converted with
 * cvt<ext>2si into %ecx (or cvt<ext>2siq into %rcx for a 64-bit result),
 * and moved to arg2.
 *
 * 128-bit source: arg1 is put in memory form with InsnSimplifyFP128().
 * When LastWasFTOI is zero the x87 control word is saved with fnstcw,
 * modified in %cx, written back to an %rsp-relative slot and loaded with
 * fldcw; InsnFTOIProbe is installed as ProbeFunc.  The value is then
 * stored with fistp.  A destination that is not memory, or an 8-bit
 * result, goes through an %rsp-relative temporary and is moved on to
 * arg2.
 *
 * NOTE(review): the %rsp offsets, the x87 load and several else lines
 * are not visible in this listing.
 */
2058 InsnFTOSI(RASParser *p, rinsn_t *rin)
2063 if (rin->ext1 != REXT_I128) {
2067 xrea.target_reg = X86_REG_XMM0;
2068 xrea.eamode = EA_DIRECT;
2069 x2ea.target_reg = X86_REG_RCX;
2070 x2ea.eamode = EA_DIRECT;
2072 doMOVE(p, rin, rin->arg1.rea, &xrea);
2074 if (rin->ext2 == REXT_I64) {
2075 printf("\tcvt%s2siq\t%%xmm0, %%rcx\n",
2076 x86ext(rin->ext1, RAF_FLOAT));
2077 doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2);
2079 printf("\tcvt%s2si\t%%xmm0, %%ecx\n",
2080 x86ext(rin->ext1, RAF_FLOAT));
2081 doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2);
2089 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea,
2092 printEA(sea, REXT_I128);
2095 xrea.target_reg = X86_REG_RCX;
2096 xrea.eamode = EA_DIRECT;
2098 if (LastWasFTOI == 0) {
2099 ProbeFunc = InsnFTOIProbe;
2101 x2ea.target_reg = X86_REG_RSP;
2103 x2ea.eamode = EA_MEMORY;
2104 printf("\tfnstcw\t");
2105 printEA(&x2ea, rin->ext2);
2108 printf("\tmovzwl\t");
2109 printEA(&x2ea, rin->ext2);
2112 x2ea.target_reg = X86_REG_RSP;
2114 x2ea.eamode = EA_MEMORY;
/*
 * $12 in %ch sets bits 10-11 of the 16-bit word in %cx; in the x87
 * control word those are the rounding-control field, and both bits set
 * selects round-toward-zero.
 */
2115 printf("\torb\t$12, %%ch\n");
2116 printf("\tmovw\t%%cx, ");
2117 printEA(&x2ea, rin->ext2);
2120 printf("\tfldcw\t");
2121 printEA(&x2ea, rin->ext2);
2126 printf("\tfistp%c\t",
2127 ((rin->ext2 == REXT_I8) ? 's' :
2128 ((rin->ext2 == REXT_I16) ? 's' :
2129 ((rin->ext2 == REXT_I32) ? 'l' :
2130 ((rin->ext2 == REXT_I64) ? 'q' : '?')))));
2132 if (rin->arg2.rea->eamode != EA_MEMORY || rin->ext2 == REXT_I8) {
2133 x2ea.target_reg = X86_REG_RSP;
2135 x2ea.eamode = EA_MEMORY;
2136 printEA(&x2ea, rin->ext2);
2138 if (rin->arg2.rea->eamode == EA_MEMORY) {
2139 doMOVEExt(p, rin, &x2ea, &xrea, REXTF_EA2);
2140 doMOVEExt(p, rin, &xrea, rin->arg2.rea,
2143 doMOVEExt(p, rin, &x2ea, rin->arg2.rea,
2147 printEA(rin->arg2.rea, rin->ext2);
/*
 * InsnCASTF - convert between floating-point sizes.
 *
 * Neither side 128-bit: arg1 is moved into %xmm0, converted in place
 * with cvt<ext1>2<ext2>, and moved to arg2.
 *
 * 128-bit source: arg1 is put in memory form with InsnSimplifyFP128()
 * and the result is stored with fstps (ext2 == REXT_I32) or fstpl,
 * through an %rsp-relative temporary when arg2 is not memory.
 *
 * 128-bit destination: the result is stored with fstpt, again through an
 * %rsp-relative temporary when arg2 is not memory.
 *
 * NOTE(review): the x87 load mnemonics and the %rsp offsets are not
 * visible in this listing.
 */
2155 InsnCASTF(RASParser *p, rinsn_t *rin)
2160 if (rin->ext1 != REXT_I128 && rin->ext2 != REXT_I128) {
2162 * f32/f64 <-> f32/f64 (SSE2)
2164 xrea.target_reg = X86_REG_XMM0;
2165 xrea.eamode = EA_DIRECT;
2166 doMOVE(p, rin, rin->arg1.rea, &xrea);
2167 printf("\tcvt%s2%s\t%%xmm0, %%xmm0\n",
2168 x86ext(rin->ext1, RAF_FLOAT),
2169 x86ext(rin->ext2, RAF_FLOAT));
2170 doMOVEExt(p, rin, &xrea, rin->arg2.rea, REXTF_EA2);
2171 } else if (rin->ext1 == REXT_I128) {
2177 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea,
2180 printEA(sea, REXT_I128);
2183 printf("\tfstp%c\t",
2184 (rin->ext2 == REXT_I32) ? 's' : 'l');
2185 if (rin->arg2.rea->eamode != EA_MEMORY) {
2186 x2ea.target_reg = X86_REG_RSP;
2188 x2ea.eamode = EA_MEMORY;
2189 printEA(&x2ea, rin->ext2);
2191 doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2);
2193 printEA(rin->arg2.rea, rin->ext2);
2202 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xrea,
2205 (rin->ext1 == REXT_I32) ? 's' : 'l');
2206 printEA(sea, REXT_I128);
2209 printf("\tfstpt\t");
2210 if (rin->arg2.rea->eamode != EA_MEMORY) {
2211 x2ea.target_reg = X86_REG_RSP;
2213 x2ea.eamode = EA_MEMORY;
2214 printEA(&x2ea, rin->ext2);
2216 doMOVEExt(p, rin, &x2ea, rin->arg2.rea, REXTF_EA2);
2218 printEA(rin->arg2.rea, rin->ext2);
/*
 * InsnFCMP - floating-point compare, followed by a conditional branch or
 * a set<cc>.
 *
 * 128-bit operands go through doInsnFloating3() with "fucomip" and
 * isfcmp = 1.  Other sizes are arranged by InsnSimplifyMRFCMP() and
 * compared with ucomi<ext>.  Both helpers return "rev", which is passed
 * to x86branch() so the condition matches the operand order actually
 * emitted.
 *
 * With RINSF_BRANCH set the jump sense is chosen from whichever target
 * adjacentLabel() reports as adjacent; otherwise set<cc> is printed with
 * arg3 as an 8-bit operand.
 */
2226 InsnFCMP(RASParser *p, rinsn_t *rin)
2233 * Bleh, have to use FP stack, and 128-bit immediate values don't work.
2235 if (rin->ext1 == REXT_I128) {
2236 rev = doInsnFloating3(p, rin, "fucomip", 1);
2238 rev = InsnSimplifyMRFCMP(p, rin, &sea1, &sea2);
2241 * 128-bit is unfortunately different from 32-bit and 64-bit.
2243 printf("\tucomi%s\t", x86ext(rin->ext1, RAF_FLOAT));
2244 printEA(sea1, rin->ext1);
2246 printEA(sea2, rin->ext1);
2250 if (rin->flags & RINSF_BRANCH) {
2252 * Compare and branch. Invert sense if we can optimize the jmp.
2254 if (adjacentLabel(rin, rin->brtrue)) {
2256 * next insn matches true path, invert and branch on-false.
2258 printf("\tj%s\t", x86branch(rin->op, 1, rev));
2259 printBRLabel(rin->brfalse);
2261 } else if (adjacentLabel(rin, rin->brfalse)) {
2263 * next insn matches false path, do not invert and branch
2266 printf("\tj%s\t", x86branch(rin->op, 0, rev));
2267 printBRLabel(rin->brtrue);
2271 * next insn does not match either path.
2273 printf("\tj%s\t", x86branch(rin->op, 0, rev));
2274 printBRLabel(rin->brtrue);
2277 printBRLabel(rin->brfalse);
2282 * Compare and set result
2284 printf("\tset%s\t", x86branch(rin->op, 0, rev));
2285 printEA(rin->arg3.rea, REXT_I8);
2291 * Complex helper functions
2295 * srea is an immediate value, determine if it can be directly specified in
2296 * assembly or if it must be loaded into a register first.
/*
 * doHandleImmediate - when the operand size in extov is 64-bit and
 * srea->immlo lies outside the signed 32-bit range, point drea at %rcx
 * and print a movabsq of the immediate into it.
 *
 * Declared as returning int in the file's prototypes; callers test the
 * result.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero means "use drea instead of srea" -- confirm.
 */
2300 doHandleImmediate(rea_t *srea, rea_t *drea, uint8_t extov)
2302 if ((extov & REXTF_MASK) == REXT_I64 &&
2303 (srea->immlo < (int64_t) (int)0x80000000U ||
2304 srea->immlo > (int64_t) 0x7FFFFFFFU)) {
2305 drea->target_reg = X86_REG_RCX;
2306 drea->eamode = EA_DIRECT;
2308 printf("\tmovabsq\t");
2309 printEA(srea, extov);
2311 printEA(drea, extov);
2319 * Binary (1-arg or 2-arg) (i.e. FMOVE, MOVE, LEA)
2321 * RM2 - reg,mem MR2 - mem,reg
2323 * NOTE: For CAST* insns which pass SEA1|EA2, we need to use only EA2 for the
2324 * second instruction when we have to break the operation up into two since
2325 * the first instruction handles converting ext1 into ext2 for the
2326 * intermediate result.
/*
 * doInsnRM2NoReadDst - two-operand instruction whose source must not be
 * memory or a 64-bit immediate.
 *
 * Such a source is first loaded into a scratch register (%xmm0 when arg1
 * is flagged RAF_FLOAT, %rcx otherwise) and the instruction is then
 * printed from the scratch register with REXTF_SEA1 cleared from extov.
 * Any other source is printed directly.  An operand count other than 2
 * is reported through RasError().
 * NOTE(review): what happens when both EAs are the same (original line
 * 2336) is not visible in this listing.
 */
2330 doInsnRM2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op, uint8_t extov)
2334 if (rin->operands == 2) {
2335 if (sameEA(rin->arg1.rea, rin->arg2.rea)) {
2337 } else if (rin->arg1.rea->eamode == EA_MEMORY ||
2338 ISIMM64(rin->arg1.rea)) {
2339 if (rin->arg1.flags & RAF_FLOAT) {
2340 xrea.target_reg = X86_REG_XMM0;
2341 xrea.eamode = EA_DIRECT;
2343 xrea.target_reg = X86_REG_RCX;
2344 xrea.eamode = EA_DIRECT;
2346 doMOVE(p, rin, rin->arg1.rea, &xrea);
2347 printINSN(rin, x86op, &xrea, rin->arg2.rea,
2348 extov & ~REXTF_SEA1);
2350 printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, extov);
2353 RasError(p, "Instruction must be 2-op only");
/*
 * doInsnMR2NoReadDst - two-operand instruction whose destination must
 * not be memory.
 *
 * For a memory destination the instruction is printed with a scratch
 * register as its target (%xmm0 when arg2 is flagged RAF_FLOAT, %rcx
 * otherwise) and the scratch register is then moved to arg2 with
 * REXTF_SEA1 cleared from extov.  Any other destination is printed
 * directly.  An operand count other than 2 is reported through
 * RasError().
 * NOTE(review): what happens when both EAs are the same (original line
 * 2365) is not visible in this listing.
 */
2359 doInsnMR2NoReadDst(RASParser *p, rinsn_t *rin, const char *x86op, uint8_t extov)
2363 if (rin->operands == 2) {
2364 if (sameEA(rin->arg1.rea, rin->arg2.rea)) {
2366 } else if (rin->arg2.rea->eamode == EA_MEMORY) {
2367 if (rin->arg2.flags & RAF_FLOAT) {
2368 xrea.target_reg = X86_REG_XMM0;
2369 xrea.eamode = EA_DIRECT;
2371 xrea.target_reg = X86_REG_RCX;
2372 xrea.eamode = EA_DIRECT;
2374 printINSN(rin, x86op, rin->arg1.rea, &xrea, extov);
2375 doMOVEExt(p, rin, &xrea, rin->arg2.rea,
2376 extov & ~REXTF_SEA1);
2378 printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, extov);
2381 RasError(p, "Instruction must be 2-op only");
2386 * Binary (2-arg or 3-arg)
2388 * RM3 - imm/reg,*,mem MR3 - mem,*,reg (so far not needed)
2389 * RMMR3- can be either
2391 * If x86op is "mov" then we can optimize the 2-op instruction which is
2392 * particularly useful when moving zero into a FP register.
/*
 * doInsnRMMR3 - emit a 2- or 3-operand instruction.
 *
 * Two-operand form (or arg2 and arg3 are the same EA): a memory source,
 * or an immediate source when noimm is set, is staged through a scratch
 * register (%xmm0 for RAF_FLOAT, %rcx otherwise).  The exception is a
 * non-memory source with x86op "mov", which is handed straight to
 * doMOVE().
 *
 * Three-operand form: arg2 is copied into dea, the instruction is
 * applied to dea, and dea is copied to arg3 when it is a scratch
 * register.  dea is arg3 itself when arg3 is a register different from
 * arg1.
 *
 * Any other operand count is reported through RasError().
 * NOTE(review): the assignment of noimm under the INSNF_FLOAT test, and
 * the "dea = &xrea" lines, are not visible in this listing.
 */
2396 doInsnRMMR3(RASParser *p, rinsn_t *rin, const char *x86op)
2402 if (rin->op & INSNF_FLOAT)
2407 if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
2408 if (rin->arg1.rea->eamode == EA_MEMORY ||
2409 (noimm && (rin->arg1.rea->eamode == EA_IMMEDIATE ||
2410 rin->arg1.rea->eamode == EA_IMMEDIATE16))) {
2411 if (rin->arg1.flags & RAF_FLOAT) {
2412 xrea.target_reg = X86_REG_XMM0;
2413 xrea.eamode = EA_DIRECT;
2415 xrea.target_reg = X86_REG_RCX;
2416 xrea.eamode = EA_DIRECT;
2418 if (rin->arg1.rea->eamode != EA_MEMORY &&
2419 strcmp(x86op, "mov") == 0) {
2420 doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea);
2422 doMOVE(p, rin, rin->arg1.rea, &xrea);
2423 printINSN(rin, x86op, &xrea, rin->arg2.rea, 0);
2426 printINSN(rin, x86op, rin->arg1.rea, rin->arg2.rea, 0);
2428 } else if (rin->operands == 3) {
2431 if (rin->arg3.rea->eamode == EA_DIRECT &&
2432 !sameEA(rin->arg1.rea, rin->arg3.rea)) {
2434 * Optimize if target is a register.
2436 dea = rin->arg3.rea;
2437 } else if (rin->arg2.flags & RAF_FLOAT) {
2438 xrea.target_reg = X86_REG_XMM1;
2439 xrea.eamode = EA_DIRECT;
2442 xrea.target_reg = X86_REG_RCX;
2443 xrea.eamode = EA_DIRECT;
2446 if (noimm && (rin->arg1.rea->eamode == EA_IMMEDIATE ||
2447 rin->arg1.rea->eamode == EA_IMMEDIATE16)) {
2448 dassert(rin->arg1.flags & RAF_FLOAT);
2449 x2ea.target_reg = X86_REG_XMM0;
2450 x2ea.eamode = EA_DIRECT;
2451 doMOVE(p, rin, rin->arg1.rea, &x2ea);
2452 doMOVE(p, rin, rin->arg2.rea, dea);
2453 printINSN(rin, x86op, &x2ea, dea, 0);
2454 if (dea != rin->arg3.rea)
2455 doMOVE(p, rin, dea, rin->arg3.rea);
2457 doMOVE(p, rin, rin->arg2.rea, dea);
2458 printINSN(rin, x86op, rin->arg1.rea, dea, 0);
2459 if (dea != rin->arg3.rea)
2460 doMOVE(p, rin, dea, rin->arg3.rea);
2463 RasError(p, "Instruction must be 2-op or 3-op only");
2468 * Instruction requires mem,reg or reg,reg. If this is a floating point
2469 * instruction an immediate source must be placed in a register or be
2470 * accessed from memory.
2472 * If nodrd is non-zero it means the destination in a 2-op instruction is NOT
2473 * read, so we don't have to copy arg2.rea into the destination before
2474 * issuing the instruction.
/*
 * doInsnMR3 - emit a 2- or 3-operand instruction whose destination must
 * be a register.
 *
 * A floating-point instruction with an immediate arg1 has the immediate
 * placed in memory by CreateGlobalImmediate(); otherwise sea is arg1.
 *
 * Two-operand form (or arg2 and arg3 are the same EA): a memory
 * destination is loaded into a scratch register (%xmm0 for RAF_FLOAT,
 * %rcx otherwise), operated on, and stored back.
 *
 * Three-operand form: arg2 is copied into dea, the instruction is
 * applied, and dea is copied to arg3 when it is a scratch register.
 *
 * Any other operand count is reported through RasError().
 * NOTE(review): nodrd is not referenced by any visible line, and the
 * "sea = &x2ea" / "dea = &xrea" lines are not visible in this listing.
 */
2478 doInsnMR3(RASParser *p, rinsn_t *rin, const char *x86op, int nodrd)
2485 * Floating point instructions can't have an immediate source. doMOVE()
2486 * will handle optimizing the immediate value for us.
2488 if ((rin->op & INSNF_FLOAT) &&
2489 (rin->arg1.rea->eamode == EA_IMMEDIATE ||
2490 rin->arg1.rea->eamode == EA_IMMEDIATE16)) {
2491 CreateGlobalImmediate(rin->arg1.rea, &x2ea, rin->ext1);
2494 sea = rin->arg1.rea;
2498 * Handle 2-arg and 3-arg mechanics. A memory destination is not
2501 if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
2502 if (rin->arg2.rea->eamode == EA_MEMORY) {
2503 if (rin->arg2.flags & RAF_FLOAT) {
2504 xrea.target_reg = X86_REG_XMM0;
2505 xrea.eamode = EA_DIRECT;
2507 xrea.target_reg = X86_REG_RCX;
2508 xrea.eamode = EA_DIRECT;
2511 doMOVE(p, rin, rin->arg2.rea, &xrea);
2512 printINSN(rin, x86op, sea, &xrea, 0);
2513 doMOVE(p, rin, &xrea, rin->arg2.rea);
2515 printINSN(rin, x86op, sea, rin->arg2.rea, 0);
2517 } else if (rin->operands == 3) {
2520 if (rin->arg3.rea->eamode == EA_DIRECT &&
2521 !sameEA(rin->arg1.rea, rin->arg3.rea)) {
2523 * Optimize if target is a register.
2525 dea = rin->arg3.rea;
2526 } else if (rin->arg2.flags & RAF_FLOAT) {
2527 xrea.target_reg = X86_REG_XMM0;
2528 xrea.eamode = EA_DIRECT;
2531 xrea.target_reg = X86_REG_RCX;
2532 xrea.eamode = EA_DIRECT;
2535 doMOVE(p, rin, rin->arg2.rea, dea);
2536 printINSN(rin, x86op, sea, dea, 0);
2537 if (dea != rin->arg3.rea)
2538 doMOVE(p, rin, dea, rin->arg3.rea);
2540 RasError(p, "Instruction must be 2-op or 3-op only");
2545 * Unary (1-arg) with possible 2-arg optimization
2547 * R - single argument to register, else 2-arg optimization to either
2548 * register or memory.
/*
 * doInsn1R - emit a unary instruction.
 *
 * x86op1 is the one-operand mnemonic.  x86op2, when non-NULL, is an
 * alternative two-argument mnemonic printed with a leading text argument.
 *
 * One-operand form (or arg1 and arg2 are the same EA): a register
 * operand uses x86op1 directly; otherwise x86op2 is used when supplied;
 * failing that the value goes through a scratch register (%xmm0 for
 * RAF_FLOAT, %rcx otherwise) and is stored back to arg1.
 *
 * Two-operand form: with x86op2 and a register destination, arg1 is
 * moved into arg2 and x86op2 is applied there; otherwise the value goes
 * through the scratch register and is stored to arg2.
 *
 * Any other operand count is reported through RasError().
 * NOTE(review): the third printf argument in the two x86op2 paths
 * (original lines 2569 and 2595) is not visible in this listing;
 * presumably x86opt2arg -- confirm.
 */
2552 doInsn1R(RASParser * p, rinsn_t * rin, const char *x86op1,
2553 const char *x86op2, const char *x86opt2arg)
2557 if (rin->operands == 1 || sameEA(rin->arg1.rea, rin->arg2.rea)) {
2558 if (rin->arg1.rea->eamode == EA_DIRECT) {
2560 * 1-operand unary insn with register target
2562 printINSN(rin, x86op1, rin->arg1.rea, NULL, 0);
2563 } else if (x86op2) {
2565 * Alternative for 1-operand unary insn with memory argument
2567 printf("\t%s%s\t%s, ",
2568 x86op2, x86ext(rin->ext1, rin->arg1.flags),
2570 printEA(rin->arg1.rea, rin->ext1);
2576 if (rin->arg1.flags & RAF_FLOAT) {
2577 xrea.target_reg = X86_REG_XMM0;
2578 xrea.eamode = EA_DIRECT;
2580 xrea.target_reg = X86_REG_RCX;
2581 xrea.eamode = EA_DIRECT;
2583 doMOVE(p, rin, rin->arg1.rea, &xrea);
2584 printINSN(rin, x86op1, &xrea, NULL, 0);
2585 doMOVE(p, rin, &xrea, rin->arg1.rea);
2587 } else if (rin->operands == 2) {
2588 if (x86op2 && rin->arg2.rea->eamode == EA_DIRECT) {
2590 * Alternative for 2-operand unary insn
2592 doMOVE(p, rin, rin->arg1.rea, rin->arg2.rea);
2593 printf("\t%s%s\t%s, ",
2594 x86op2, x86ext(rin->ext1, rin->arg1.flags),
2596 printEA(rin->arg2.rea, rin->ext1);
2602 if (rin->arg1.flags & RAF_FLOAT) {
2603 xrea.target_reg = X86_REG_XMM0;
2604 xrea.eamode = EA_DIRECT;
2606 xrea.target_reg = X86_REG_RCX;
2607 xrea.eamode = EA_DIRECT;
2609 doMOVE(p, rin, rin->arg1.rea, &xrea);
2610 printINSN(rin, x86op1, &xrea, NULL, 0);
2611 doMOVE(p, rin, &xrea, rin->arg2.rea);
2614 RasError(p, "Instruction must be 1-op or 2-op only");
2619 * The shift instruction has two size extensions. ext1 applies to the shift
2620 * count, ext2 to the target. The shift count must be either an immediate
2621 * value or loaded into %cl (%ecx or %rcx ok).
2623 * NOTE: We may have to adjust ext1/ext2 if we call other helper functions to
2624 * present the correct operand size.
2626 * NOTE: Cannot shift floating point values (for now)
/*
 * doInsnSHIFT3 - emit a shift instruction.
 *
 * An immediate shift count is used as-is.  A count that is not already
 * the %rcx register is first moved into %rcx.  A count already in %rcx is
 * used directly.
 *
 * rin->ext1 is then overwritten with rin->ext2, so the caller's
 * instruction is modified.
 *
 * Two-operand form (or arg2 and arg3 are the same EA): the shift is
 * applied to arg2 in place.  Three-operand form: arg2 is copied into dea
 * (arg3 when it is a register, otherwise a scratch register), shifted,
 * and copied to arg3 when a scratch register was used.
 *
 * Any other operand count is reported through RasError().
 * NOTE(review): the "sea = &xrea" and "dea = &x2ea" lines are not
 * visible in this listing.
 */
2630 doInsnSHIFT3(RASParser *p, rinsn_t *rin, const char *x86op)
2636 printf("\t# SHIFT3\n");
2637 if (rin->arg1.rea->eamode == EA_IMMEDIATE) {
2639 * Immediate shift count
2641 sea = rin->arg1.rea;
2642 } else if (rin->arg1.rea->eamode != EA_DIRECT ||
2643 rin->arg1.rea->target_reg != X86_REG_RCX) {
2645 * Else shift counter must be %cl/%cx/%ecx/%rcx
2647 xrea.target_reg = X86_REG_RCX;
2648 xrea.eamode = EA_DIRECT;
2649 doMOVE(p, rin, rin->arg1.rea, &xrea);
2652 sea = rin->arg1.rea;
2655 rin->ext1 = rin->ext2;
2657 if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
2658 printINSN(rin, x86op, sea, rin->arg2.rea,
2659 REXTF_SEA1 | REXTF_EA2);
2660 } else if (rin->operands == 3) {
2664 if (rin->arg3.rea->eamode == EA_DIRECT) {
2666 * Optimize if target is a register. can't match sea by
2669 dea = rin->arg3.rea;
2670 } else if (rin->arg2.flags & RAF_FLOAT) {
2671 x2ea.target_reg = X86_REG_XMM0;
2672 x2ea.eamode = EA_DIRECT;
2675 x2ea.target_reg = X86_REG_RAX;
2676 x2ea.eamode = EA_DIRECT;
2679 doMOVEExt(p, rin, rin->arg2.rea, dea, REXTF_EA2);
2680 printINSN(rin, x86op, sea, dea, REXTF_SEA1 | REXTF_EA2);
2681 if (dea != rin->arg3.rea)
2682 doMOVEExt(p, rin, dea, rin->arg3.rea, REXTF_EA2);
2684 RasError(p, "Instruction must be 2-op or 3-op only");
2690 * Binary (2-arg or 3-arg), destination is in %*DX:%*AX and result is in
2691 * either %*DX or %*AX. Requires spewing a multitude of adjustment
/*
 * doInsnDXAX3 - emit an instruction that works on the DX:AX register
 * pair.
 *
 * The destination is arg2 for the two-operand form (or when arg2 and
 * arg3 are the same EA) and arg3 for the three-operand form; any other
 * operand count is reported through RasError().
 *
 * arg1 is moved into %rcx and arg2 into %rax.  %edx is then prepared by
 * one of two switch statements on ext1, the instruction is printed with
 * %rcx as its operand, and the result is moved to the destination.
 *
 * NOTE(review): the issigned and dxresult tests and the first switch's
 * case bodies are not visible in this listing.  Presumably the first
 * switch sign-extends for issigned and the second (xorl) zeroes %edx for
 * the unsigned case, and %rdx is selected as the result when dxresult is
 * set -- confirm.
 */
2696 doInsnDXAX3(RASParser * p, rinsn_t * rin, const char *x86op,
2697 int issigned, int dxresult)
2703 printf("\t# DXAX3 for %s\n", x86op);
2704 if (rin->operands == 2 || sameEA(rin->arg2.rea, rin->arg3.rea)) {
2705 dea = rin->arg2.rea;
2706 } else if (rin->operands == 3) {
2707 dea = rin->arg3.rea;
2709 RasError(p, "Instruction must be 2-op or 3-op only");
2712 x2ea.target_reg = X86_REG_RCX;
2713 x2ea.eamode = EA_DIRECT;
2714 doMOVE(p, rin, rin->arg1.rea, &x2ea);
2716 xrea.target_reg = X86_REG_RAX;
2717 xrea.eamode = EA_DIRECT;
2718 doMOVE(p, rin, rin->arg2.rea, &xrea);
2720 switch (rin->ext1) {
2734 dpanic("Unknown/unsupported REXT %d", rin->ext1);
2738 switch (rin->ext1) {
2740 printf("\txorl\t%%edx, %%edx\n");
2743 printf("\txorl\t%%edx, %%edx\n");
2746 printf("\txorl\t%%edx, %%edx\n");
2749 printf("\txorl\t%%edx, %%edx\n");
2752 dpanic("Unknown/unsupported REXT %d", rin->ext1);
2756 printf("\t%s%s\t", x86op, x86ext(rin->ext1, rin->arg1.flags));
2757 printEA(&x2ea, rin->ext1);
2760 xrea.target_reg = X86_REG_RDX;
2761 doMOVE(p, rin, &xrea, dea);
2765 * 80-bit is messy to say the least
/*
 * doInsnFloating3 - emit a floating-point arithmetic or compare
 * instruction.
 *
 * Sizes other than 128-bit are passed to doInsnMR3().
 *
 * For 128-bit operands the work is done on the x87 stack.  sea_sp and
 * mea_sp are 1 when arg1 / arg2 is a register (EA_DIRECT), and dea_sp is
 * 1 when a separate destination exists (not a compare, not a
 * two-operand instruction, arg2 and arg3 differ); ttl_sp is 16 bytes per
 * such operand.  Both sources are put in memory form, the operation is
 * printed, and for non-compares the result is stored with fstpt and
 * copied to the real destination when that is not memory.
 *
 * Declared as returning int in the file's prototypes; InsnFCMP uses the
 * result as its "rev" flag.
 *
 * NOTE(review): several preprocessor and if/else lines are not visible
 * in this listing, so the InsnSimplifyFP128() calls and the open-coded
 * CreateGlobalImmediate()/doMOVE() sequences that follow them may be
 * alternative builds of the same step, and the sub/add of %rsp may be
 * disabled in favour of the red zone (see the XXX comment) -- confirm.
 */
2769 doInsnFloating3(RASParser *p, rinsn_t *rin, const char *x86op, int isfcmp)
2773 if (rin->ext1 != REXT_I128) {
2775 * NOTE: InsnFCMP path never hits this (it wouldn't work anyway)
2777 doInsnMR3(p, rin, x86op, 0);
2792 sea_sp = (rin->arg1.rea->eamode == EA_DIRECT);
2793 mea_sp = (rin->arg2.rea->eamode == EA_DIRECT);
2795 dea_sp = (isfcmp == 0 && rin->operands != 2 &&
2796 !sameEA(rin->arg2.rea, rin->arg3.rea));
2797 ttl_sp = (sea_sp + mea_sp + dea_sp) *16;
2801 /* XXX removed, using red zone instead */
2803 printf("\tsub%c\t$%d, ", X86Size, ttl_sp);
2804 printREGTGT(X86_REG_RSP, 0, ExtSize);
2809 sea = InsnSimplifyFP128(p, rin, rin->arg1.rea, &xsea,
2812 sea = rin->arg1.rea;
2813 if (sea->eamode == EA_IMMEDIATE16) {
2814 CreateGlobalImmediate(sea, &xsea, rin->ext1);
2816 } else if (sea->eamode != EA_MEMORY) {
2817 bzero(&xsea, sizeof(xsea));
2818 xsea.target_reg = X86_REG_RSP;
2819 xsea.eamode = EA_MEMORY;
2821 doMOVE(p, rin, sea, &xsea);
2825 mea = InsnSimplifyFP128(p, rin, rin->arg2.rea, &xmea,
2829 mea = rin->arg2.rea;
2830 if (mea->eamode == EA_IMMEDIATE16) {
2831 CreateGlobalImmediate(mea, &xmea, rin->ext1);
2833 } else if (mea->eamode != EA_MEMORY) {
2834 bzero(&xmea, sizeof(xmea));
2835 xmea.target_reg = X86_REG_RSP;
2836 xmea.eamode = EA_MEMORY;
2837 xmea.offset = -16 - sea_sp * 16;
2838 doMOVE(p, rin, mea, &xmea);
2844 * FCMP doesn't store a floating point result
2847 if (rin->operands == 2 ||
2848 sameEA(rin->arg2.rea, rin->arg3.rea)) {
2851 dea = rin->arg3.rea;
2852 if (dea->eamode != EA_MEMORY) {
2853 bzero(&xdea, sizeof(xdea));
2854 xdea.target_reg = X86_REG_RSP;
2855 xdea.eamode = EA_MEMORY;
2857 (sea_sp + mea_sp) *16;
2865 printEA(sea, REXT_I128);
2869 printEA(mea, REXT_I128);
2875 printf("\tadd%c\t$%d, ", X86Size, ttl_sp);
2876 printREGTGT(X86_REG_RSP, 0, ExtSize);
2880 printf("\t%s\t%%st(1), %%st\n", x86op);
2881 printf("\tfstp\t%%st(0)\n");
2882 /* looks like we should leave rev 0 here */
2884 printf("\tf%sp\t%%st, %%st(1)\n", x86op);
2885 printf("\tfstpt\t");
2886 printEA(dea, REXT_I128);
2891 if (rin->operands == 2 ||
2892 sameEA(rin->arg2.rea, rin->arg3.rea)) {
2893 if (rin->arg2.rea->eamode != EA_MEMORY)
2894 doMOVE(p, rin, dea, rin->arg2.rea);
2896 if (rin->arg3.rea->eamode != EA_MEMORY)
2897 doMOVE(p, rin, dea, rin->arg3.rea);
2901 printf("\tadd%c\t$%d, ", X86Size, ttl_sp);
2902 printREGTGT(X86_REG_RSP, 0, ExtSize);
2912 * This helper can handle the following MOVEs for integer and floating point:
2914 * imm/reg imm/mem reg/reg reg/mem mem/reg
2916 * This function does not handle mem/mem.
2918 * This function may eat XMM0 or RCX, but is guaranteed not to eat any
2919 * registers if either the source or target is a register (an immediate
2920 * source does not count so imm,reg CAN eat another register).
/*
 * doMOVE - emit a move from sea to dea, sized by rin->ext1 and typed by
 * rin->arg1.flags.
 *
 * Cases, in the order tested:
 *  - floating-point zero immediate: a register destination is handled
 *    directly; otherwise %xmm0 is used as an intermediate,
 *    RASOPT_XMM0_ZERO is set in p->opt_flags, and doMOVE() recurses to
 *    store %xmm0.  If p->opt_last already has RASOPT_XMM0_ZERO only an
 *    assembly comment is printed in place of the pxor.
 *  - other floating-point immediate: placed in memory with
 *    CreateGlobalImmediate() and loaded, through %xmm0 when the
 *    destination is not a register.
 *  - integer zero immediate to a register: xorl on the 32-bit register.
 *  - ISIMM64 source: movabsq, through %rcx when the destination is not a
 *    register.
 *  - anything else: a plain mov.
 *
 * NOTE(review): the mnemonic lines for the zero-immediate paths are not
 * visible in this listing.
 */
2924 doMOVE(RASParser *p, rinsn_t *rin, rea_t *sea, rea_t *dea)
2928 if ((rin->arg1.flags & RAF_FLOAT) &&
2929 ((sea->eamode == EA_IMMEDIATE && sea->immlo == 0) ||
2930 (sea->eamode == EA_IMMEDIATE16 && sea->immlo == 0 &&
2931 sea->immhi == 0))) {
2935 if (dea->eamode == EA_DIRECT) {
2937 printEA(dea, rin->ext1);
2939 printEA(dea, rin->ext1);
2942 if (p->opt_last & RASOPT_XMM0_ZERO) {
2943 printf("\t# pxor %%xmm0, %%xmm0\n");
2945 xrea.target_reg = X86_REG_XMM0;
2946 xrea.eamode = EA_DIRECT;
2948 printEA(&xrea, rin->ext1);
2950 printEA(&xrea, rin->ext1);
2953 p->opt_flags |= RASOPT_XMM0_ZERO;
2954 doMOVE(p, rin, &xrea, dea);
2956 } else if ((rin->arg1.flags & RAF_FLOAT) &&
2957 (sea->eamode == EA_IMMEDIATE ||
2958 sea->eamode == EA_IMMEDIATE16)) {
2961 CreateGlobalImmediate(sea, &irea, rin->ext1);
2962 printf("\tmov%s\t", x86ext(rin->ext1, rin->arg1.flags));
2963 printEA(&irea, rin->ext1);
2965 if (dea->eamode == EA_DIRECT) {
2966 printEA(dea, rin->ext1);
2969 xrea.target_reg = X86_REG_XMM0;
2970 xrea.eamode = EA_DIRECT;
2971 printEA(&xrea, rin->ext1);
2973 doMOVE(p, rin, &xrea, dea);
2975 } else if (sea->eamode == EA_IMMEDIATE && sea->immlo == 0 &&
2976 dea->eamode == EA_DIRECT) {
2980 printf("\txorl\t"); /* avoid REX */
2981 printEA(dea, REXT_I32);
2983 printEA(dea, REXT_I32);
2985 } else if (ISIMM64(sea)) {
2987 * MOVE 64-bit immediate to target
2989 if (dea->eamode == EA_DIRECT) {
2990 printf("\tmovabsq\t");
2991 printEA(sea, rin->ext1);
2993 printEA(dea, rin->ext1);
2996 xrea.target_reg = X86_REG_RCX;
2997 xrea.eamode = EA_DIRECT;
2998 printf("\tmovabsq\t");
2999 printEA(sea, rin->ext1);
3001 printEA(&xrea, rin->ext1);
3004 printf("\tmov%s\t", x86ext(rin->ext1, rin->arg1.flags));
3005 printEA(&xrea, rin->ext1);
3007 printEA(dea, rin->ext1);
3012 * MOVE generic (to/from register)
3014 printf("\tmov%s\t", x86ext(rin->ext1, rin->arg1.flags));
3015 printEA(sea, rin->ext1);
3017 printEA(dea, rin->ext1);
/*
 * doMOVEExt - emit a move whose operand size and type can be overridden
 * by the caller.
 *
 * Source and destination size start as rin->ext1 and their flags as
 * rin->arg1.flags.  A size in extov (REXTF_MASK bits) replaces the
 * source size; REXTF_EA2 switches both flag sets to rin->arg2.flags;
 * REXTF_SEA1 switches the source flags back to rin->arg1.flags.
 * RAF_FLOAT on either side adds REXTF_FLOAT to that side's size.
 *
 * An integer zero immediate moved to a register is emitted as xorl on
 * the 32-bit register; everything else as mov with the destination's
 * suffix.
 *
 * NOTE(review): the matching assignments to dext, and to sext/dext under
 * REXTF_EA2 and REXTF_SEA1, and the operand printing after the final
 * mov, are not visible in this listing.
 */
3024 doMOVEExt(RASParser *p __unused, rinsn_t *rin, rea_t *sea, rea_t *dea,
3027 uint8_t sext = rin->ext1;
3028 uint8_t dext = rin->ext1;
3029 uint8_t sflags = rin->arg1.flags;
3030 uint8_t dflags = rin->arg1.flags;
3032 if (extov & REXTF_MASK) {
3033 sext = extov & REXTF_MASK;
3036 if (extov & REXTF_EA2) {
3039 sflags = rin->arg2.flags;
3040 dflags = rin->arg2.flags;
3042 if (extov & REXTF_SEA1) {
3044 sflags = rin->arg1.flags;
3046 if (sflags & RAF_FLOAT)
3047 sext |= REXTF_FLOAT;
3048 if (dflags & RAF_FLOAT)
3049 dext |= REXTF_FLOAT;
3051 if (sea->eamode == EA_IMMEDIATE && sea->immlo == 0 &&
3052 dea->eamode == EA_DIRECT) {
3053 printf("\txorl\t"); /* avoid REX */
3054 printEA(dea, REXT_I32);
3056 printEA(dea, REXT_I32);
3058 printf("\tmov%s\t", x86ext(dext, dflags));
3067 * Load effective-address instead of contents. Note that some originally
3068 * EA_DIRECT EAs may have been converted to stack-temporary storage. In this
3069 * situation the effective-address is, in fact, the contents of the storage.
/*
 * doLEA - load the effective address described by sea into dea.
 *
 * - sea was originally EA_DIRECT (orig_eamode): its contents are the
 *   address, so a pointer-sized mov is emitted.
 * - a memory EA meeting a further condition: the address is just
 *   sea->offset, emitted as xorl for offset 0 to a register, otherwise
 *   as a mov of the offset as an immediate.
 * - anything else: a pointer-sized lea.
 *
 * NOTE(review): the second half of the memory-EA condition (original
 * lines 3081-3082) is not visible in this listing; presumably it tests
 * for the absence of a base register -- confirm.
 */
3073 doLEA(RASParser *p __unused, rea_t *sea, rea_t *dea)
3075 if (sea->orig_eamode == EA_DIRECT) {
3076 printf("\tmov%c\t", X86Size);
3077 printEA(sea, ExtSize);
3079 printEA(dea, ExtSize);
3080 } else if (sea->eamode == EA_MEMORY &&
3083 if (sea->offset == 0 && dea->eamode == EA_DIRECT) {
3084 printf("\txorl\t"); /* avoid REX */
3085 printEA(dea, REXT_I32);
3087 printEA(dea, REXT_I32);
3089 printf("\tmov%c\t$%jd, ", X86Size, sea->offset);
3090 printEA(dea, ExtSize);
3093 printf("\tlea%c\t", X86Size);
3094 printEA(sea, ExtSize);
3096 printEA(dea, ExtSize);
/*
 * doSpecialSave - save the registers named in rin->special_save.
 *
 * First loop: each set bit below X86_REG_FBASE is cleared from the
 * working mask, the register is pushed, and count grows by
 * sizeof(void *).  count is then replaced by the padding needed to
 * round the pushed bytes up to a multiple of 16, and %rsp is lowered by
 * fpcount + count when that is non-zero.
 *
 * Second loop: each remaining bit from X86_REG_FBASE upward is stored
 * with movaps at offset fpcount from %rsp.
 *
 * NOTE(review): the statements that compute and advance fpcount, and the
 * loop "continue"/"break" lines, are not visible in this listing.
 */
3103 doSpecialSave(rinsn_t *rin)
3110 mask = rin->special_save;
3113 printf("\t# SPECIAL SAVE\n");
3116 for (i = 0; mask; ++i) {
3117 if ((mask & (1LLU << i)) == 0)
3119 if (i < X86_REG_FBASE) {
3120 mask &= ~(1LLU << i);
3121 printf("\tpush%c\t", X86Size);
3122 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
3124 count += sizeof(void *);
3129 count = ((count + 15) & ~(size_t) 15) - count; /* 16-byte align */
3130 if (fpcount + count) {
3131 printf("\tsub%c\t$%zd, ", X86Size, (fpcount + count));
3132 printREGTGT(X86_REG_RSP, 0, ExtSize);
3138 for (i = X86_REG_FBASE; mask; ++i) {
3139 if ((mask & (1LLU << i)) == 0)
3141 mask &= ~(1LLU << i);
3142 printf("\tmovaps\t%%xmm%d, %zd(", i - X86_REG_FBASE, fpcount);
3143 printREGTGT(X86_REG_RSP, 0, ExtSize);
/*
 * doSpecialRestore - restore the registers named in rin->special_save,
 * undoing doSpecialSave().
 *
 * First loop: while any of the upper 32 mask bits remain, each set bit
 * from X86_REG_FBASE upward is reloaded with movaps from offset fpcount
 * off %rsp.
 *
 * count is then recomputed from the lower 32 bits (sizeof(void *) per
 * set bit), converted to the 16-byte padding, and %rsp is raised by
 * fpcount + count when that is non-zero.
 *
 * Final loop: the remaining registers are popped from bit 31 downward,
 * the reverse of the push order.
 *
 * NOTE(review): the statements that advance fpcount and the loop
 * "continue" lines are not visible in this listing.
 */
3151 doSpecialRestore(rinsn_t *rin)
3158 mask = rin->special_save;
3161 printf("\t# SPECIAL RESTORE\n");
3164 for (i = X86_REG_FBASE; mask & 0xFFFFFFFF00000000LLU; ++i) {
3165 if ((mask & (1LLU << i)) == 0)
3167 mask &= ~(1LLU << i);
3168 printf("\tmovaps\t%zd(", fpcount);
3169 printREGTGT(X86_REG_RSP, 0, ExtSize);
3170 printf("), %%xmm%d\n", i - X86_REG_FBASE);
3175 for (i = 31; i >= 0; --i) {
3176 if ((mask & (1LLU << i)) == 0)
3178 count += sizeof(void *);
3180 count = ((count + 15) & ~(size_t) 15) - count; /* 16-byte align */
3182 if (fpcount + count) {
3183 printf("\tadd%c\t$%jd, ", X86Size, (intmax_t) (fpcount + count));
3184 printREGTGT(X86_REG_RSP, 0, ExtSize);
3187 for (i = 31; mask; --i) {
3188 if ((mask & (1LLU << i)) == 0)
3190 mask &= ~(1LLU << i);
3191 printf("\tpop%c\t", X86Size);
3192 printREGTGT(i | X86_SIZE_UNSPEC, 0, ExtSize);
3199 * Used for CMP instructions where arg1 and arg2.rea are both source
3200 * arguments. Allow either reg,mem or $imm,mem or mem,reg.
3202 * We have an optimization to reverse the comparison and return the reverse
3203 * status to the caller (allows immediate arg2.rea to be optimized).
/*
 * InsnSimplifyRMMRCMP - choose the two operands for an integer compare
 * and report them through *sea1p and *sea2p.
 *
 * - both operands immediate: arg2 is loaded into a scratch register
 *   (%xmm0 for RAF_FLOAT, %rcx otherwise) and arg1 stays first.
 * - only arg2 immediate: the operands are swapped so the immediate comes
 *   first.
 * - only arg1 immediate: the operands keep their order.
 * - neither operand a register: arg1 is loaded into a scratch register.
 * - otherwise both operands are used as they are.
 *
 * In the two single-immediate cases doHandleImmediate() is consulted in
 * case the immediate needs a register.
 *
 * Declared as returning int in the file's prototypes.
 * NOTE(review): the return statements and the "*seaNp = &xrea"
 * assignments are not visible in this listing; presumably the return
 * value is non-zero when the operands were swapped -- confirm.
 */
3207 InsnSimplifyRMMRCMP(RASParser *p, rinsn_t *rin, rea_t **sea1p, rea_t **sea2p)
3212 if (rin->arg2.rea->eamode == EA_IMMEDIATE ||
3213 rin->arg2.rea->eamode == EA_IMMEDIATE16) {
3214 if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
3215 rin->arg1.rea->eamode == EA_IMMEDIATE16) {
3217 * Bleh, a constant expression didn't get collapsed. Make it
3218 * work. We don't interpret here (yet).
3220 if (rin->arg2.flags & RAF_FLOAT) {
3221 xrea.target_reg = X86_REG_XMM0;
3222 xrea.eamode = EA_DIRECT;
3224 xrea.target_reg = X86_REG_RCX;
3225 xrea.eamode = EA_DIRECT;
3227 *sea1p = rin->arg1.rea;
3229 doMOVE(p, rin, rin->arg2.rea, &xrea);
3233 * $imm,reg/mem - We are good.
3235 if (doHandleImmediate(rin->arg2.rea, &xrea, rin->ext1))
3238 *sea1p = rin->arg2.rea;
3239 *sea2p = rin->arg1.rea;
3242 } else if (rin->arg1.rea->eamode == EA_IMMEDIATE ||
3243 rin->arg1.rea->eamode == EA_IMMEDIATE16) {
3245 * $imm,reg/mem - We are good.
3247 if (doHandleImmediate(rin->arg1.rea, &xrea, rin->ext1))
3250 *sea1p = rin->arg1.rea;
3251 *sea2p = rin->arg2.rea;
3253 } else if (rin->arg1.rea->eamode != EA_DIRECT &&
3254 rin->arg2.rea->eamode != EA_DIRECT) {
3256 * Usually better to put arg1 in register (but we could put arg2.rea
3259 if (rin->arg1.flags & RAF_FLOAT) {
3260 xrea.target_reg = X86_REG_XMM0;
3261 xrea.eamode = EA_DIRECT;
3263 xrea.target_reg = X86_REG_RCX;
3264 xrea.eamode = EA_DIRECT;
3267 *sea2p = rin->arg2.rea;
3268 doMOVE(p, rin, rin->arg1.rea, &xrea);
3272 * Otherwise we are good
3274 *sea1p = rin->arg1.rea;
3275 *sea2p = rin->arg2.rea;
3282 * Floating point version allows a memory or register source and register
3283 * destination. Unlike normal instructions, an immediate source is not
3286 * If possible, reverse the sense of the comparison if it will generate more
/*
 * InsnSimplifyMRFCMP - choose the two operands for a floating-point
 * compare and report them through *sea1p and *sea2p.
 *
 * Immediates are never used directly: they are either placed in memory
 * with CreateGlobalImmediate() or loaded into %xmm0 / %xmm1 with
 * doMOVE().  The branches below cover every combination of register,
 * memory and immediate for arg1 and arg2; the comments in each branch
 * state whether the operand order is reversed.
 *
 * Declared as returning int in the file's prototypes; InsnFCMP passes
 * the result to x86branch() as "rev".
 * NOTE(review): the "rev = ..." assignments, the return statement and
 * the "*seaNp = &xrea / &x2ea" assignments are not visible in this
 * listing.
 */
3291 InsnSimplifyMRFCMP(RASParser *p, rinsn_t *rin, rea_t **sea1p, rea_t **sea2p)
3297 if (rin->arg2.rea->eamode == EA_DIRECT) {
3299 * ARG2 is direct, which is optimal. ARG1 can be direct or memory,
3300 * but cannot be immediate. Don't reverse.
3303 if (rin->arg1.rea->eamode == EA_DIRECT ||
3304 rin->arg1.rea->eamode == EA_MEMORY) {
3305 *sea1p = rin->arg1.rea;
3306 *sea2p = rin->arg2.rea;
3308 xrea.target_reg = X86_REG_XMM0;
3309 xrea.eamode = EA_DIRECT;
3310 doMOVE(p, rin, rin->arg1.rea, &xrea);
3312 *sea2p = rin->arg2.rea;
3314 } else if (rin->arg1.rea->eamode == EA_DIRECT &&
3315 (rin->arg2.rea->eamode == EA_IMMEDIATE ||
3316 rin->arg2.rea->eamode == EA_IMMEDIATE16)) {
3318 * ARG1 is direct and ARG2 is immediate. Since we need a
3319 * memory-immediate, reverse it and optimize the immediate.
3322 CreateGlobalImmediate(rin->arg2.rea, &xrea, rin->ext1);
3324 *sea2p = rin->arg1.rea;
3325 } else if (rin->arg1.rea->eamode == EA_DIRECT) {
3327 * ARG1 is direct so lets reverse it in case ARG2 is memory. Plus
3328 * this is optimal if ARG2 happens to be immediate.
3331 xrea.target_reg = X86_REG_XMM0;
3332 xrea.eamode = EA_DIRECT;
3333 doMOVE(p, rin, rin->arg2.rea, &xrea);
3335 *sea2p = rin->arg1.rea;
3336 } else if ((rin->arg1.rea->eamode == EA_IMMEDIATE ||
3337 rin->arg1.rea->eamode == EA_IMMEDIATE16) &&
3338 (rin->arg2.rea->eamode == EA_IMMEDIATE ||
3339 rin->arg2.rea->eamode == EA_IMMEDIATE16)) {
3341 * Sigh, both ARG1 and ARG2 are immediate. We can use
3342 * CreateGlobalImmediate() to get a memory-immediate for arg1.rea.
3345 CreateGlobalImmediate(rin->arg1.rea, &xrea, rin->ext1);
3346 x2ea.target_reg = X86_REG_XMM1;
3347 x2ea.eamode = EA_DIRECT;
3348 doMOVE(p, rin, rin->arg2.rea, &x2ea);
3351 } else if (rin->arg2.rea->eamode == EA_IMMEDIATE ||
3352 rin->arg2.rea->eamode == EA_IMMEDIATE16) {
3354 * ARG1 is memory and arg2.rea is immediate. We have to throw
3355 * arg2.rea into a register so don't reverse.
3358 x2ea.target_reg = X86_REG_XMM1;
3359 x2ea.eamode = EA_DIRECT;
3360 doMOVE(p, rin, rin->arg2.rea, &x2ea);
3361 *sea1p = rin->arg1.rea;
3365 * arg1 is immediate or memory, arg2 is memory. Since immediate
3366 * values aren't allowed both sides are effectively memory. Reverse
3367 * it (no reason, just feel like it).
3370 x2ea.target_reg = X86_REG_XMM0;
3371 x2ea.eamode = EA_DIRECT;
3372 doMOVE(p, rin, rin->arg1.rea, &x2ea);
3373 *sea1p = rin->arg2.rea;
3380 * Simplify an EA into a memory operand for float128's (FP80)
/*
 * Make an FP128 operand addressable as memory, using tea as scratch EA.
 *
 *   - An EA_IMMEDIATE16 source is passed to CreateGlobalImmediate()
 *     together with tea and ext.
 *   - Any other source that is not already EA_MEMORY gets tea zeroed and
 *     set up as an X86_REG_RSP-relative EA_MEMORY operand at offset
 *     rspoff, with a doMOVE() from sea into tea.
 *
 * NOTE(review): the return statements and the line directly before the
 * doMOVE() are not shown here.  Presumably the move is conditional on
 * loadme and the function returns tea when it was filled in, sea
 * otherwise -- confirm.
 */
3384 InsnSimplifyFP128(RASParser * p, rinsn_t * rin, rea_t * sea, rea_t * tea,
3385 uint8_t ext, int rspoff, int loadme)
3387 if (sea->eamode == EA_IMMEDIATE16) {
3388 CreateGlobalImmediate(sea, tea, ext);
3390 } else if (sea->eamode != EA_MEMORY) {
/* Start from a clean EA so no stale fields leak into the stack operand. */
3391 bzero(tea, sizeof(*tea));
3392 tea->target_reg = X86_REG_RSP;
3393 tea->eamode = EA_MEMORY;
3394 tea->offset = rspoff;
3396 doMOVE(p, rin, sea, tea);
3403 * Low level helper functions
/*
 * Print a register name to stdout.
 *
 * When (mr & X86_REG_MASK) is below 32 a "rats%d" name is formatted into
 * buf; a "ratsF%d" form is also available.  The switch on
 * (ext & REXTF_MASK) then selects the size-specific spelling of rid
 * (suffixes such as l, b, h, w, d, or an e / r prefix).  "[REXT%d]" is
 * printed with the raw ext value, presumably from the switch's default
 * arm.  Registers identified only by regno are printed as %q / %v
 * (REGF_ADHOC, with / without REGF_PTR), %p or %r followed by
 * (regno & REGF_MASK).
 *
 * NOTE(review): how mr and rid are derived from target_reg, and which
 * case label owns each printf(), is not shown here -- confirm against
 * the full function.
 */
3407 printREGTGT(uint16_t target_reg, uint32_t regno, uint8_t ext)
3518 if ((mr & X86_REG_MASK) < 32)
3519 snprintf(buf, sizeof(buf), "rats%d",
3522 snprintf(buf, sizeof(buf), "ratsF%d",
3529 switch (ext & REXTF_MASK) {
3533 printf("%%%cl", rid[0]);
3534 } else if (mr < 8) {
3535 printf("%%%sl", rid);
3537 printf("%%%sb", rid);
3541 case X86_SIZE_HIBYTE:
3543 printf("%%%ch", rid[0]);
3549 printf("%%%s", rid);
3551 printf("%%%sw", rid);
3555 printf("%%e%s", rid);
3557 printf("%%%sd", rid);
3559 printf("%%%s", rid);
3563 printf("%%r%s", rid);
3565 printf("%%%s", rid);
3567 printf("%%%s", rid);
3571 printf("%%%s", rid);
3575 printf("%%%s", rid);
3579 printf("%%%s", rid);
3583 * Suitable for e.g. FMOVE, not suitable for fp stack ops. FP
3584 * insns will do those manually.
3587 printf("%%%s", rid);
3590 printf("[REXT%d]", ext);
3594 if (regno & REGF_ADHOC) {
3595 if (regno & REGF_PTR)
3596 printf("%%q%d", regno & REGF_MASK);
3598 printf("%%v%d", regno & REGF_MASK);
3599 } else if (regno & REGF_PTR) {
3623 printf("%%p%d", regno & REGF_MASK);
3627 printf("%%r%d", regno & REGF_MASK);
/*
 * Print the register described by rea at the size selected by ext.
 * Thin wrapper: forwards rea->target_reg and rea->regno to printREGTGT().
 */
3634 printREG(rea_t *rea, uint8_t ext)
3636 printREGTGT(rea->target_reg, rea->regno, ext);
/*
 * Print a bare target register: target_reg is stored in a rea_t named
 * rea, which is then passed to printREG() together with ext.
 *
 * NOTE(review): the declaration and any other initialisation of rea are
 * not shown here -- confirm rea.regno is set before printREG() reads it.
 */
3640 InsnDebugREG(uint16_t target_reg, uint8_t ext)
3644 rea.target_reg = target_reg;
3646 printREG(&rea, ext);
/*
 * Print one x86 instruction: a tab, the mnemonic x86op and a size suffix
 * obtained from x86ext(), followed by the operands.
 *
 * extov carries overrides: REXTF_EA2 takes the suffix from
 * rin->ext2 / rin->arg2.flags instead of rin->ext1 / rin->arg1.flags,
 * and a non-zero (extov & REXTF_MASK) replaces both the source (sext)
 * and destination (dext) operand sizes.
 *
 * NOTE(review): the effect of REXTF_SEA1 and the operand printing itself
 * are not shown here.
 */
3651 printINSN(rinsn_t * rin, const char *x86op, rea_t * sea, rea_t * dea,
3657 if (extov & REXTF_EA2) {
3658 printf("\t%s%s", x86op, x86ext(rin->ext2, rin->arg2.flags));
3662 printf("\t%s%s", x86op, x86ext(rin->ext1, rin->arg1.flags));
3666 if (extov & REXTF_SEA1)
3668 if (extov & REXTF_MASK) {
3669 sext = extov & REXTF_MASK;
3670 dext = extov & REXTF_MASK;
/*
 * Print the effective address rea in assembler syntax, dispatching on
 * rea->eamode.
 *
 * Visible output forms:
 *   - symbols: an id starting with '@' is printed without the '@';
 *     other ids are prefixed with ".L<ProcNo>";
 *   - a numeric offset ("%jd"), and the base register via printREG()
 *     when regea has a target_reg or a register number;
 *   - immediates as "$<value>", 16-byte immediates as "$0x" followed by
 *     immhi and immlo as 16 hex digits each;
 *   - "?ea=<mode>" for a mode that is not handled.
 *
 * An X86_REG_RSP-relative access whose offset is at or beyond
 * ProcStackSize is tagged " [OUT OF BOUNDS] " in the output and reported
 * through dwarn().
 */
3690 printEA(rea_t *rea, uint8_t ext)
3697 switch (rea->eamode) {
3704 regea = rea->direct;
3707 * This can occur if the register allocator is unable to allocate
3708 * a mandatory register.
3710 dassert(regea->eamode == EA_DIRECT);
3713 if (rea->sym->id[0] == '@')
3714 printf("%s", rea->sym->id + 1);
3716 printf(".L%d%s", ProcNo, rea->sym->id);
3717 if (rea->offset > 0)
3721 (rea->sym == NULL && (regea->regno & ~REGF_PTR) == 0)) {
3722 printf("%jd", (intmax_t) rea->offset);
3724 if (regea->target_reg || (regea->regno & ~REGF_PTR)) {
3726 printREG(regea, ExtSize);
3731 * Catch broken stack-relative accesses.
3733 if (regea->target_reg == X86_REG_RSP && rea->offset >= ProcStackSize) {
3734 printf(" [OUT OF BOUNDS] ");
3736 dwarn("RAS Assembly used an out-of-bounds frame "
3738 rea->offset, ProcStackSize);
3744 printf("%s", rea->sym->id);
3748 if (rea->immlo || rea->sym == NULL)
3749 printf("$%jd", (intmax_t) rea->immlo);
3751 case EA_IMMEDIATE16:
/*
 * NOTE(review): %jx formally takes a uintmax_t argument; the intmax_t
 * casts below mismatch the specifier's signedness.
 */
3752 printf("$0x%016jx%016jx",
3753 (intmax_t) rea->immhi, (intmax_t) rea->immlo);
3756 printf("?ea=%d", rea->eamode);
/*
 * Print a branch target as ".L<ProcNo><label id>", the same spelling
 * printEA() uses for symbols that do not start with '@'.
 */
3763 printBRLabel(rsym_t *label)
3765 printf(".L%d%s", ProcNo, label->id);
3768 /************************************************************************
3769 * REGISTER ALLOCATOR *
3770 ************************************************************************
3774 regAllocEA(RASParser * p, rinsn_t * rin,
3775 rspan_t * arg, rspan_t * argd, int pass);
3776 static uint64_t regAllocSpan(rinsn_t * rin, rspan_t * span,
3777 uint16_t target_reg, int special_case);
3778 static void regClearEA(RASParser *p, rspan_t *span, rspan_t *spand);
/*
 * Top-level register allocation for the block graph rooted at rblock.
 *
 * An initial RegAllocatorScan() is tried first.  SaveMask is then tested
 * for bits outside X86_REGF_GOOD; the loop that follows bisects
 * RegAllocWeight between wlow and whigh, undoing the previous attempt
 * with RegAllocatorClear() before each new scan.  ExtraStackSpace is
 * reset to 0 and ExtraStackBase recomputed from bytes ahead of every
 * scan.  The visible fprintf() calls report the final weight and
 * whether it succeeded on stderr.
 *
 * NOTE(review): (bytes + RAWPTR_ALIGN) & ~RAWPTR_ALIGN rounds bytes up
 * only if RAWPTR_ALIGN is an alignment mask (2^n - 1) -- confirm.
 */
3782 RegAllocatorX86(RASParser *p, rblock_t *rblock, urunesize_t bytes)
3789 * Try maximum registerization first.
3792 ExtraStackSpace = 0;
3793 ExtraStackBase = (bytes + RAWPTR_ALIGN) & ~RAWPTR_ALIGN;
3795 RegAllocatorScan(p, rblock);
3796 if ((SaveMask & ~X86_REGF_GOOD) == 0)
3800 * Didn't work, search the weighting.
3802 while (wlow < whigh - 1) {
3803 RegAllocatorClear(p, rblock);
/* Midpoint computed as wlow + half the gap, which avoids overflowing wlow + whigh. */
3804 wmid = wlow + (whigh - wlow) / 2;
3805 RegAllocWeight = wmid;
3807 ExtraStackSpace = 0;
3808 ExtraStackBase = (bytes + RAWPTR_ALIGN) & ~RAWPTR_ALIGN;
3809 RegAllocatorScan(p, rblock);
3810 if (SaveMask & ~X86_REGF_GOOD) {
3812 * Too many registers were allocated, increase weight
/*
 * NOTE(review): both outcomes of the test above are commented as
 * "increase weight".  Since regAllocEA() refuses a register when
 * regweight < RegAllocWeight, this second case presumably lowers the
 * weight (copy/paste slip in the comment) -- confirm against the
 * assignments to wlow / whigh.
 */
3818 * Not enough registers were allocated, increase weight
3824 if (SaveMask & ~X86_REGF_GOOD) {
3825 RegAllocatorClear(p, rblock);
3828 ExtraStackSpace = 0;
3829 ExtraStackBase = (bytes + RAWPTR_ALIGN) & ~RAWPTR_ALIGN;
3830 RegAllocatorScan(p, rblock);
3832 if (SaveMask & ~X86_REGF_GOOD)
3833 fprintf(stderr, "UseWeight %d - FAILED (complexity %d)\n",
3834 RegAllocWeight, p->pcomplexity);
3836 fprintf(stderr, "UseWeight %d - Success\n", RegAllocWeight);
/*
 * Allocate registers for the instructions of rblock, then recurse into
 * the blocks linked through rblock->btrue and rblock->bfalse.
 *
 * RBLKF_REGALLOCATOR is tested on entry and set right after, so it acts
 * as a visited marker (presumably an already-marked block returns at
 * once -- that statement is not shown).  Each instruction's regused_init
 * is folded into incmask, which is merged into SaveMask at the end.
 */
3842 RegAllocatorScan(RASParser *p, rblock_t *rblock)
3845 uint64_t incmask = 0;
3848 if (rblock->flags & RBLKF_REGALLOCATOR)
3850 rblock->flags |= RBLKF_REGALLOCATOR;
/* The bounds give exactly one iteration, with pass == 1. */
3852 for (pass = 1; pass < 2; ++pass) {
3853 RUNE_FOREACH(rin, &rblock->rinsn_list, node) {
3854 if (rin->op == INSN_LABEL)
3858 * Note that as we are post-merge, some of these EAs may be the
3861 incmask |= rin->regused_init;
/* Each of the four operand slots is offered to the allocator. */
3862 regAllocEA(p, rin, &rin->arg1, &rin->arg1d, pass);
3863 regAllocEA(p, rin, &rin->arg2, &rin->arg2d, pass);
3864 regAllocEA(p, rin, &rin->arg3, &rin->arg3d, pass);
3865 regAllocEA(p, rin, &rin->arg4, &rin->arg4d, pass);
3867 if (rblock->btrue) {
3868 RegAllocatorScan(p, rblock->btrue);
3870 RegAllocatorScan(p, rblock->bfalse);
3873 SaveMask |= incmask;
3877 * Allocate a register for an rea, adjust the rea's span to account for the
3880 * Generally speaking we have a 64-bit mask of which, really, only 16 general
3881 * registers are mapped. Remaining bits are used to help calculate overflow.
3882 * If we fill up all 64-bits we will start allocating register %rats64.
/*
 * Decide where the EA referenced by arg lives for instruction rin.
 *
 * Visible stages:
 *   1. The nested EA in argd is handled first by a recursive call.
 *   2. Early tests cover EAs without REAF_CACHEABLE, EAs already flagged
 *      REAF_REGALLOCD, instructions flagged RINSF_DROPME and EAs that
 *      already carry a target_reg.
 *   3. When rea->regweight is below RegAllocWeight, or REAF_ADDRUSED is
 *      set, an EA may instead be turned into an X86_REG_RSP-relative
 *      EA_MEMORY slot carved out of ExtraStackSpace.
 *   4. Otherwise a register is chosen with findfreebit() from the
 *      registers regAllocSpan() reports as busy, recorded in SaveMask
 *      and across the span, and the EA becomes EA_DIRECT.
 *
 * NOTE(review): the statements guarded by the early tests, the case
 * labels of the switch and the origin of special_case are not shown
 * here.
 */
3886 regAllocEA(RASParser *p, rinsn_t *rin, rspan_t *arg, rspan_t *argd, int pass)
3896 regAllocEA(p, rin, argd, NULL, pass);
3897 dassert(rin == arg->rin);
3898 if ((rea->flags & REAF_CACHEABLE) == 0)
3900 dassert(rea->refs == 1 || arg->root);
3903 * Already allocated (SaveMask also already set). Happens for many
3904 * reasons including rea's representing the same object being shared.
3906 if (rea->flags & REAF_REGALLOCD)
3910 * If the instruction is being dropped adjust the masks just so the
3911 * deallocator is happy (the bit is most likely just going to get cleared
3912 * again), but do NOT set the register in SaveMask yet as we are not
3913 * actually going to use it in this insn.
3915 * This allows someone else to use this register and still span this
3918 * XXX It would be even better if the BlockReach*() code could chain the
3919 * drops upward and potentially drop even more instructions, but DROPME
3920 * is set later in the BlockCollapse() so at the moment this is the best
3921 * we can do. Regardless, this is still 100% efficient IF this was the
3922 * only instruction using this cacheable EA.
3924 if (rin->flags & RINSF_DROPME)
3928 * We don't need to reallocate if we already allocated it or if the
3929 * register was statically assigned.
3931 if (rea->target_reg) /* statically allocated */
3935 * Check EA weight, do not allocate register if too low.
3937 * Distinguish between direct and indirect registers. This is a bit
3938 * confusing. For EA_DIRECT, if REAF_DIRECT is set we are pushed into a
3939 * rea->direct rea and working on an indirect register. If not set, then
3940 * we are working on a direct register.
3942 * Generally speaking, we must allocate a real register for indirect
3946 if (rea->regweight < RegAllocWeight || (rea->flags & REAF_ADDRUSED)) {
3947 if (rea->orig_eamode == EA_MEMORY)
3949 if (rea->orig_eamode == EA_DIRECT &&
3950 (rea->flags & REAF_DIRECT) == 0) {
/* Slot size: RAWPTR_SIZE for pointer registers, otherwise chosen from rin->ext1. */
3953 if (rea->regno & REGF_PTR) {
3954 bytes = RAWPTR_SIZE;
3956 switch (rin->ext1) {
3982 dpanic("Unknown REXT %d", rin->ext1);
/*
 * The EA is re-pointed at a stack slot located at ExtraStackBase +
 * ExtraStackSpace; ExtraStackSpace is adjusted before the slot is taken
 * (presumably aligned to the slot size -- the mask operand is not
 * shown) and advanced by bytes afterwards.
 */
3987 ExtraStackSpace = (ExtraStackSpace + bytes - 1) &
3989 rea->eamode = EA_MEMORY;
3990 rea->offset = ExtraStackBase + ExtraStackSpace;
3991 rea->orig_offset = rea->offset;
3992 rea->target_reg = X86_REG_RSP;
3993 ExtraStackSpace += bytes;
3994 rea->flags |= REAF_REGALLOCD;
4001 * If allocating. Once allocated the target_reg remains intact through
4002 * potentially many shared references and passes. Keep in mind that many
4003 * instructions may share this rea.
4005 switch (rea->eamode) {
4009 if (rea->regno == 0)
/*
 * agg collects the registers that must not be picked: those busy over
 * the span, RSP and RIP, and a whole register class (X86_REGF_IMASK is
 * added for RAF_FLOAT operands; X86_REGF_FMASK is the other option).
 */
4011 agg = regAllocSpan(rin, arg, 0, special_case);
4012 agg |= X86_REGF_RSP | X86_REGF_RIP;
4013 if (arg->flags & RAF_FLOAT)
4014 agg |= X86_REGF_IMASK;
4016 agg |= X86_REGF_FMASK;
4017 rea->target_reg = findfreebit(0, agg);
/*
 * NOTE(review): the header comment of this function says allocation can
 * reach register 64 once all 64 mask bits are used; 1LLU << 64 is
 * undefined behaviour in C -- confirm findfreebit()'s range here.
 */
4019 if ((1LLU << rea->target_reg) & ~X86_REGF_GOOD)
4020 printf("MEMALLOCFAIL-%d %016jx [%016jx] @%s[%p]\n",
4023 regAllocSpan(rin, arg, 0, special_case),
4026 dassert(rea->regno == REG_FP ||
4027 rea->target_reg != (X86_REG_RSP & X86_REG_MASK));
4028 SaveMask |= 1LLU << rea->target_reg;
4029 rea->target_reg |= X86_SIZE_UNSPEC;
/* Second regAllocSpan() call, now with the chosen register, records it over the span. */
4030 regAllocSpan(rin, arg, rea->target_reg, special_case);
4031 rea->eamode = EA_DIRECT;
4032 rea->flags |= REAF_REGALLOCD;
/*
 * NOTE(review): the sequence below repeats the one above, differing in
 * the dassert(), the first findfreebit() argument (rea->regno instead
 * of 0) and the diagnostic text; a shared helper would remove the
 * duplication.
 */
4035 dassert(rea->regno);
4036 agg = regAllocSpan(rin, arg, 0, special_case);
4037 agg |= X86_REGF_RSP | X86_REGF_RIP;
4038 if (arg->flags & RAF_FLOAT)
4039 agg |= X86_REGF_IMASK;
4041 agg |= X86_REGF_FMASK;
4042 rea->target_reg = findfreebit(rea->regno, agg);
4044 if ((1LLU << rea->target_reg) & ~X86_REGF_GOOD)
4045 printf("DIRALLOCFAIL-%d %016jx [%016jx] @%s[%p]\n",
4048 regAllocSpan(rin, arg, 0, special_case),
4051 dassert(rea->regno == REG_FP ||
4052 rea->target_reg != (X86_REG_RSP & X86_REG_MASK));
4053 SaveMask |= 1LLU << rea->target_reg;
4054 rea->target_reg |= X86_SIZE_UNSPEC;
4055 regAllocSpan(rin, arg, rea->target_reg, special_case);
4056 rea->eamode = EA_DIRECT;
4057 rea->flags |= REAF_REGALLOCD;
4062 static uint64_t regAllocSpanRecurse(rspan_t * skel, uint64_t mask, uint64_t bit,
4063 uint16_t target_reg, int special_case);
/*
 * Walk the lifetime (span) of an EA, starting at instruction rin.
 *
 * regAllocEA() calls this in two ways: with target_reg == 0 to collect
 * the registers in use over the span, and with the chosen target_reg to
 * record that register in regused_agg of the instructions covered.
 *
 * mask starts from rin->regused_init (one variant strips
 * X86_REGF_SPECIAL_SAVE) plus rin->regused_agg, and is extended by
 * regAllocSpanRecurse() from span->root.  SpanClear(span->root) is
 * called afterwards; regAllocSpanRecurse() uses RAF_SPAN as its visited
 * marker, so this presumably resets those marks -- confirm.
 *
 * NOTE(review): the return statement is not shown; per the prototype
 * the result is a uint64_t, presumably mask -- confirm.
 */
4067 regAllocSpan(rinsn_t *rin, rspan_t *span, uint16_t target_reg, int special_case)
4072 bit = 1LLU << (target_reg & X86_REG_MASK);
4075 mask = rin->regused_init & ~X86_REGF_SPECIAL_SAVE;
4077 mask = rin->regused_init;
4079 mask |= rin->regused_agg;
4081 rin->regused_agg |= bit;
4083 (bit & rin->regused_init & X86_REGF_SPECIAL_SAVE)) {
/* The register is also in regused_init's special-save set: remember it on this instruction. */
4084 rin->special_save |= bit;
4088 mask = regAllocSpanRecurse(span->root, mask, bit,
4089 target_reg, special_case);
4090 SpanClear(span->root);
/*
 * Recursive part of regAllocSpan(): visit the span node skel and the
 * nodes linked through skel->strue and skel->sfalse.
 *
 * RAF_SPAN marks a node as visited so each is scanned once.  For the
 * instructions visited, regused_init (one variant strips
 * X86_REGF_SPECIAL_SAVE) and regused_agg are OR-ed into mask, bit is
 * OR-ed into regused_agg, and special_save is updated when special_case
 * is set and bit is in the instruction's special-save set.  The updated
 * mask is threaded through the recursive calls.
 *
 * NOTE(review): loop bounds and early exits are not shown here.
 */
4098 regAllocSpanRecurse(rspan_t * skel, uint64_t mask, uint64_t bit,
4099 uint16_t target_reg, int special_case)
4104 if (skel->flags & RAF_SPAN) /* already scanned */
4106 skel->flags |= RAF_SPAN;
4109 if (skel->flags & RAF_SKELETON) {
4115 rin = RUNE_FIRST(&skel->block->rinsn_list);
4122 mask |= rin->regused_init & ~X86_REGF_SPECIAL_SAVE;
4124 mask |= rin->regused_init;
4125 mask |= rin->regused_agg;
4127 rin->regused_agg |= bit;
4128 if (special_case && (bit & rin->regused_init &
4129 X86_REGF_SPECIAL_SAVE)) {
4130 rin->special_save |= bit;
4135 * Check span for rin iteration.
4137 * If there are no more instructions in this block using our
4138 * variable, check our block linkages. If all block linkages set
4139 * RAF_SKELETON then the variable is not used any more and we can
4142 while (span && span->rin == rin)
/* Both successor links are either absent or RAF_SKELETON nodes. */
4145 if ((skel->strue == NULL ||
4146 (skel->strue->flags & RAF_SKELETON)) &&
4147 (skel->sfalse == NULL ||
4148 (skel->sfalse->flags & RAF_SKELETON))) {
4152 rin = RUNE_NEXT(rin, node);
4159 mask = regAllocSpanRecurse(skel->strue, mask, bit,
4160 target_reg, special_case);
4162 mask = regAllocSpanRecurse(skel->sfalse, mask, bit,
4163 target_reg, special_case);
/*
 * Undo a previous RegAllocatorScan() over rblock and the blocks reached
 * through its branch targets, so another allocation attempt can start
 * clean.
 *
 * RBLKF_REGALLOCATOR is tested on entry and then cleared, so only blocks
 * a scan has marked are processed (presumably others return at once --
 * that statement is not shown).  Every instruction has regused_agg
 * zeroed; special_save is zeroed and the four operand slots are passed
 * to regClearEA().
 */
4169 RegAllocatorClear(RASParser *p, rblock_t *rblock)
4173 if ((rblock->flags & RBLKF_REGALLOCATOR) == 0)
4175 rblock->flags &= ~RBLKF_REGALLOCATOR;
4176 RUNE_FOREACH(rin, &rblock->rinsn_list, node) {
4177 rin->regused_agg = 0;
4178 if (rin->op == INSN_LABEL)
4180 rin->special_save = 0;
4182 regClearEA(p, &rin->arg1, &rin->arg1d);
4184 regClearEA(p, &rin->arg2, &rin->arg2d);
4186 regClearEA(p, &rin->arg3, &rin->arg3d);
4188 regClearEA(p, &rin->arg4, &rin->arg4d);
/* The assertion requires rin to be the last instruction in the list when this point is reached. */
4191 dassert(RUNE_NEXT(rin, node) == NULL);
4192 RegAllocatorClear(p, rin->brtrue->label_block);
4194 RegAllocatorClear(p, rin->brfalse->label_block);
/*
 * Revert the allocation recorded on span's EA.
 *
 * The nested EA in spand is cleared first by a recursive call.  If the
 * EA carries REAF_REGALLOCD, target_reg is zeroed, eamode, regno and
 * offset are restored from their orig_* copies, and the flag is dropped.
 */
4201 regClearEA(RASParser *p, rspan_t *span, rspan_t *spand)
4203 rea_t *rea = span->rea;
4206 dassert(rea->direct == spand->rea);
4207 regClearEA(p, spand, NULL);
4209 if (rea->flags & REAF_REGALLOCD) {
4210 rea->target_reg = 0;
4211 rea->eamode = rea->orig_eamode;
4212 rea->regno = rea->orig_regno;
4213 rea->offset = rea->orig_offset;
4214 rea->flags &= ~REAF_REGALLOCD;
4218 /************************************************************************
4219 * HELPER FUNCTIONS *
4220 ************************************************************************
/*
 * Pick a register number whose bit is clear in mask (set bits are
 * registers that must not be used).
 *
 * The first part returns or prefers specific registers (RIP, RSP, RSI,
 * RDI, RDX are visible), presumably keyed on regno -- the selecting
 * statements are not shown.  The generic part narrows the search by
 * halves: whenever the low 32 / 16 / 8 / 4 bits are all set it recurses
 * on the shifted mask and adds the shift count, and finally indexes
 * lookup[] with the low four bits.
 *
 * NOTE(review): lookup[]'s contents are not shown; presumably it maps a
 * 4-bit mask to its lowest clear bit.  With every bit of mask set the
 * recursion ends on a zero mask and yields 32 + 32 + lookup[0], i.e. a
 * value of at least 64 -- callers that shift by the result should be
 * checked.
 */
4225 findfreebit(uint32_t regno, uint64_t mask)
4227 static int lookup[16] = {
4247 * Prefer certain registers, disallow others.
4252 return (X86_REG_RIP & X86_REG_MASK);
4254 return (X86_REG_RSP & X86_REG_MASK);
4256 if (mask & (1LLU << (X86_REG_RSI & X86_REG_MASK)))
4258 return (X86_REG_RSI & X86_REG_MASK);
4261 if (mask & (1LLU << (X86_REG_RDI & X86_REG_MASK)))
4263 return (X86_REG_RDI & X86_REG_MASK);
4266 if (mask & (1LLU << (X86_REG_RDX & X86_REG_MASK)))
4268 return (X86_REG_RDX & X86_REG_MASK);
4276 if ((mask & 0x00000000FFFFFFFFLLU) == 0x00000000FFFFFFFFLLU)
4277 return (findfreebit(0, mask >> 32) + 32);
4278 if ((mask & 0x000000000000FFFFLLU) == 0x000000000000FFFFLLU)
4279 return (findfreebit(0, mask >> 16) + 16);
4280 if ((mask & 0x00000000000000FFLLU) == 0x00000000000000FFLLU)
4281 return (findfreebit(0, mask >> 8) + 8);
4282 if ((mask & 0x000000000000000FLLU) == 0x000000000000000FLLU)
4283 return (findfreebit(0, mask >> 4) + 4);
4284 return (lookup[(int)mask & 15]);
/*
 * Clear, in *mask, the bit of the register assigned to rea.  An EA
 * whose target_reg is zero leaves *mask untouched.
 */
4290 clearregbit(uint64_t *mask, rea_t *rea)
4292 if (rea->target_reg)
4293 *mask &= ~(1LLU << (rea->target_reg & X86_REG_MASK));
/*
 * Return the text printINSN() appends to an x86 mnemonic for extension
 * c.  Operands flagged RAF_FLOAT are tested for first.
 *
 * NOTE(review): the strings returned are not shown here.
 */
4299 x86ext(char c, uint8_t argflags)
4301 if (argflags & RAF_FLOAT)
/*
 * Map the condition field of op (op & 0x0F00) to a value per condition.
 *
 * Four parallel tables are visible, each covering the same ten
 * conditions (EQ, NE, unsigned GT/GE/LT/LE, signed GT/GE/LT/LE).
 *
 * NOTE(review): which table is used is presumably selected by the
 * invert and reverse arguments, and the value produced is presumably
 * an x86 branch mnemonic; neither the selecting statements nor the
 * returned values are shown here -- confirm.
 */
4327 x86branch(uint32_t op, int invert, int reverse)
4331 switch (op & 0x0F00) {
4332 case 0x0000: /* EQ */
4334 case 0x0100: /* NE */
4336 case 0x0200: /* UGT */
4338 case 0x0300: /* UGE */
4340 case 0x0400: /* ULT */
4342 case 0x0500: /* ULE */
4344 case 0x0600: /* SGT */
4346 case 0x0700: /* SGE */
4348 case 0x0800: /* SLT */
4350 case 0x0900: /* SLE */
4356 switch (op & 0x0F00) {
4357 case 0x0000: /* EQ */
4359 case 0x0100: /* NE */
4361 case 0x0200: /* UGT */
4363 case 0x0300: /* UGE */
4365 case 0x0400: /* ULT */
4367 case 0x0500: /* ULE */
4369 case 0x0600: /* SGT */
4371 case 0x0700: /* SGE */
4373 case 0x0800: /* SLT */
4375 case 0x0900: /* SLE */
4383 switch (op & 0x0F00) {
4384 case 0x0000: /* EQ */
4386 case 0x0100: /* NE */
4388 case 0x0200: /* UGT */
4390 case 0x0300: /* UGE */
4392 case 0x0400: /* ULT */
4394 case 0x0500: /* ULE */
4396 case 0x0600: /* SGT */
4398 case 0x0700: /* SGE */
4400 case 0x0800: /* SLT */
4402 case 0x0900: /* SLE */
4408 switch (op & 0x0F00) {
4409 case 0x0000: /* EQ */
4411 case 0x0100: /* NE */
4413 case 0x0200: /* UGT */
4415 case 0x0300: /* UGE */
4417 case 0x0400: /* ULT */
4419 case 0x0500: /* ULE */
4421 case 0x0600: /* SGT */
4423 case 0x0700: /* SGE */
4425 case 0x0800: /* SLT */
4427 case 0x0900: /* SLE */
/*
 * Test whether rea1 and rea2 describe the same operand.
 *
 * When both are EA_DIRECT, each is asserted to have a target_reg
 * assigned.  The field comparison covers regno, target_reg, offset, sym
 * and both halves of the immediate (immlo, immhi).
 *
 * NOTE(review): the return statements, and any eamode comparison, are
 * not shown here.
 */
4438 sameEA(rea_t *rea1, rea_t *rea2)
4440 if (rea1->eamode == EA_DIRECT && rea2->eamode == EA_DIRECT) {
4441 dassert(rea1->target_reg);
4442 dassert(rea2->target_reg);
4446 * NOTE: No need to test cache_id at this point.
4448 if (rea1->regno == rea2->regno &&
4449 rea1->target_reg == rea2->target_reg &&
4450 rea1->offset == rea2->offset &&
4451 rea1->sym == rea2->sym &&
4452 rea1->immlo == rea2->immlo &&
4453 rea1->immhi == rea2->immhi)
/*
 * Look for the label sym among the instructions that follow rin.
 *
 * The scan starts at the instruction after rin, tests each one for
 * being an INSN_LABEL and compares its label with sym.  It continues
 * into the block after rin->block, restarting from that block's first
 * instruction.
 *
 * NOTE(review): loop structure and return values are not shown here;
 * presumably the scan stops at the first non-label instruction and
 * reports whether sym was found -- confirm.
 */
4461 adjacentLabel(rinsn_t *rin, rsym_t *sym)
4466 scan = RUNE_NEXT(rin, node);
4469 if (scan->op != INSN_LABEL)
4471 if (sym == scan->label)
4473 scan = RUNE_NEXT(scan, node);
4475 block = RUNE_NEXT(rin->block, node);
4478 rin = scan = RUNE_FIRST(&block->rinsn_list);
/*
 * Build a new instruction with args operands and add it to rblock.
 *
 * The instruction and one rea_t per operand are obtained from zalloc();
 * the operands are attached as arg1 .. arg4, and dpanic() is called for
 * an operand index beyond those.  A "MOVE" instruction dispatched to
 * InsnMOVE is the only visible supported form; dpanic() reports an
 * unsupported one.  InsnTargetAdjust() is applied before RasBlockAdd()
 * links the instruction into rblock.
 *
 * NOTE(review): the use of ext and the return value are not shown here.
 */
4485 allocInsnBlock(RASParser * p, rblock_t * rblock, uint32_t op,
4486 uint8_t ext, int args)
4492 rin = zalloc(sizeof(*rin));
4494 rin->operands = args;
4497 for (i = 0; i < args; ++i) {
4498 rea = zalloc(sizeof(*rea));
4503 rin->arg1.rea = rea;
4506 rin->arg2.rea = rea;
4509 rin->arg3.rea = rea;
4512 rin->arg4.rea = rea;
4515 dpanic("Too many EAs for allocInsnBlock()");
4521 rin->opname = "MOVE";
4522 rin->func = InsnMOVE;
4525 dpanic("Unsupported insn for allocInsnBlock()");
4529 /* rin->block = rblock; handled by block scan */
4530 InsnTargetAdjust(p, rin);
4531 RasBlockAdd(rblock, rin);
4537 * Initialize an rea, make it cacheable if EA_DIRECT+reg or EA_IMMEDIATE*
/*
 * Fill in an rea: eamode and target_reg are stored, and eamode / regno
 * are also saved as orig_eamode / orig_regno (the values regClearEA()
 * restores from).
 *
 * REAF_CACHEABLE is set in two cases: an EA_DIRECT operand that names a
 * register number but has no target_reg yet, and any EA_IMMEDIATE or
 * EA_IMMEDIATE16 operand.
 */
4541 initEA(rea_t *rea, uint8_t eamode, uint32_t regno, uint16_t target_reg)
4543 rea->eamode = eamode;
4545 rea->target_reg = target_reg;
4546 rea->orig_eamode = eamode;
4547 rea->orig_regno = regno;
4548 if (target_reg == 0 && regno && eamode == EA_DIRECT)
4549 rea->flags |= REAF_CACHEABLE;
4550 if (eamode == EA_IMMEDIATE || eamode == EA_IMMEDIATE16)
4551 rea->flags |= REAF_CACHEABLE;