1 ;; GCC machine description for i386 synchronization instructions.
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
25 UNSPEC_MOVA ; For __atomic support
30 (define_c_enum "unspecv" [
;; Expander for the UNSPEC_LFENCE barrier.  The preparation code makes
;; operand 0 a volatile BLKmode MEM whose address is a SCRATCH, and that
;; MEM is the input of the unspec.
36 (define_expand "sse2_lfence"
38 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
41 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
42 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode set of operand 0 from UNSPEC_LFENCE of the same
;; operand.  Attributes: type "sse", length_address 0, atom_sse_attr
;; "lfence", memory "unknown".
45 (define_insn "*sse2_lfence"
46 [(set (match_operand:BLK 0 "" "")
47 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
50 [(set_attr "type" "sse")
51 (set_attr "length_address" "0")
52 (set_attr "atom_sse_attr" "lfence")
53 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_SFENCE barrier, enabled for TARGET_SSE or
;; TARGET_3DNOW_A.  Operand 0 is created here as a volatile BLKmode MEM
;; whose address is a SCRATCH.
55 (define_expand "sse_sfence"
57 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
58 "TARGET_SSE || TARGET_3DNOW_A"
60 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
61 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode set of operand 0 from UNSPEC_SFENCE of the same
;; operand, under the same target condition as the sse_sfence expander.
;; Attributes: type "sse", length_address 0, atom_sse_attr "fence",
;; memory "unknown".
64 (define_insn "*sse_sfence"
65 [(set (match_operand:BLK 0 "" "")
66 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
67 "TARGET_SSE || TARGET_3DNOW_A"
69 [(set_attr "type" "sse")
70 (set_attr "length_address" "0")
71 (set_attr "atom_sse_attr" "fence")
72 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_MFENCE barrier.  Operand 0 is created here as a
;; volatile BLKmode MEM whose address is a SCRATCH and is used as the
;; input of the unspec.
74 (define_expand "sse2_mfence"
76 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
79 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
80 MEM_VOLATILE_P (operands[0]) = 1;
;; UNSPEC_MFENCE insn for targets where TARGET_64BIT or TARGET_SSE2 holds.
;; Operand 0 is the BLKmode MEM that is both set and used by the unspec.
;; Attributes: type "sse", length_address 0, atom_sse_attr "fence",
;; memory "unknown".
83 (define_insn "mfence_sse2"
84 [(set (match_operand:BLK 0 "" "")
85 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
86 "TARGET_64BIT || TARGET_SSE2"
88 [(set_attr "type" "sse")
89 (set_attr "length_address" "0")
90 (set_attr "atom_sse_attr" "fence")
91 (set_attr "memory" "unknown")])
;; UNSPEC_MFENCE insn for targets where neither TARGET_64BIT nor
;; TARGET_SSE2 holds (the exact negation of the mfence_sse2 condition).
;; The template is a locked OR of immediate 0 into the dword addressed by
;; esp, so the pattern also clobbers the flags register.
93 (define_insn "mfence_nosse"
94 [(set (match_operand:BLK 0 "" "")
95 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
96 (clobber (reg:CC FLAGS_REG))]
97 "!(TARGET_64BIT || TARGET_SSE2)"
98 "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
99 [(set_attr "memory" "unknown")])
;; Expand a thread fence; operand 0 is the memory model as a const_int.
;; A barrier is emitted only for MEMMODEL_SEQ_CST: the generator is
;; gen_mfence_sse2 when TARGET_64BIT or TARGET_SSE2 holds, and
;; gen_mfence_nosse is the alternative.  The generator is applied to a
;; volatile BLKmode MEM whose address is a SCRATCH.
101 (define_expand "mem_thread_fence"
102 [(match_operand:SI 0 "const_int_operand" "")] ;; model
105 /* Unless this is a SEQ_CST fence, the i386 memory model is strong
106 enough not to require barriers of any kind. */
107 if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST)
109 rtx (*mfence_insn)(rtx);
112 if (TARGET_64BIT || TARGET_SSE2)
113 mfence_insn = gen_mfence_sse2;
115 mfence_insn = gen_mfence_nosse;
117 mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
118 MEM_VOLATILE_P (mem) = 1;
120 emit_insn (mfence_insn (mem));
125 ;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
126 ;; Only beginning at Pentium family processors do we get any guarantee of
127 ;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a
128 ;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
130 ;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
132 ;; Importantly, *no* processor makes atomicity guarantees for larger
133 ;; accesses. In particular, there's no way to perform an atomic TImode
134 ;; move, despite the apparent applicability of MOVDQA et al.
;; Mode iterator used by the atomic_load<mode> and atomic_store<mode>
;; expanders.  DImode is enabled when TARGET_64BIT holds, or when
;; TARGET_CMPXCHG8B holds together with TARGET_80387 or TARGET_SSE.
136 (define_mode_iterator ATOMIC
138 (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
;; Atomic load expander.
;;   operand 0: destination register
;;   operand 1: source memory
;;   operand 2: memory model (const_int)
;; DImode without TARGET_64BIT is routed through atomic_loaddi_fpu, which
;; is given a DImode SLOT_TEMP stack slot as its temporary.  The other
;; path is an ordinary move from operand 1 to operand 0.
141 (define_expand "atomic_load<mode>"
142 [(set (match_operand:ATOMIC 0 "register_operand" "")
143 (unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "")
144 (match_operand:SI 2 "const_int_operand" "")]
148 /* For DImode on 32-bit, we can use the FPU to perform the load. */
149 if (<MODE>mode == DImode && !TARGET_64BIT)
150 emit_insn (gen_atomic_loaddi_fpu
151 (operands[0], operands[1],
152 assign_386_stack_local (DImode, SLOT_TEMP)));
154 emit_move_insn (operands[0], operands[1]);
;; DImode atomic load for 32-bit targets with x87 or SSE; split after
;; reload.
;;   operand 0: destination; alternatives are SSE register, memory, or
;;              (disparaged) general register
;;   operand 1: source memory
;;   operand 2: clobbered DImode memory temporary; only the third
;;              alternative requires real memory ("m"), the others use "X"
;;   operand 3: DFmode scratch in an SSE or x87 register for the second
;;              and third alternatives
;; The split body contains three move sequences: a direct move from src
;; to dst, an x87 load/store pair through the scratch, and a DImode move
;; through the scratch (after adjust_reg_mode).  The sequences that store
;; to the temporary are followed by a move from the temporary to dst.
158 (define_insn_and_split "atomic_loaddi_fpu"
159 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
160 (unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
162 (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
163 (clobber (match_scratch:DF 3 "=X,xf,xf"))]
164 "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
166 "&& reload_completed"
169 rtx dst = operands[0], src = operands[1];
170 rtx mem = operands[2], tmp = operands[3];
173 emit_move_insn (dst, src);
181 emit_insn (gen_loaddi_via_fpu (tmp, src));
182 emit_insn (gen_storedi_via_fpu (mem, tmp));
186 adjust_reg_mode (tmp, DImode);
187 emit_move_insn (tmp, src);
188 emit_move_insn (mem, tmp);
192 emit_move_insn (dst, mem);
;; Atomic store expander.
;;   operand 0: destination memory
;;   operand 1: source register
;;   operand 2: memory model (const_int)
;; DImode without TARGET_64BIT is routed through atomic_storedi_fpu with
;; a DImode SLOT_TEMP stack slot.  For other cases, a seq-cst store on a
;; target with neither TARGET_64BIT nor TARGET_SSE2 is performed with
;; atomic_exchange<mode> into a fresh pseudo; remaining stores use
;; atomic_store<mode>_1.  The trailing code emits mem_thread_fence when
;; the model is MEMMODEL_SEQ_CST.
197 (define_expand "atomic_store<mode>"
198 [(set (match_operand:ATOMIC 0 "memory_operand" "")
199 (unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "")
200 (match_operand:SI 2 "const_int_operand" "")]
204 enum memmodel model = (enum memmodel) INTVAL (operands[2]);
206 if (<MODE>mode == DImode && !TARGET_64BIT)
208 /* For DImode on 32-bit, we can use the FPU to perform the store. */
209 /* Note that while we could perform a cmpxchg8b loop, that turns
210 out to be significantly larger than this plus a barrier. */
211 emit_insn (gen_atomic_storedi_fpu
212 (operands[0], operands[1],
213 assign_386_stack_local (DImode, SLOT_TEMP)));
217 /* For seq-cst stores, when we lack MFENCE, use XCHG. */
218 if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2))
220 emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
221 operands[0], operands[1],
226 /* Otherwise use a store. */
227 emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
230 /* ... followed by an MFENCE, if required. */
231 if (model == MEMMODEL_SEQ_CST)
232 emit_insn (gen_mem_thread_fence (operands[2]));
;; Store insn used by the atomic_store<mode> expander: a single mov of
;; operand 1 (register or immediate, per the mode's constraints) to memory
;; operand 0.  The source is wrapped in an unspec that also carries
;; operand 2, a const_int that the template does not reference.
236 (define_insn "atomic_store<mode>_1"
237 [(set (match_operand:SWI 0 "memory_operand" "=m")
238 (unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
239 (match_operand:SI 2 "const_int_operand")]
242 "mov{<imodesuffix>}\t{%1, %0|%0, %1}")
;; DImode atomic store for 32-bit targets with x87 or SSE; split after
;; reload.
;;   operand 0: destination memory
;;   operand 1: source; alternatives are SSE register, memory, or
;;              (disparaged) general register
;;   operand 2: clobbered DImode memory temporary; only the third
;;              alternative requires real memory ("m"), the others use "X"
;;   operand 3: DFmode scratch in an SSE or x87 register for the second
;;              and third alternatives
;; When the source is not an SSE register, the split body can first copy
;; it to the temporary, then either uses an x87 load/store pair through
;; the scratch or moves the value through the scratch in DImode.  The
;; final move stores src to dst.
;; NOTE(review): the DImode-scratch path loads the scratch from operand 2
;; (mem), while the x87 path loads from src.  In the second alternative
;; operand 2 only has an "X" constraint -- confirm operand 2 always holds
;; the value on that path, or whether src was intended.
244 (define_insn_and_split "atomic_storedi_fpu"
245 [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
246 (unspec:DI [(match_operand:DI 1 "register_operand" "x,m,?r")]
248 (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
249 (clobber (match_scratch:DF 3 "=X,xf,xf"))]
250 "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
252 "&& reload_completed"
255 rtx dst = operands[0], src = operands[1];
256 rtx mem = operands[2], tmp = operands[3];
258 if (!SSE_REG_P (src))
262 emit_move_insn (mem, src);
268 emit_insn (gen_loaddi_via_fpu (tmp, src));
269 emit_insn (gen_storedi_via_fpu (dst, tmp));
274 adjust_reg_mode (tmp, DImode);
275 emit_move_insn (tmp, mem);
279 emit_move_insn (dst, src);
283 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
284 ;; operations. But the fix_trunc patterns want way more setup than we want
285 ;; to provide. Note that the scratch is DFmode instead of XFmode in order
286 ;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
;; Load DImode memory operand 1 into x87 register operand 0 (constraint
;; "f").  The register is described as DFmode and the operation as
;; UNSPEC_LDA.  Attributes: type "fmov", mode DF, fp_int_src true.
288 (define_insn "loaddi_via_fpu"
289 [(set (match_operand:DF 0 "register_operand" "=f")
290 (unspec:DF [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_LDA))]
293 [(set_attr "type" "fmov")
294 (set_attr "mode" "DF")
295 (set_attr "fp_int_src" "true")])
;; Store x87 register operand 1 (DFmode, constraint "f") to DImode memory
;; operand 0 with fistp, described as UNSPEC_STA.  The output code asserts
;; that the insn carries a REG_DEAD note for FIRST_STACK_REG before
;; returning the popping store template.
297 (define_insn "storedi_via_fpu"
298 [(set (match_operand:DI 0 "memory_operand" "=m")
299 (unspec:DI [(match_operand:DF 1 "register_operand" "f")] UNSPEC_STA))]
302 gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);
304 return "fistp%Z0\t%0";
306 [(set_attr "type" "fmov")
307 (set_attr "mode" "DI")])
;; Compare-and-swap expander for the SWI124 modes.  It emits
;; atomic_compare_and_swap<mode>_1 on operands 1-4 (oldval output, memory,
;; expected, newval) and then sets the boolean result, operand 0, from the
;; EQ condition on the CCZmode flags register.
309 (define_expand "atomic_compare_and_swap<mode>"
310 [(match_operand:QI 0 "register_operand" "") ;; bool success output
311 (match_operand:SWI124 1 "register_operand" "") ;; oldval output
312 (match_operand:SWI124 2 "memory_operand" "") ;; memory
313 (match_operand:SWI124 3 "register_operand" "") ;; expected input
314 (match_operand:SWI124 4 "register_operand" "") ;; newval input
315 (match_operand:SI 5 "const_int_operand" "") ;; is_weak
316 (match_operand:SI 6 "const_int_operand" "") ;; success model
317 (match_operand:SI 7 "const_int_operand" "")] ;; failure model
321 (gen_atomic_compare_and_swap<mode>_1
322 (operands[1], operands[2], operands[3], operands[4]));
323 ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
;; Modes for the wide compare-and-swap expander: DImode when TARGET_64BIT
;; or TARGET_CMPXCHG8B holds, TImode when both TARGET_64BIT and
;; TARGET_CMPXCHG16B hold.  CASHMODE maps each of these modes to the mode
;; of one half (DI -> SI, TI -> DI).
328 (define_mode_iterator CASMODE
329 [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
330 (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
331 (define_mode_attr CASHMODE [(DI "SI") (TI "DI")])
;; Compare-and-swap expander for the CASMODE modes.  DImode with
;; TARGET_64BIT uses atomic_compare_and_swapdi_1 directly on operands 1-4.
;; The other path splits oldval, expected and newval into <CASHMODE> high
;; and low parts and emits atomic_compare_and_swap<mode>_doubleword.  On
;; that path the memory address is forced into a register when the
;; operand does not satisfy cmpxchg8b_pic_memory_operand.  The boolean
;; result, operand 0, is set from the EQ condition on the CCZmode flags
;; register.
333 (define_expand "atomic_compare_and_swap<mode>"
334 [(match_operand:QI 0 "register_operand" "") ;; bool success output
335 (match_operand:CASMODE 1 "register_operand" "") ;; oldval output
336 (match_operand:CASMODE 2 "memory_operand" "") ;; memory
337 (match_operand:CASMODE 3 "register_operand" "") ;; expected input
338 (match_operand:CASMODE 4 "register_operand" "") ;; newval input
339 (match_operand:SI 5 "const_int_operand" "") ;; is_weak
340 (match_operand:SI 6 "const_int_operand" "") ;; success model
341 (match_operand:SI 7 "const_int_operand" "")] ;; failure model
344 if (<MODE>mode == DImode && TARGET_64BIT)
347 (gen_atomic_compare_and_swapdi_1
348 (operands[1], operands[2], operands[3], operands[4]));
352 enum machine_mode hmode = <CASHMODE>mode;
353 rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem;
359 hi_o = gen_highpart (hmode, lo_o);
360 hi_e = gen_highpart (hmode, lo_e);
361 hi_n = gen_highpart (hmode, lo_n);
362 lo_o = gen_lowpart (hmode, lo_o);
363 lo_e = gen_lowpart (hmode, lo_e);
364 lo_n = gen_lowpart (hmode, lo_n);
366 if (!cmpxchg8b_pic_memory_operand (mem, <MODE>mode))
367 mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
370 (gen_atomic_compare_and_swap<mode>_doubleword
371 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
374 ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
;; Single-word locked cmpxchg.
;;   operand 0: output, constrained to the "a" register
;;   operand 1: memory, read and written ("+m")
;;   operand 2: expected value, tied to operand 0 (constraint "0")
;;   operand 3: new value in a register
;; Besides operand 0, the pattern sets the memory and the CCZmode flags
;; register through UNSPECV_CMPXCHG.
379 (define_insn "atomic_compare_and_swap<mode>_1"
380 [(set (match_operand:SWI 0 "register_operand" "=a")
382 [(match_operand:SWI 1 "memory_operand" "+m")
383 (match_operand:SWI 2 "register_operand" "0")
384 (match_operand:SWI 3 "register_operand" "<r>")]
387 (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
388 (set (reg:CCZ FLAGS_REG)
389 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
391 "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
393 ;; For double-word compare and swap, we are obliged to play tricks with
394 ;; the input newval (op5:op6) because the Intel register numbering does
395 ;; not match the gcc register numbering, so the pair must be CX:BX.
396 ;; That said, in order to take advantage of possible lower-subreg opts,
397 ;; treat all of the integral operands in the same way.
399 ;; Operands 5 and 6 really need to be different registers, which in
400 ;; this case means op5 must not be ecx. If op5 and op6 are the same
401 ;; (like when the input is -1LL) GCC might choose to allocate op5 to ecx,
402 ;; like op6. This breaks, as the xchg will move the PIC register
403 ;; contents to %ecx then --> boom.
;; Mode attributes used by the doubleword compare-and-swap insn:
;; doublemodesuffix gives the digits in the cmpxchg8b/cmpxchg16b mnemonic
;; and in the TARGET_CMPXCHG<n>B condition; regprefix gives the register
;; name prefix ("e" for SImode, "r" for DImode) used when naming bx.
405 (define_mode_attr doublemodesuffix [(SI "8") (DI "16")])
406 (define_mode_attr regprefix [(SI "e") (DI "r")])
;; Doubleword locked cmpxchg (cmpxchg8b / cmpxchg16b).
;;   operands 0, 1: old-value outputs in the "a" and "d" registers
;;   operand 2:     doubleword memory (cmpxchg8b_pic_memory_operand)
;;   operands 3, 4: expected value halves, tied to operands 0 and 1
;;   operand 5:     newval half for bx; the first alternative requires "b",
;;                  the second accepts another register
;;   operand 6:     newval half in the "c" register
;;   operand 7:     scratch; "X" in the first alternative, constraint "&5"
;;                  in the second
;; The output code sets `swap' when operand 5 is not BX_REG and contains
;; an xchg between bx and operand 5 both before and after the locked
;; cmpxchg.
408 (define_insn "atomic_compare_and_swap<dwi>_doubleword"
409 [(set (match_operand:DWIH 0 "register_operand" "=a,a")
410 (unspec_volatile:DWIH
411 [(match_operand:<DWI> 2 "cmpxchg8b_pic_memory_operand" "+m,m")
412 (match_operand:DWIH 3 "register_operand" "0,0")
413 (match_operand:DWIH 4 "register_operand" "1,1")
414 (match_operand:DWIH 5 "register_operand" "b,!*r")
415 (match_operand:DWIH 6 "register_operand" "c,c")]
417 (set (match_operand:DWIH 1 "register_operand" "=d,d")
418 (unspec_volatile:DWIH [(const_int 0)] UNSPECV_CMPXCHG))
420 (unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
421 (set (reg:CCZ FLAGS_REG)
422 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))
423 (clobber (match_scratch:DWIH 7 "=X,&5"))]
424 "TARGET_CMPXCHG<doublemodesuffix>B"
426 bool swap = REGNO (operands[5]) != BX_REG;
429 output_asm_insn ("xchg{<imodesuffix>}\t%%<regprefix>bx, %5", operands);
431 output_asm_insn ("lock{%;} cmpxchg<doublemodesuffix>b\t%2", operands);
434 output_asm_insn ("xchg{<imodesuffix>}\t%%<regprefix>bx, %5", operands);
439 ;; For operand 2 nonmemory_operand predicate is used instead of
440 ;; register_operand to allow combiner to better optimize atomic
441 ;; additions of constants.
;; Locked xadd.
;;   operand 0: output register
;;   operand 1: memory, read and written ("+m"); also set to itself plus
;;              operand 2
;;   operand 2: addend, tied to operand 0 (constraint "0")
;;   operand 3: memory model (const_int), not referenced by the template
;; The pattern clobbers the flags register.
442 (define_insn "atomic_fetch_add<mode>"
443 [(set (match_operand:SWI 0 "register_operand" "=<r>")
445 [(match_operand:SWI 1 "memory_operand" "+m")
446 (match_operand:SI 3 "const_int_operand" "")] ;; model
449 (plus:SWI (match_dup 1)
450 (match_operand:SWI 2 "nonmemory_operand" "0")))
451 (clobber (reg:CC FLAGS_REG))]
453 "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
455 ;; This peephole2 and following insn optimize
456 ;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
457 ;; followed by testing of flags instead of lock xadd and comparisons.
;; Matched sequence: load constant operand 2 into register operand 0, a
;; fetch-and-add parallel on memory operand 1 (model in operand 4), then a
;; CCZ compare of operand 0 against constant operand 3.
;; The replacement is allowed only when operand 0 is dead after the third
;; insn, operand 2 equals the negation of operand 3 (compared as unsigned
;; HOST_WIDE_INT), and operand 0 does not overlap the memory operand.
459 [(set (match_operand:SWI 0 "register_operand" "")
460 (match_operand:SWI 2 "const_int_operand" ""))
461 (parallel [(set (match_dup 0)
463 [(match_operand:SWI 1 "memory_operand" "")
464 (match_operand:SI 4 "const_int_operand" "")]
467 (plus:SWI (match_dup 1)
469 (clobber (reg:CC FLAGS_REG))])
470 (set (reg:CCZ FLAGS_REG)
471 (compare:CCZ (match_dup 0)
472 (match_operand:SWI 3 "const_int_operand" "")))]
473 "peep2_reg_dead_p (3, operands[0])
474 && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
475 == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
476 && !reg_overlap_mentioned_p (operands[0], operands[1])"
477 [(parallel [(set (reg:CCZ FLAGS_REG)
479 (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
483 (plus:SWI (match_dup 1)
;; Locked add of constant operand 1 to memory operand 0 that also sets
;; the CCZ flags as a compare against constant operand 2.  The insn
;; condition requires operand 1 to equal the negation of operand 2
;; (compared as unsigned HOST_WIDE_INT).  Operand 3 is the memory model.
;; Output selection: with TARGET_USE_INCDEC, +1 gives inc and -1 gives
;; dec.  When x86_maybe_negate_const_int returns true the sub form is
;; used; the remaining case uses add.
486 (define_insn "*atomic_fetch_add_cmp<mode>"
487 [(set (reg:CCZ FLAGS_REG)
488 (compare:CCZ (unspec_volatile:SWI
489 [(match_operand:SWI 0 "memory_operand" "+m")
490 (match_operand:SI 3 "const_int_operand" "")]
492 (match_operand:SWI 2 "const_int_operand" "i")))
494 (plus:SWI (match_dup 0)
495 (match_operand:SWI 1 "const_int_operand" "i")))]
496 "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
497 == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
499 if (TARGET_USE_INCDEC)
501 if (operands[1] == const1_rtx)
502 return "lock{%;} inc{<imodesuffix>}\t%0";
503 if (operands[1] == constm1_rtx)
504 return "lock{%;} dec{<imodesuffix>}\t%0";
507 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
508 return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
510 return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
513 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
514 ;; In addition, it is always a full barrier, so we can ignore the memory model.
;; Atomic exchange via xchg, emitted without a lock prefix.  The input
;; (operand 2) is tied to the output register (operand 0) by constraint
;; "0"; memory operand 1 is read and written.  The memory model, operand
;; 3, is not referenced by the template.
515 (define_insn "atomic_exchange<mode>"
516 [(set (match_operand:SWI 0 "register_operand" "=<r>") ;; output
518 [(match_operand:SWI 1 "memory_operand" "+m") ;; memory
519 (match_operand:SI 3 "const_int_operand" "")] ;; model
522 (match_operand:SWI 2 "register_operand" "0"))] ;; input
524 "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
;; Locked add of operand 1 (register or immediate) into memory operand 0,
;; with no value returned.  Operand 2 is the memory model; the flags
;; register is clobbered.
;; Output selection: with TARGET_USE_INCDEC, +1 gives inc and -1 gives
;; dec.  When x86_maybe_negate_const_int returns true the sub form is
;; used; the remaining case uses add.
526 (define_insn "atomic_add<mode>"
527 [(set (match_operand:SWI 0 "memory_operand" "+m")
529 [(plus:SWI (match_dup 0)
530 (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
531 (match_operand:SI 2 "const_int_operand" "")] ;; model
533 (clobber (reg:CC FLAGS_REG))]
536 if (TARGET_USE_INCDEC)
538 if (operands[1] == const1_rtx)
539 return "lock{%;} inc{<imodesuffix>}\t%0";
540 if (operands[1] == constm1_rtx)
541 return "lock{%;} dec{<imodesuffix>}\t%0";
544 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
545 return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
547 return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
;; Locked subtract of operand 1 (register or immediate) from memory
;; operand 0, with no value returned.  Operand 2 is the memory model; the
;; flags register is clobbered.
;; Output selection mirrors atomic_add<mode> with the mnemonics swapped:
;; with TARGET_USE_INCDEC, +1 gives dec and -1 gives inc.  When
;; x86_maybe_negate_const_int returns true the add form is used; the
;; remaining case uses sub.
550 (define_insn "atomic_sub<mode>"
551 [(set (match_operand:SWI 0 "memory_operand" "+m")
553 [(minus:SWI (match_dup 0)
554 (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
555 (match_operand:SI 2 "const_int_operand" "")] ;; model
557 (clobber (reg:CC FLAGS_REG))]
560 if (TARGET_USE_INCDEC)
562 if (operands[1] == const1_rtx)
563 return "lock{%;} dec{<imodesuffix>}\t%0";
564 if (operands[1] == constm1_rtx)
565 return "lock{%;} inc{<imodesuffix>}\t%0";
568 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
569 return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
571 return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
;; Locked logical operation, one insn per code in the any_logic iterator.
;; It combines operand 1 (register or immediate) into memory operand 0,
;; with no value returned.  Operand 2 is the memory model; the flags
;; register is clobbered.
574 (define_insn "atomic_<logic><mode>"
575 [(set (match_operand:SWI 0 "memory_operand" "+m")
577 [(any_logic:SWI (match_dup 0)
578 (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
579 (match_operand:SI 2 "const_int_operand" "")] ;; model
581 (clobber (reg:CC FLAGS_REG))]
583 "lock{%;} <logic>{<imodesuffix>}\t{%1, %0|%0, %1}")