/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011, 2012 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)

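/* Illustration: clearing the lowest set bit with x & (x - 1) leaves
   zero exactly when at most one bit was set.  E.g. for x = 8 (1000b),
   8 & 7 == 0, so the test holds; for x = 6 (0110b), 6 & 5 == 4 != 0,
   so 6 is rejected.  x = 0 passes as well, hence the OR_ZERO in the
   name.  */
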
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def sdiv;        rtunion sdiv_fld1;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
    struct rtx_def smod_32;     rtunion smod_32_fld1;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
           mode != VOIDmode;
           mode = GET_MODE_WIDER_MODE (mode))
        {
          PUT_MODE (&all.reg, mode);
          PUT_MODE (&all.plus, mode);
          PUT_MODE (&all.neg, mode);
          PUT_MODE (&all.mult, mode);
          PUT_MODE (&all.sdiv, mode);
          PUT_MODE (&all.udiv, mode);
          PUT_MODE (&all.sdiv_32, mode);
          PUT_MODE (&all.smod_32, mode);
          PUT_MODE (&all.wide_trunc, mode);
          PUT_MODE (&all.shift, mode);
          PUT_MODE (&all.shift_mult, mode);
          PUT_MODE (&all.shift_add, mode);
          PUT_MODE (&all.shift_sub0, mode);
          PUT_MODE (&all.shift_sub1, mode);

          add_cost[speed][mode] = set_src_cost (&all.plus, speed);
          neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
          mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
          sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
          udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

          sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
                                          <= 2 * add_cost[speed][mode]);
          smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
                                          <= 4 * add_cost[speed][mode]);

          wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (&all.zext, wider_mode);
              PUT_MODE (&all.wide_mult, wider_mode);
              PUT_MODE (&all.wide_lshr, wider_mode);
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

              mul_widen_cost[speed][wider_mode]
                = set_src_cost (&all.wide_mult, speed);
              mul_highpart_cost[speed][mode]
                = set_src_cost (&all.wide_trunc, speed);
            }

          shift_cost[speed][mode][0] = 0;
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
          for (m = 1; m < n; m++)
            {
              XEXP (&all.shift, 1) = cint[m];
              XEXP (&all.shift_mult, 1) = pow2[m];

              shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
              shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
                                                            speed);
              shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
                                                             speed);
              shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
                                                             speed);
            }
        }
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}

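/* Illustrative note on how these tables are meant to be used (a sketch,
   not a statement about callers outside this excerpt): the cost arrays
   let expanders compare a real multiply against a synthesized shift/add
   sequence in the same units.  For instance, if mul_cost[speed][SImode]
   were 4 while each shift and add cost 1, then x * 10 computed as
   (x << 3) + (x << 1) costs 3 and would win.  */
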
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

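/* Usage sketch: for a constant operand the simplifier folds at once,
   so negate_rtx (SImode, GEN_INT (5)) yields (const_int -5) without
   emitting insns, while a REG operand falls through to expand_unop,
   which emits the negation.  */
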
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}

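/* Usage sketch: store_bit_field_1 below asks for operand 3 of insv,
   i.e. the mode in which the insn wants the source value:

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   A MAX_MACHINE_MODE result means the target lacks the pattern
   entirely and the shift-and-mask fallback must be used.  */
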
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

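  /* Worked example with assumed parameters: for a register destination
     on a 32-bit-word target, UNIT is BITS_PER_WORD = 32, so bitnum = 37
     gives offset = 1, bitpos = 5, and
     byte_offset = (37 % 32) / 8 + 1 * 4 = 4, i.e. the field begins four
     bytes into OP0 viewed as a byte array.  */
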
  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
                 || validate_subreg (fieldmode, GET_MODE (op0), op0,
                                     byte_offset)))
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      /* Do not use insv if the bit region is restricted and
         op_mode integer at offset doesn't fit into the
         restricted region.  */
      && !(MEM_P (op0) && bitregion_end
           && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
              > bitregion_end + 1))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
         truncate to the inner mode, perform the insertion on a temporary and
         truncate the result to the original destination.  Note that we can't
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
         X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
          && REG_P (SUBREG_REG (xop0))
          && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                              op_mode)))
        {
          rtx tem = gen_reg_rtx (op_mode);
          emit_move_insn (tem, xop0);
          xop0 = tem;
          copy_back = true;
        }

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

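      /* Worked example with assumed parameters: with unit = 32,
         bitsize = 8 and xbitpos = 4 counted from the lsb, an insv
         pattern that numbers bits from the msb instead is handed
         xbitpos = 32 - 8 - 4 = 20 -- the same eight bits, numbered
         from the other end of the unit.  */
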
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (op_mode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (op_mode, value1);
            }
          else if (CONST_INT_P (value))
            value1 = gen_int_mode (INTVAL (value), op_mode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
        {
          if (copy_back)
            convert_move (op0, xop0, true);
          return true;
        }
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
         BLKmode, get the smallest mode consistent with the alignment.  If
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
         mode.  Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
          || (op_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
        bestmode = get_best_mode (bitsize, bitnum,
                                  bitregion_start, bitregion_end,
                                  MEM_ALIGN (op0),
                                  (op_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : op_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          rtx last, tempreg, xop0;
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          last = get_last_insn ();

          /* Adjust address to point to the containing unit of
             that mode.  Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;
          xop0 = adjust_address (op0, bestmode, xoffset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
                         bitregion_start, bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 enum machine_mode fieldmode,
                 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
        op_mode = VOIDmode;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx),
                                op_mode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}

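/* Usage sketch (hypothetical call, for illustration): a front end
   storing a 3-bit field at bit 5 of a byte-aligned object in memory,
   with no restricted bit region, would arrive here roughly as

     store_bit_field (mem, 3, 5, 0, 0, QImode, val);

   and either hit the insv path in store_bit_field_1 or fall through
   to the shift-and-mask code in store_fixed_bit_field.  */
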
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 value);
          return;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
          && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
          && flag_strict_volatile_bitfields > 0)
        mode = GET_MODE (op0);
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                              bitregion_start, bitregion_end,
                              MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 bitregion_start, bitregion_end, value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

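/* Worked example with assumed values: storing VALUE = 5 (101b) into a
   3-bit field at BITPOS 4 of a 32-bit MODE computes
   value = 5 << 4 = 0x50 and mask_rtx (mode, 4, 3, 1) = ~0x70, so the
   sequence above amounts to  temp = (op0 & ~0x70) | 0x50  followed by
   a single store back to OP0.  */
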
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          enum machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                               thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}

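/* Worked example with assumed values: storing an 8-bit field at
   BITPOS 29 of word-aligned registers (UNIT = 32) takes two
   iterations: first thissize = MIN (8, 32 - 29) = 3 bits at thispos 29
   of word 0, then the remaining 5 bits at thispos 0 of word 1.  On a
   little-endian target the first PART is the low 3 bits of VALUE and
   the second the next 5 bits up.  */
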
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                     unsigned HOST_WIDE_INT bitnum,
                     int unsignedp, bool packedp, rtx target,
                     enum machine_mode mode, enum machine_mode tmode,
                     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
        new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
        new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
        new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
        new_mode = MIN_MODE_VECTOR_UACCUM;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
        if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        {
          target = ops[0].value;
          if (GET_MODE (target) != mode)
            return gen_lowpart (tmode, target);
          return target;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          {
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
        else if (REG_P (op0))
          {
            rtx reg, subreg;
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
                                            MODE_INT);
            reg = gen_reg_rtx (imode);
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
            emit_move_insn (subreg, op0);
            op0 = reg;
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
          }
        else
          {
            rtx mem = assign_stack_temp (GET_MODE (op0),
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
            emit_move_insn (mem, op0);
            op0 = adjust_address (mem, BLKmode, 0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (GET_CODE (op0) == MEM
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
        {
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                         byte_offset);
          if (sub == NULL)
            goto no_subreg_mode_swap;
          op0 = sub;
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, false, target_part, mode,
                                 word_mode);

          gcc_assert (target_part);

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }

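  /* Illustrative note: the two-shift sign extension above is the
     classic idiom.  For an 8-bit field in a 32-bit mode, TARGET << 24
     moves the field's msb to bit 31 and the arithmetic >> 24 brings it
     back, replicating the sign bit across the upper 24 bits.  */
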
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* Do not use extv/extzv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
    {
      struct expand_operand ops[4];
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
      if (MEM_P (xop0))
        /* Get ref to first byte containing part of the field.  */
        xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are extracting from.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      if (xtarget == 0)
        xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
        {
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
             between the mode of the extraction (word_mode) and the target
             mode.  Instead, create a temporary and use convert_move to set
             the target.  */
          if (REG_P (xtarget)
              && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
            {
              xtarget = gen_lowpart (ext_mode, xtarget);
              if (GET_MODE_PRECISION (ext_mode)
                  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
                xspec_target_subreg = xtarget;
            }
          else
            xtarget = gen_reg_rtx (ext_mode);
        }

      create_output_operand (&ops[0], xtarget, ext_mode);
      create_fixed_operand (&ops[1], xop0);
      create_integer_operand (&ops[2], bitsize);
      create_integer_operand (&ops[3], xbitpos);
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
                             4, ops))
        {
          xtarget = ops[0].value;
          if (xtarget == xspec_target)
            return xtarget;
          if (xtarget == xspec_target_subreg)
            return xspec_target;
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
        }
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
         OP0 is BLKmode, get the smallest mode consistent with the
         alignment.  If OP0 is a non-BLKmode object that is no
         wider than EXT_MODE, use its mode.  Otherwise, use the
         smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || (ext_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
        bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
                                  (ext_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : ext_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          /* Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;

          /* Make sure the register is big enough for the whole field.  */
          if (xoffset * BITS_PER_UNIT + unit
              >= offset * BITS_PER_UNIT + bitsize)
            {
              rtx last, result, xop0;

              last = get_last_insn ();

              /* Fetch it to a register in that size.  */
              xop0 = adjust_address (op0, bestmode, xoffset);
              xop0 = force_reg (bestmode, xop0);
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
                                            unsignedp, packedp, target,
                                            mode, tmode, false);
              if (result)
                return result;

              delete_insns_since (last);
            }
        }
    }

  if (!fallback_p)
    return NULL;

  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                    bitpos, target, unsignedp, packedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}

/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
{
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
                              target, mode, tmode, true);
}

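/* Usage sketch (hypothetical call, for illustration): loading an
   unsigned 3-bit field that starts at bit 5 of an object in memory
   might be spelled

     rtx x = extract_bit_field (mem, 3, 5, 1, false, NULL_RTX,
                                SImode, SImode);

   which returns a pseudo holding the zero-extended field value.  */
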
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp, bool packedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      if (MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0)
        {
          if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
            mode = GET_MODE (op0);
          else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
            mode = GET_MODE (target);
          else
            mode = tmode;
        }
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
                              MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* If we're accessing a volatile MEM, we can't do the next
         alignment step if it results in a multi-word access where we
         otherwise wouldn't have one.  So, check for that case
         here.  */
      if (MEM_P (op0)
          && MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0
          && bitpos + bitsize <= total_bits
          && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
        {
          if (STRICT_ALIGNMENT)
            {
              static bool informed_about_misalignment = false;
              bool warned;

              if (packedp)
                {
                  if (bitsize == total_bits)
                    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure member"
                                         " because of packed attribute");
                  else
                    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure bitfield"
                                         " because of packed attribute");

                  return extract_split_bit_field (op0, bitsize,
                                                  bitpos + offset * BITS_PER_UNIT,
                                                  unsignedp);
                }

              if (bitsize == total_bits)
                warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure member");
              else
                warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure bitfield");

              if (! informed_about_misalignment && warned)
                {
                  informed_about_misalignment = true;
                  inform (input_location,
                          "when a volatile object spans multiple type-sized locations,"
                          " the compiler must choose between using a single mis-aligned access to"
                          " preserve the volatility, or using multiple aligned accesses to avoid"
                          " runtime faults; this code may fail at runtime if the hardware does"
                          " not allow this access");
                }
            }
        }
      else
        {
          /* Get ref to an aligned byte, halfword, or word containing the field.
             Adjust BITPOS to be position within a word,
             and OFFSET to be the offset of that word.
             Then alter OP0 to refer to that word.  */
          bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
          offset -= (offset % (total_bits / BITS_PER_UNIT));
        }

      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode) subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

1887 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888 then arithmetic-shift its lsb to the lsb of the word. */
1889 op0 = force_reg (mode, op0);
1891 /* Find the narrowest integer mode that contains the field. */
1893 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894 mode = GET_MODE_WIDER_MODE (mode))
1895 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1897 op0 = convert_to_mode (mode, op0, 0);
1904 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1906 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907 /* Maybe propagate the target for the shift. */
1908 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1912 return expand_shift (RSHIFT_EXPR, mode, op0,
1913 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
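/* A minimal illustration of the two extraction strategies above (a
   sketch, not part of this file): for an unsigned field, shift the
   field to the lsb and mask; for a signed field, left-shift the
   field's msb to the word's msb and arithmetic-shift back so the
   sign bit is replicated.  The 32-bit types and helper names are
   assumptions of the sketch; it relies on >> of a negative value
   being an arithmetic shift, which is what the RTL above guarantees.
   Assumes 0 < SIZE < 32 and POS + SIZE <= 32.  */
#if 0
#include <stdint.h>

static uint32_t
extract_unsigned (uint32_t word, unsigned pos, unsigned size)
{
  return (word >> pos) & (((uint32_t) 1 << size) - 1);
}

static int32_t
extract_signed (uint32_t word, unsigned pos, unsigned size)
{
  /* msb of field -> bit 31, then lsb of field -> bit 0.  */
  return (int32_t) (word << (32 - pos - size)) >> (32 - size);
}
#endif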
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918 complement of that if COMPLEMENT. The mask is truncated if
1919 necessary to the width of mode MODE. The mask is zero-extended if
1920 BITSIZE+BITPOS is too small for MODE. */
1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1927 mask = double_int_mask (bitsize);
1928 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1931 mask = double_int_not (mask);
1933 return immed_double_int_const (mask, mode);
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1944 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1947 return immed_double_int_const (val, mode);
1950 /* Extract a bit field that is split across two words
1951 and return an RTX for the result.
1953 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1954 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1955 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1958 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1959 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1962 unsigned int bitsdone = 0;
1963 rtx result = NULL_RTX;
1966 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1968 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1969 unit = BITS_PER_WORD;
1971 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1973 while (bitsdone < bitsize)
1975 unsigned HOST_WIDE_INT thissize;
1977 unsigned HOST_WIDE_INT thispos;
1978 unsigned HOST_WIDE_INT offset;
1980 offset = (bitpos + bitsdone) / unit;
1981 thispos = (bitpos + bitsdone) % unit;
1983 /* THISSIZE must not overrun a word boundary. Otherwise,
1984 extract_fixed_bit_field will call us again, and we will mutually
1986 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1987 thissize = MIN (thissize, unit - thispos);
1989 /* If OP0 is a register, then handle OFFSET here.
1991 When handling multiword bitfields, extract_bit_field may pass
1992 down a word_mode SUBREG of a larger REG for a bitfield that actually
1993 crosses a word boundary. Thus, for a SUBREG, we must find
1994 the current word starting from the base register. */
1995 if (GET_CODE (op0) == SUBREG)
1997 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1998 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1999 GET_MODE (SUBREG_REG (op0)));
2002 else if (REG_P (op0))
2004 word = operand_subword_force (op0, offset, GET_MODE (op0));
2010 /* Extract the parts in bit-counting order,
2011 whose meaning is determined by BYTES_PER_UNIT.
2012 OFFSET is in UNITs, and UNIT is in bits.
2013 extract_fixed_bit_field wants offset in bytes. */
2014 part = extract_fixed_bit_field (word_mode, word,
2015 offset * unit / BITS_PER_UNIT,
2016 thissize, thispos, 0, 1, false);
2017 bitsdone += thissize;
2019 /* Shift this part into place for the result. */
2020 if (BYTES_BIG_ENDIAN)
2022 if (bitsize != bitsdone)
2023 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2024 bitsize - bitsdone, 0, 1);
2028 if (bitsdone != thissize)
2029 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2030 bitsdone - thissize, 0, 1);
2036 /* Combine the parts with bitwise or. This works
2037 because we extracted each part as an unsigned bit field. */
2038 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2044 /* Unsigned bit field: we are done. */
2047 /* Signed bit field: sign-extend with two arithmetic shifts. */
2048 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2049 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2050 return expand_shift (RSHIFT_EXPR, word_mode, result,
2051 BITS_PER_WORD - bitsize, NULL_RTX, 0);
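/* A sketch (not from this file) of what the loop above computes for a
   field straddling two 32-bit little-endian words: gather each part
   zero-extended, OR the parts together, then sign-extend with two
   shifts.  The helper name and fixed word size are assumptions;
   requires 0 < BITPOS < 32, BITSIZE < 32 and BITPOS + BITSIZE > 32.  */
#if 0
#include <stdint.h>

static int32_t
extract_split32 (uint32_t w0, uint32_t w1, unsigned bitpos,
                 unsigned bitsize, int sign_extend)
{
  unsigned lo_bits = 32 - bitpos;            /* bits taken from W0 */
  uint32_t result = w0 >> bitpos;            /* first part */
  result |= w1 << lo_bits;                   /* second part */
  result &= ((uint32_t) 1 << bitsize) - 1;
  if (sign_extend)
    return (int32_t) (result << (32 - bitsize)) >> (32 - bitsize);
  return (int32_t) result;
}
#endif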
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2056 MODE, fill the upper bits with zeros. Fail if the layout of either
2057 mode is unknown (as for CC modes) or if the extraction would involve
2058 unprofitable mode punning. Return the value on success, otherwise
2061 This is different from gen_lowpart* in these respects:
2063 - the returned value must always be considered an rvalue
2065 - when MODE is wider than SRC_MODE, the extraction involves
2068 - when MODE is smaller than SRC_MODE, the extraction involves
2069 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2071 In other words, this routine performs a computation, whereas the
2072 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2078 enum machine_mode int_mode, src_int_mode;
2080 if (mode == src_mode)
2083 if (CONSTANT_P (src))
2085 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086 fails, it will happily create (subreg (symbol_ref)) or similar
2088 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2093 if (GET_MODE (src) == VOIDmode
2094 || !validate_subreg (mode, src_mode, src, byte))
2097 src = force_reg (GET_MODE (src), src);
2098 return gen_rtx_SUBREG (mode, src, byte);
2101 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2104 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105 && MODES_TIEABLE_P (mode, src_mode))
2107 rtx x = gen_lowpart_common (mode, src);
2112 src_int_mode = int_mode_for_mode (src_mode);
2113 int_mode = int_mode_for_mode (mode);
2114 if (src_int_mode == BLKmode || int_mode == BLKmode)
2117 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2119 if (!MODES_TIEABLE_P (int_mode, mode))
2122 src = gen_lowpart (src_int_mode, src);
2123 src = convert_modes (int_mode, src_int_mode, src, true);
2124 src = gen_lowpart (mode, src);
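/* Roughly, the int_mode route above is the RTL analogue of punning
   through a same-sized integer type and then zero-extending.  A
   sketch in plain C (the helper name is hypothetical):  */
#if 0
#include <stdint.h>
#include <string.h>

static uint64_t
low_bits_of_float (float f)
{
  uint32_t bits;
  memcpy (&bits, &f, sizeof bits);  /* reinterpret, SUBREG-style */
  return (uint64_t) bits;           /* widen; upper bits zero-filled */
}
#endif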
2128 /* Add INC into TARGET. */
2131 expand_inc (rtx target, rtx inc)
2133 rtx value = expand_binop (GET_MODE (target), add_optab,
2135 target, 0, OPTAB_LIB_WIDEN);
2136 if (value != target)
2137 emit_move_insn (target, value);
2140 /* Subtract DEC from TARGET. */
2143 expand_dec (rtx target, rtx dec)
2145 rtx value = expand_binop (GET_MODE (target), sub_optab,
2147 target, 0, OPTAB_LIB_WIDEN);
2148 if (value != target)
2149 emit_move_insn (target, value);
2152 /* Output a shift instruction for expression code CODE,
2153 with SHIFTED being the rtx for the value to shift,
2154 and AMOUNT the rtx for the amount to shift by.
2155 Store the result in the rtx TARGET, if that is convenient.
2156 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157 Return the rtx for where the value is. */
2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161 rtx amount, rtx target, int unsignedp)
2164 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166 optab lshift_optab = ashl_optab;
2167 optab rshift_arith_optab = ashr_optab;
2168 optab rshift_uns_optab = lshr_optab;
2169 optab lrotate_optab = rotl_optab;
2170 optab rrotate_optab = rotr_optab;
2171 enum machine_mode op1_mode;
2173 bool speed = optimize_insn_for_speed_p ();
2176 op1_mode = GET_MODE (op1);
2178 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2179 shift amount is a vector, use the vector/vector shift patterns. */
2180 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2182 lshift_optab = vashl_optab;
2183 rshift_arith_optab = vashr_optab;
2184 rshift_uns_optab = vlshr_optab;
2185 lrotate_optab = vrotl_optab;
2186 rrotate_optab = vrotr_optab;
2189 /* Previously detected shift-counts computed by NEGATE_EXPR
2190 and shifted in the other direction; but that does not work
2193 if (SHIFT_COUNT_TRUNCATED)
2195 if (CONST_INT_P (op1)
2196 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199 % GET_MODE_BITSIZE (mode));
2200 else if (GET_CODE (op1) == SUBREG
2201 && subreg_lowpart_p (op1)
2202 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2203 op1 = SUBREG_REG (op1);
2206 if (op1 == const0_rtx)
2209 /* Check whether it's cheaper to implement a left shift by a constant
2210 bit count by a sequence of additions. */
2211 if (code == LSHIFT_EXPR
2212 && CONST_INT_P (op1)
2214 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2215 && INTVAL (op1) < MAX_BITS_PER_WORD
2216 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2217 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2220 for (i = 0; i < INTVAL (op1); i++)
2222 temp = force_reg (mode, shifted);
2223 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2224 unsignedp, OPTAB_LIB_WIDEN);
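/* Sketch of the rewrite above (not part of this file): x << 3 as
   three self-additions, which is what the loop emits when
   shift_cost[speed][mode][3] exceeds 3 * add_cost[speed][mode].  */
#if 0
#include <stdint.h>

static uint32_t
shl3_by_adds (uint32_t x)
{
  x += x;   /* x << 1 */
  x += x;   /* x << 2 */
  x += x;   /* x << 3 */
  return x;
}
#endif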
2229 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2231 enum optab_methods methods;
2234 methods = OPTAB_DIRECT;
2235 else if (attempt == 1)
2236 methods = OPTAB_WIDEN;
2238 methods = OPTAB_LIB_WIDEN;
2242 /* Widening does not work for rotation. */
2243 if (methods == OPTAB_WIDEN)
2245 else if (methods == OPTAB_LIB_WIDEN)
2247 /* If we have been unable to open-code this by a rotation,
2248 do it as the IOR of two shifts. I.e., to rotate A
2249 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2250 where C is the bitsize of A.
2252 It is theoretically possible that the target machine might
2253 not be able to perform either shift and hence we would
2254 be making two libcalls rather than just the one for the
2255 shift (similarly if IOR could not be done). We will allow
2256 this extremely unlikely lossage to avoid complicating the
2259 rtx subtarget = target == shifted ? 0 : target;
2260 rtx new_amount, other_amount;
2264 if (CONST_INT_P (op1))
2265 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2269 = simplify_gen_binary (MINUS, GET_MODE (op1),
2270 GEN_INT (GET_MODE_PRECISION (mode)),
2273 shifted = force_reg (mode, shifted);
2275 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2276 mode, shifted, new_amount, 0, 1);
2277 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2278 mode, shifted, other_amount,
2280 return expand_binop (mode, ior_optab, temp, temp1, target,
2281 unsignedp, methods);
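/* The IOR-of-two-shifts fallback above, as a C sketch for a 32-bit
   left rotate.  Requires 0 < N < 32; N == 0 was already handled by
   the op1 == const0_rtx test, and in RTL the two shift amounts are
   formed explicitly as N and C - N.  */
#if 0
#include <stdint.h>

static uint32_t
rotl32 (uint32_t a, unsigned n)
{
  return (a << n) | (a >> (32 - n));
}
#endif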
2284 temp = expand_binop (mode,
2285 left ? lrotate_optab : rrotate_optab,
2286 shifted, op1, target, unsignedp, methods);
2289 temp = expand_binop (mode,
2290 left ? lshift_optab : rshift_uns_optab,
2291 shifted, op1, target, unsignedp, methods);
2293 /* Do arithmetic shifts.
2294 Also, if we are going to widen the operand, we can just as well
2295 use an arithmetic right-shift instead of a logical one. */
2296 if (temp == 0 && ! rotate
2297 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2299 enum optab_methods methods1 = methods;
2301 /* If trying to widen a log shift to an arithmetic shift,
2302 don't accept an arithmetic shift of the same size. */
2304 methods1 = OPTAB_MUST_WIDEN;
2306 /* Arithmetic shift */
2308 temp = expand_binop (mode,
2309 left ? lshift_optab : rshift_arith_optab,
2310 shifted, op1, target, unsignedp, methods1);
2313 /* We used to try extzv here for logical right shifts, but that was
2314 only useful for one machine, the VAX, and caused poor code
2315 generation there for lshrdi3, so the code was deleted and a
2316 define_expand for lshrsi3 was added to vax.md. */
2323 /* Output a shift instruction for expression code CODE,
2324 with SHIFTED being the rtx for the value to shift,
2325 and AMOUNT the amount to shift by.
2326 Store the result in the rtx TARGET, if that is convenient.
2327 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2328 Return the rtx for where the value is. */
2331 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2332 int amount, rtx target, int unsignedp)
2334 return expand_shift_1 (code, mode,
2335 shifted, GEN_INT (amount), target, unsignedp);
2338 /* Output a shift instruction for expression code CODE,
2339 with SHIFTED being the rtx for the value to shift,
2340 and AMOUNT the tree for the amount to shift by.
2341 Store the result in the rtx TARGET, if that is convenient.
2342 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2343 Return the rtx for where the value is. */
2346 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2347 tree amount, rtx target, int unsignedp)
2349 return expand_shift_1 (code, mode,
2350 shifted, expand_normal (amount), target, unsignedp);
2354 /* Indicates the type of fixup needed after a constant multiplication.
2355 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2356 the result should be negated, and ADD_VARIANT means that the
2357 multiplicand should be added to the result. */
2358 enum mult_variant {basic_variant, negate_variant, add_variant};
2360 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2361 const struct mult_cost *, enum machine_mode mode);
2362 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2363 struct algorithm *, enum mult_variant *, int);
2364 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2365 const struct algorithm *, enum mult_variant);
2366 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2367 int, rtx *, int *, int *);
2368 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2369 static rtx extract_high_half (enum machine_mode, rtx);
2370 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2371 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2373 /* Compute and return the best algorithm for multiplying by T.
2374 The algorithm must cost less than COST_LIMIT.
2375 If retval.cost >= COST_LIMIT, no algorithm was found and all
2376 other fields of the returned struct are undefined.
2377 MODE is the machine mode of the multiplication. */
2380 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2381 const struct mult_cost *cost_limit, enum machine_mode mode)
2384 struct algorithm *alg_in, *best_alg;
2385 struct mult_cost best_cost;
2386 struct mult_cost new_limit;
2387 int op_cost, op_latency;
2388 unsigned HOST_WIDE_INT orig_t = t;
2389 unsigned HOST_WIDE_INT q;
2390 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2392 bool cache_hit = false;
2393 enum alg_code cache_alg = alg_zero;
2394 bool speed = optimize_insn_for_speed_p ();
2396 /* Indicate that no algorithm is yet found. If no algorithm
2397 is found, this value will be returned to indicate failure. */
2398 alg_out->cost.cost = cost_limit->cost + 1;
2399 alg_out->cost.latency = cost_limit->latency + 1;
2401 if (cost_limit->cost < 0
2402 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2405 /* Restrict the bits of "t" to the multiplication's mode. */
2406 t &= GET_MODE_MASK (mode);
2408 /* t == 1 can be done at zero cost. */
2412 alg_out->cost.cost = 0;
2413 alg_out->cost.latency = 0;
2414 alg_out->op[0] = alg_m;
2418 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2422 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2427 alg_out->cost.cost = zero_cost[speed];
2428 alg_out->cost.latency = zero_cost[speed];
2429 alg_out->op[0] = alg_zero;
2434 /* We'll be needing a couple extra algorithm structures now. */
2436 alg_in = XALLOCA (struct algorithm);
2437 best_alg = XALLOCA (struct algorithm);
2438 best_cost = *cost_limit;
2440 /* Compute the hash index. */
2441 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2443 /* See if we already know what to do for T. */
2444 if (alg_hash[hash_index].t == t
2445 && alg_hash[hash_index].mode == mode
2447 && alg_hash[hash_index].speed == speed
2448 && alg_hash[hash_index].alg != alg_unknown)
2450 cache_alg = alg_hash[hash_index].alg;
2452 if (cache_alg == alg_impossible)
2454 /* The cache tells us that it's impossible to synthesize
2455 multiplication by T within alg_hash[hash_index].cost. */
2456 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2457 /* COST_LIMIT is at least as restrictive as the one
2458 recorded in the hash table, in which case we have no
2459 hope of synthesizing a multiplication. Just
2463 /* If we get here, COST_LIMIT is less restrictive than the
2464 one recorded in the hash table, so we may be able to
2465 synthesize a multiplication. Proceed as if we didn't
2466 have the cache entry. */
2470 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2471 /* The cached algorithm shows that this multiplication
2472 requires more cost than COST_LIMIT. Just return. This
2473 way, we don't clobber this cache entry with
2474 alg_impossible but retain useful information. */
2486 goto do_alg_addsub_t_m2;
2488 case alg_add_factor:
2489 case alg_sub_factor:
2490 goto do_alg_addsub_factor;
2493 goto do_alg_add_t2_m;
2496 goto do_alg_sub_t2_m;
2504 /* If we have a group of zero bits at the low-order part of T, try
2505 multiplying by the remaining bits and then doing a shift. */
2510 m = floor_log2 (t & -t); /* m = number of low zero bits */
2514 /* The function expand_shift will choose between a shift and
2515 a sequence of additions, so the observed cost is given as
2516 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2517 op_cost = m * add_cost[speed][mode];
2518 if (shift_cost[speed][mode][m] < op_cost)
2519 op_cost = shift_cost[speed][mode][m];
2520 new_limit.cost = best_cost.cost - op_cost;
2521 new_limit.latency = best_cost.latency - op_cost;
2522 synth_mult (alg_in, q, &new_limit, mode);
2524 alg_in->cost.cost += op_cost;
2525 alg_in->cost.latency += op_cost;
2526 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528 struct algorithm *x;
2529 best_cost = alg_in->cost;
2530 x = alg_in, alg_in = best_alg, best_alg = x;
2531 best_alg->log[best_alg->ops] = m;
2532 best_alg->op[best_alg->ops] = alg_shift;
2535 /* See if treating ORIG_T as a signed number yields a better
2536 sequence. Try this sequence only for a negative ORIG_T
2537 as it would be useless for a non-negative ORIG_T. */
2538 if ((HOST_WIDE_INT) orig_t < 0)
2540 /* Shift ORIG_T as follows because a right shift of a
2541 negative-valued signed type is implementation
2543 q = ~(~orig_t >> m);
2544 /* The function expand_shift will choose between a shift
2545 and a sequence of additions, so the observed cost is
2546 given as MIN (m * add_cost[speed][mode],
2547 shift_cost[speed][mode][m]). */
2548 op_cost = m * add_cost[speed][mode];
2549 if (shift_cost[speed][mode][m] < op_cost)
2550 op_cost = shift_cost[speed][mode][m];
2551 new_limit.cost = best_cost.cost - op_cost;
2552 new_limit.latency = best_cost.latency - op_cost;
2553 synth_mult (alg_in, q, &new_limit, mode);
2555 alg_in->cost.cost += op_cost;
2556 alg_in->cost.latency += op_cost;
2557 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2559 struct algorithm *x;
2560 best_cost = alg_in->cost;
2561 x = alg_in, alg_in = best_alg, best_alg = x;
2562 best_alg->log[best_alg->ops] = m;
2563 best_alg->op[best_alg->ops] = alg_shift;
2571 /* If we have an odd number, add or subtract one. */
2574 unsigned HOST_WIDE_INT w;
2577 for (w = 1; (w & t) != 0; w <<= 1)
2579 /* If T was -1, then W will be zero after the loop. This is another
2580 case where T ends with ...111. Handling this with (T + 1) and
2581 subtracting 1 produces slightly better code and results in much
2582 faster algorithm selection than treating it like the ...0111 case
2586 /* Reject the case where t is 3.
2587 Thus we prefer addition in that case. */
2590 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2592 op_cost = add_cost[speed][mode];
2593 new_limit.cost = best_cost.cost - op_cost;
2594 new_limit.latency = best_cost.latency - op_cost;
2595 synth_mult (alg_in, t + 1, &new_limit, mode);
2597 alg_in->cost.cost += op_cost;
2598 alg_in->cost.latency += op_cost;
2599 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601 struct algorithm *x;
2602 best_cost = alg_in->cost;
2603 x = alg_in, alg_in = best_alg, best_alg = x;
2604 best_alg->log[best_alg->ops] = 0;
2605 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2610 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2612 op_cost = add_cost[speed][mode];
2613 new_limit.cost = best_cost.cost - op_cost;
2614 new_limit.latency = best_cost.latency - op_cost;
2615 synth_mult (alg_in, t - 1, &new_limit, mode);
2617 alg_in->cost.cost += op_cost;
2618 alg_in->cost.latency += op_cost;
2619 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621 struct algorithm *x;
2622 best_cost = alg_in->cost;
2623 x = alg_in, alg_in = best_alg, best_alg = x;
2624 best_alg->log[best_alg->ops] = 0;
2625 best_alg->op[best_alg->ops] = alg_add_t_m2;
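/* Concrete instances of the two "odd T" rewrites above, as a sketch
   for 32-bit values (helper names are hypothetical).  */
#if 0
#include <stdint.h>

/* T = 7 ends with ...111: multiply by T + 1 = 8 and subtract.  */
static uint32_t
mul7 (uint32_t x)
{
  return (x << 3) - x;
}

/* T = 5 ends with ...01: multiply by T - 1 = 4 and add.  */
static uint32_t
mul5 (uint32_t x)
{
  return (x << 2) + x;
}
#endif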
2629 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2630 quickly with a - a * n for some appropriate constant n. */
2631 m = exact_log2 (-orig_t + 1);
2632 if (m >= 0 && m < maxm)
2634 op_cost = shiftsub1_cost[speed][mode][m];
2635 new_limit.cost = best_cost.cost - op_cost;
2636 new_limit.latency = best_cost.latency - op_cost;
2637 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_cost;
2641 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2643 struct algorithm *x;
2644 best_cost = alg_in->cost;
2645 x = alg_in, alg_in = best_alg, best_alg = x;
2646 best_alg->log[best_alg->ops] = m;
2647 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2655 /* Look for factors of t of the form
2656 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2657 If we find such a factor, we can multiply by t using an algorithm that
2658 multiplies by q, shift the result by m and add/subtract it to itself.
2660 We search for large factors first and loop down, even if large factors
2661 are less probable than small; if we find a large factor we will find a
2662 good sequence quickly, and therefore be able to prune (by decreasing
2663 COST_LIMIT) the search. */
2665 do_alg_addsub_factor:
2666 for (m = floor_log2 (t - 1); m >= 2; m--)
2668 unsigned HOST_WIDE_INT d;
2670 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2671 if (t % d == 0 && t > d && m < maxm
2672 && (!cache_hit || cache_alg == alg_add_factor))
2674 /* If the target has a cheap shift-and-add instruction use
2675 that in preference to a shift insn followed by an add insn.
2676 Assume that the shift-and-add is "atomic" with a latency
2677 equal to its cost, otherwise assume that on superscalar
2678 hardware the shift may be executed concurrently with the
2679 earlier steps in the algorithm. */
2680 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2681 if (shiftadd_cost[speed][mode][m] < op_cost)
2683 op_cost = shiftadd_cost[speed][mode][m];
2684 op_latency = op_cost;
2687 op_latency = add_cost[speed][mode];
2689 new_limit.cost = best_cost.cost - op_cost;
2690 new_limit.latency = best_cost.latency - op_latency;
2691 synth_mult (alg_in, t / d, &new_limit, mode);
2693 alg_in->cost.cost += op_cost;
2694 alg_in->cost.latency += op_latency;
2695 if (alg_in->cost.latency < op_cost)
2696 alg_in->cost.latency = op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 struct algorithm *x;
2700 best_cost = alg_in->cost;
2701 x = alg_in, alg_in = best_alg, best_alg = x;
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_add_factor;
2705 /* Other factors will have been taken care of in the recursion. */
2709 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2710 if (t % d == 0 && t > d && m < maxm
2711 && (!cache_hit || cache_alg == alg_sub_factor))
2713 /* If the target has a cheap shift-and-subtract insn use
2714 that in preference to a shift insn followed by a sub insn.
2715 Assume that the shift-and-sub is "atomic" with a latency
2716 equal to its cost, otherwise assume that on superscalar
2717 hardware the shift may be executed concurrently with the
2718 earlier steps in the algorithm. */
2719 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2720 if (shiftsub0_cost[speed][mode][m] < op_cost)
2722 op_cost = shiftsub0_cost[speed][mode][m];
2723 op_latency = op_cost;
2726 op_latency = add_cost[speed][mode];
2728 new_limit.cost = best_cost.cost - op_cost;
2729 new_limit.latency = best_cost.latency - op_latency;
2730 synth_mult (alg_in, t / d, &new_limit, mode);
2732 alg_in->cost.cost += op_cost;
2733 alg_in->cost.latency += op_latency;
2734 if (alg_in->cost.latency < op_cost)
2735 alg_in->cost.latency = op_cost;
2736 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2738 struct algorithm *x;
2739 best_cost = alg_in->cost;
2740 x = alg_in, alg_in = best_alg, best_alg = x;
2741 best_alg->log[best_alg->ops] = m;
2742 best_alg->op[best_alg->ops] = alg_sub_factor;
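/* A sketch of the factoring step (not from this file): t = 45
   factors as (2^2 + 1) * (2^3 + 1), so the recursion multiplies by 5
   and the current step shift-adds with m = 3.  */
#if 0
#include <stdint.h>

static uint32_t
mul45 (uint32_t x)
{
  uint32_t t = (x << 2) + x;   /* x * 5 */
  return (t << 3) + t;         /* (x * 5) * 9 = x * 45 */
}
#endif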
2750 /* Try shift-and-add (load effective address) instructions,
2751 i.e. do a*3, a*5, a*9. */
2758 if (m >= 0 && m < maxm)
2760 op_cost = shiftadd_cost[speed][mode][m];
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_t2_m;
2783 if (m >= 0 && m < maxm)
2785 op_cost = shiftsub0_cost[speed][mode][m];
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794 struct algorithm *x;
2795 best_cost = alg_in->cost;
2796 x = alg_in, alg_in = best_alg, best_alg = x;
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2806 /* If best_cost has not decreased, we have not found any algorithm. */
2807 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2809 /* We failed to find an algorithm. Record alg_impossible for
2810 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2811 we are asked to find an algorithm for T within the same or
2812 lower COST_LIMIT, we can immediately return to the
2814 alg_hash[hash_index].t = t;
2815 alg_hash[hash_index].mode = mode;
2816 alg_hash[hash_index].speed = speed;
2817 alg_hash[hash_index].alg = alg_impossible;
2818 alg_hash[hash_index].cost = *cost_limit;
2822 /* Cache the result. */
2825 alg_hash[hash_index].t = t;
2826 alg_hash[hash_index].mode = mode;
2827 alg_hash[hash_index].speed = speed;
2828 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2829 alg_hash[hash_index].cost.cost = best_cost.cost;
2830 alg_hash[hash_index].cost.latency = best_cost.latency;
2833 /* If we are getting a too long sequence for `struct algorithm'
2834 to record, make this search fail. */
2835 if (best_alg->ops == MAX_BITS_PER_WORD)
2838 /* Copy the algorithm from temporary space to the space at alg_out.
2839 We avoid using structure assignment because the majority of
2840 best_alg is normally undefined, and this is a critical function. */
2841 alg_out->ops = best_alg->ops + 1;
2842 alg_out->cost = best_cost;
2843 memcpy (alg_out->op, best_alg->op,
2844 alg_out->ops * sizeof *alg_out->op);
2845 memcpy (alg_out->log, best_alg->log,
2846 alg_out->ops * sizeof *alg_out->log);
2849 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2850 Try three variations:
2852 - a shift/add sequence based on VAL itself
2853 - a shift/add sequence based on -VAL, followed by a negation
2854 - a shift/add sequence based on VAL - 1, followed by an addition.
2856 Return true if the cheapest of these cost less than MULT_COST,
2857 describing the algorithm in *ALG and final fixup in *VARIANT. */
2860 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2861 struct algorithm *alg, enum mult_variant *variant,
2864 struct algorithm alg2;
2865 struct mult_cost limit;
2867 bool speed = optimize_insn_for_speed_p ();
2869 /* Fail quickly for impossible bounds. */
2873 /* Ensure that mult_cost provides a reasonable upper bound.
2874 Any constant multiplication can be performed with fewer
2875 than 2 * bits additions. */
2876 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2877 if (mult_cost > op_cost)
2878 mult_cost = op_cost;
2880 *variant = basic_variant;
2881 limit.cost = mult_cost;
2882 limit.latency = mult_cost;
2883 synth_mult (alg, val, &limit, mode);
2885 /* This works only if the inverted value actually fits in an
2887 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2889 op_cost = neg_cost[speed][mode];
2890 if (MULT_COST_LESS (&alg->cost, mult_cost))
2892 limit.cost = alg->cost.cost - op_cost;
2893 limit.latency = alg->cost.latency - op_cost;
2897 limit.cost = mult_cost - op_cost;
2898 limit.latency = mult_cost - op_cost;
2901 synth_mult (&alg2, -val, &limit, mode);
2902 alg2.cost.cost += op_cost;
2903 alg2.cost.latency += op_cost;
2904 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905 *alg = alg2, *variant = negate_variant;
2908 /* This proves very useful for division-by-constant. */
2909 op_cost = add_cost[speed][mode];
2910 if (MULT_COST_LESS (&alg->cost, mult_cost))
2912 limit.cost = alg->cost.cost - op_cost;
2913 limit.latency = alg->cost.latency - op_cost;
2917 limit.cost = mult_cost - op_cost;
2918 limit.latency = mult_cost - op_cost;
2921 synth_mult (&alg2, val - 1, &limit, mode);
2922 alg2.cost.cost += op_cost;
2923 alg2.cost.latency += op_cost;
2924 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925 *alg = alg2, *variant = add_variant;
2927 return MULT_COST_LESS (&alg->cost, mult_cost);
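/* Sketches of the two fixup variants (hypothetical helpers; 32-bit
   two's-complement wraparound assumed):  */
#if 0
#include <stdint.h>

/* negate_variant: synthesize -VAL, then negate the product.  */
static uint32_t
mul_minus7 (uint32_t x)
{
  return - ((x << 3) - x);   /* x * -7 = -(x * 7), mod 2^32 */
}

/* add_variant: synthesize VAL - 1, then add the multiplicand.  */
static uint32_t
mul9 (uint32_t x)
{
  return (x << 3) + x;       /* x * 9 = x * 8 + x */
}
#endif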
2930 /* A subroutine of expand_mult, used for constant multiplications.
2931 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2932 convenient. Use the shift/add sequence described by ALG and apply
2933 the final fixup specified by VARIANT. */
2936 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2937 rtx target, const struct algorithm *alg,
2938 enum mult_variant variant)
2940 HOST_WIDE_INT val_so_far;
2941 rtx insn, accum, tem;
2943 enum machine_mode nmode;
2945 /* Avoid referencing memory over and over and invalid sharing
2947 op0 = force_reg (mode, op0);
2949 /* ACCUM starts out either as OP0 or as a zero, depending on
2950 the first operation. */
2952 if (alg->op[0] == alg_zero)
2954 accum = copy_to_mode_reg (mode, const0_rtx);
2957 else if (alg->op[0] == alg_m)
2959 accum = copy_to_mode_reg (mode, op0);
2965 for (opno = 1; opno < alg->ops; opno++)
2967 int log = alg->log[opno];
2968 rtx shift_subtarget = optimize ? 0 : accum;
2970 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2973 rtx accum_target = optimize ? 0 : accum;
2976 switch (alg->op[opno])
2979 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2980 /* REG_EQUAL note will be attached to the following insn. */
2981 emit_move_insn (accum, tem);
2986 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2987 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2988 add_target ? add_target : accum_target);
2989 val_so_far += (HOST_WIDE_INT) 1 << log;
2993 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2995 add_target ? add_target : accum_target);
2996 val_so_far -= (HOST_WIDE_INT) 1 << log;
3000 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3001 log, shift_subtarget, 0);
3002 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3003 add_target ? add_target : accum_target);
3004 val_so_far = (val_so_far << log) + 1;
3008 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3009 log, shift_subtarget, 0);
3010 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3011 add_target ? add_target : accum_target);
3012 val_so_far = (val_so_far << log) - 1;
3015 case alg_add_factor:
3016 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3017 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3018 add_target ? add_target : accum_target);
3019 val_so_far += val_so_far << log;
3022 case alg_sub_factor:
3023 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3024 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3026 ? add_target : (optimize ? 0 : tem)));
3027 val_so_far = (val_so_far << log) - val_so_far;
3034 /* Write a REG_EQUAL note on the last insn so that we can cse
3035 multiplication sequences. Note that if ACCUM is a SUBREG,
3036 we've set the inner register and must properly indicate
3039 tem = op0, nmode = mode;
3040 accum_inner = accum;
3041 if (GET_CODE (accum) == SUBREG)
3043 accum_inner = SUBREG_REG (accum);
3044 nmode = GET_MODE (accum_inner);
3045 tem = gen_lowpart (nmode, op0);
3048 insn = get_last_insn ();
3049 set_dst_reg_note (insn, REG_EQUAL,
3050 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3054 if (variant == negate_variant)
3056 val_so_far = -val_so_far;
3057 accum = expand_unop (mode, neg_optab, accum, target, 0);
3059 else if (variant == add_variant)
3061 val_so_far = val_so_far + 1;
3062 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3065 /* Compare only the bits of val and val_so_far that are significant
3066 in the result mode, to avoid sign-/zero-extension confusion. */
3067 val &= GET_MODE_MASK (mode);
3068 val_so_far &= GET_MODE_MASK (mode);
3069 gcc_assert (val == val_so_far);
3074 /* Perform a multiplication and return an rtx for the result.
3075 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3076 TARGET is a suggestion for where to store the result (an rtx).
3078 We check specially for a constant integer as OP1.
3079 If you want this check for OP0 as well, then before calling
3080 you should swap the two operands if OP0 would be constant. */
3083 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3086 enum mult_variant variant;
3087 struct algorithm algorithm;
3089 bool speed = optimize_insn_for_speed_p ();
3091 /* Handling const0_rtx here allows us to use zero as a rogue value for
3093 if (op1 == const0_rtx)
3095 if (op1 == const1_rtx)
3097 if (op1 == constm1_rtx)
3098 return expand_unop (mode,
3099 GET_MODE_CLASS (mode) == MODE_INT
3100 && !unsignedp && flag_trapv
3101 ? negv_optab : neg_optab,
3104 /* These are the operations that are potentially turned into a sequence
3105 of shifts and additions. */
3106 if (SCALAR_INT_MODE_P (mode)
3107 && (unsignedp || !flag_trapv))
3109 HOST_WIDE_INT coeff = 0;
3110 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3112 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3113 less than or equal in size to `unsigned int' this doesn't matter.
3114 If the mode is larger than `unsigned int', then synth_mult works
3115 only if the constant value exactly fits in an `unsigned int' without
3116 any truncation. This means that multiplying by negative values does
3117 not work; results are off by 2^32 on a 32-bit machine. */
3119 if (CONST_INT_P (op1))
3121 /* Attempt to handle multiplication of DImode values by negative
3122 coefficients, by performing the multiplication by a positive
3123 multiplier and then inverting the result. */
3124 if (INTVAL (op1) < 0
3125 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3127 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3128 result is interpreted as an unsigned coefficient.
3129 Exclude cost of op0 from max_cost to match the cost
3130 calculation of the synth_mult. */
3131 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3133 - neg_cost[speed][mode]);
3135 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3136 &variant, max_cost))
3138 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3139 NULL_RTX, &algorithm,
3141 return expand_unop (mode, neg_optab, temp, target, 0);
3144 else coeff = INTVAL (op1);
3146 else if (GET_CODE (op1) == CONST_DOUBLE)
3148 /* If we are multiplying in DImode, it may still be a win
3149 to try to work with shifts and adds. */
3150 if (CONST_DOUBLE_HIGH (op1) == 0
3151 && CONST_DOUBLE_LOW (op1) > 0)
3152 coeff = CONST_DOUBLE_LOW (op1);
3153 else if (CONST_DOUBLE_LOW (op1) == 0
3154 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3156 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3157 + HOST_BITS_PER_WIDE_INT;
3158 return expand_shift (LSHIFT_EXPR, mode, op0,
3159 shift, target, unsignedp);
3163 /* We used to test optimize here, on the grounds that it's better to
3164 produce a smaller program when -O is not used. But this causes
3165 such a terrible slowdown sometimes that it seems better to always
3169 /* Special case powers of two. */
3170 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3171 return expand_shift (LSHIFT_EXPR, mode, op0,
3172 floor_log2 (coeff), target, unsignedp);
3174 /* Exclude cost of op0 from max_cost to match the cost
3175 calculation of the synth_mult. */
3176 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3177 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3179 return expand_mult_const (mode, op0, coeff, target,
3180 &algorithm, variant);
3184 if (GET_CODE (op0) == CONST_DOUBLE)
3191 /* Expand x*2.0 as x+x. */
3192 if (GET_CODE (op1) == CONST_DOUBLE
3193 && SCALAR_FLOAT_MODE_P (mode))
3196 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3198 if (REAL_VALUES_EQUAL (d, dconst2))
3200 op0 = force_reg (GET_MODE (op0), op0);
3201 return expand_binop (mode, add_optab, op0, op0,
3202 target, unsignedp, OPTAB_LIB_WIDEN);
3206 /* This used to use umul_optab if unsigned, but for non-widening multiply
3207 there is no difference between signed and unsigned. */
3208 op0 = expand_binop (mode,
3210 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3211 ? smulv_optab : smul_optab,
3212 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3217 /* Perform a widening multiplication and return an rtx for the result.
3218 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3219 TARGET is a suggestion for where to store the result (an rtx).
3220 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3221 or smul_widen_optab.
3223 We check specially for a constant integer as OP1, comparing the
3224 cost of a widening multiply against the cost of a sequence of shifts
3228 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3229 int unsignedp, optab this_optab)
3231 bool speed = optimize_insn_for_speed_p ();
3234 if (CONST_INT_P (op1)
3235 && GET_MODE (op0) != VOIDmode
3236 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3237 this_optab == umul_widen_optab))
3238 && CONST_INT_P (cop1)
3239 && (INTVAL (cop1) >= 0
3240 || HWI_COMPUTABLE_MODE_P (mode)))
3242 HOST_WIDE_INT coeff = INTVAL (cop1);
3244 enum mult_variant variant;
3245 struct algorithm algorithm;
3247 /* Special case powers of two. */
3248 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3250 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3251 return expand_shift (LSHIFT_EXPR, mode, op0,
3252 floor_log2 (coeff), target, unsignedp);
3255 /* Exclude cost of op0 from max_cost to match the cost
3256 calculation of the synth_mult. */
3257 max_cost = mul_widen_cost[speed][mode];
3258 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3261 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3262 return expand_mult_const (mode, op0, coeff, target,
3263 &algorithm, variant);
3266 return expand_binop (mode, this_optab, op0, op1, target,
3267 unsignedp, OPTAB_LIB_WIDEN);
3270 /* Return the smallest n such that 2**n >= X. */
3273 ceil_log2 (unsigned HOST_WIDE_INT x)
3275 return floor_log2 (x - 1) + 1;
3278 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3279 replace division by D, and put the least significant N bits of the result
3280 in *MULTIPLIER_PTR and return the most significant bit.
3282 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3283 needed precision is in PRECISION (should be <= N).
3285 PRECISION should be as small as possible so this function can choose
3286 multiplier more freely.
3288 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3289 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3291 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3292 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3295 unsigned HOST_WIDE_INT
3296 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3297 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3299 HOST_WIDE_INT mhigh_hi, mlow_hi;
3300 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3301 int lgup, post_shift;
3303 unsigned HOST_WIDE_INT nl, dummy1;
3304 HOST_WIDE_INT nh, dummy2;
3306 /* lgup = ceil(log2(divisor)); */
3307 lgup = ceil_log2 (d);
3309 gcc_assert (lgup <= n);
3312 pow2 = n + lgup - precision;
3314 /* We could handle this with some effort, but this case is much
3315 better handled directly with a scc insn, so rely on caller using that. */
3317 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3319 /* mlow = 2^(N + lgup)/d */
3320 if (pow >= HOST_BITS_PER_WIDE_INT)
3322 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3328 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3330 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3331 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3333 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3334 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3335 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3337 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3338 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3339 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3341 gcc_assert (!mhigh_hi || nh - d < d);
3342 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3343 /* Assert that mlow < mhigh. */
3344 gcc_assert (mlow_hi < mhigh_hi
3345 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3347 /* If precision == N, then mlow, mhigh exceed 2^N
3348 (but they do not exceed 2^(N+1)). */
3350 /* Reduce to lowest terms. */
3351 for (post_shift = lgup; post_shift > 0; post_shift--)
3353 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3354 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3364 *post_shift_ptr = post_shift;
3366 if (n < HOST_BITS_PER_WIDE_INT)
3368 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3369 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3370 return mhigh_lo >= mask;
3374 *multiplier_ptr = GEN_INT (mhigh_lo);
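/* Worked example (a sketch, not part of this file): for N = 32 and
   d = 7, the code above computes the 33-bit multiplier 0x124924925
   with *POST_SHIFT_PTR = 3 and returns 1 for the 33rd bit.  The
   caller then folds the extra bit in with a subtract/half-add,
   giving the familiar sequence below; `udiv7' is a hypothetical
   name.  */
#if 0
#include <stdint.h>

static uint32_t
udiv7 (uint32_t n)
{
  uint32_t t = (uint32_t) (((uint64_t) n * 0x24924925u) >> 32);
  /* q = (t + ((n - t) >> 1)) >> (post_shift - 1).  */
  return (t + ((n - t) >> 1)) >> 2;
}
#endif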
3379 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3380 congruent to 1 (mod 2**N). */
3382 static unsigned HOST_WIDE_INT
3383 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3385 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3387 /* The algorithm notes that the choice y = x satisfies
3388 x*y == 1 mod 2^3, since x is assumed odd.
3389 Each iteration doubles the number of bits of significance in y. */
3391 unsigned HOST_WIDE_INT mask;
3392 unsigned HOST_WIDE_INT y = x;
3395 mask = (n == HOST_BITS_PER_WIDE_INT
3396 ? ~(unsigned HOST_WIDE_INT) 0
3397 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3401 y = y * (2 - x*y) & mask; /* Modulo 2^N */
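/* The same iteration specialized to 32 bits (a sketch; X must be
   odd).  Y starts correct mod 2^3, and each step doubles the number
   of correct low bits: 3 -> 6 -> 12 -> 24 -> 48 >= 32.  */
#if 0
#include <stdint.h>

static uint32_t
invert_mod2_32 (uint32_t x)
{
  uint32_t y = x;
  int i;
  for (i = 0; i < 4; i++)
    y = y * (2 - x * y);   /* wraps mod 2^32; no mask needed */
  return y;                /* x * y == 1 (mod 2^32) */
}
#endif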
3407 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3408 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3409 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3410 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3413 The result is put in TARGET if that is convenient.
3415 MODE is the mode of operation. */
3418 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3419 rtx op1, rtx target, int unsignedp)
3422 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3424 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3425 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3426 tem = expand_and (mode, tem, op1, NULL_RTX);
3428 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3431 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3432 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3433 tem = expand_and (mode, tem, op0, NULL_RTX);
3434 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
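/* The identity behind the adjustment, as a 32-bit sketch: with
   two's-complement inputs, uhi (a, b) = shi (a, b) + (a < 0 ? b : 0)
   + (b < 0 ? a : 0), and each conditional term is formed branch-free
   with an arithmetic shift and an AND, exactly as above.  The helper
   name is hypothetical; arithmetic >> of negatives is assumed.  */
#if 0
#include <stdint.h>

static uint32_t
umulhi_via_smulhi (int32_t a, int32_t b)
{
  uint32_t hi = (uint32_t) (((int64_t) a * b) >> 32);
  hi += (uint32_t) ((a >> 31) & b);   /* + b if a < 0 */
  hi += (uint32_t) ((b >> 31) & a);   /* + a if b < 0 */
  return hi;
}
#endif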
3440 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3443 extract_high_half (enum machine_mode mode, rtx op)
3445 enum machine_mode wider_mode;
3447 if (mode == word_mode)
3448 return gen_highpart (mode, op);
3450 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3452 wider_mode = GET_MODE_WIDER_MODE (mode);
3453 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3454 GET_MODE_BITSIZE (mode), 0, 1);
3455 return convert_modes (mode, wider_mode, op, 0);
3458 /* Like expand_mult_highpart, but only consider using a multiplication
3459 optab. OP1 is an rtx for the constant operand. */
3462 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3463 rtx target, int unsignedp, int max_cost)
3465 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3466 enum machine_mode wider_mode;
3470 bool speed = optimize_insn_for_speed_p ();
3472 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3474 wider_mode = GET_MODE_WIDER_MODE (mode);
3475 size = GET_MODE_BITSIZE (mode);
3477 /* Firstly, try using a multiplication insn that only generates the needed
3478 high part of the product, and in the sign flavor of unsignedp. */
3479 if (mul_highpart_cost[speed][mode] < max_cost)
3481 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3482 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3483 unsignedp, OPTAB_DIRECT);
3488 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3489 Need to adjust the result after the multiplication. */
3490 if (size - 1 < BITS_PER_WORD
3491 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3492 + 4 * add_cost[speed][mode] < max_cost))
3494 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3495 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3496 unsignedp, OPTAB_DIRECT);
3498 /* We used the wrong signedness. Adjust the result. */
3499 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3503 /* Try widening multiplication. */
3504 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3505 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3506 && mul_widen_cost[speed][wider_mode] < max_cost)
3508 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3509 unsignedp, OPTAB_WIDEN);
3511 return extract_high_half (mode, tem);
3514 /* Try widening the mode and performing a non-widening multiplication. */
3515 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3516 && size - 1 < BITS_PER_WORD
3517 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3519 rtx insns, wop0, wop1;
3521 /* We need to widen the operands, for example to ensure the
3522 constant multiplier is correctly sign or zero extended.
3523 Use a sequence to clean up any instructions emitted by
3524 the conversions if things don't work out. */
3526 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3527 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3528 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3529 unsignedp, OPTAB_WIDEN);
3530 insns = get_insns ();
3536 return extract_high_half (mode, tem);
3540 /* Try widening multiplication of opposite signedness, and adjust. */
3541 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3542 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3543 && size - 1 < BITS_PER_WORD
3544 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3545 + 4 * add_cost[speed][mode] < max_cost))
3547 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3548 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3551 tem = extract_high_half (mode, tem);
3552 /* We used the wrong signedness. Adjust the result. */
3553 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3561 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3562 putting the high half of the result in TARGET if that is convenient,
3563 and return where the result is. If the operation can not be performed,
3566 MODE is the mode of operation and result.
3568 UNSIGNEDP nonzero means unsigned multiply.
3570 MAX_COST is the total allowed cost for the expanded RTL. */
3573 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3574 rtx target, int unsignedp, int max_cost)
3576 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3577 unsigned HOST_WIDE_INT cnst1;
3579 bool sign_adjust = false;
3580 enum mult_variant variant;
3581 struct algorithm alg;
3583 bool speed = optimize_insn_for_speed_p ();
3585 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3586 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3587 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3589 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3591 /* We can't optimize modes wider than BITS_PER_WORD.
3592 ??? We might be able to perform double-word arithmetic if
3593 mode == word_mode, however all the cost calculations in
3594 synth_mult etc. assume single-word operations. */
3595 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3596 return expand_mult_highpart_optab (mode, op0, op1, target,
3597 unsignedp, max_cost);
3599 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3601 /* Check whether we try to multiply by a negative constant. */
3602 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3605 extra_cost += add_cost[speed][mode];
3608 /* See whether shift/add multiplication is cheap enough. */
3609 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3610 max_cost - extra_cost))
3612 /* See whether the specialized multiplication optabs are
3613 cheaper than the shift/add version. */
3614 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3615 alg.cost.cost + extra_cost);
3619 tem = convert_to_mode (wider_mode, op0, unsignedp);
3620 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3621 tem = extract_high_half (mode, tem);
3623 /* Adjust result for signedness. */
3625 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3629 return expand_mult_highpart_optab (mode, op0, op1, target,
3630 unsignedp, max_cost);
3634 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3637 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3639 unsigned HOST_WIDE_INT masklow, maskhigh;
3640 rtx result, temp, shift, label;
3643 logd = floor_log2 (d);
3644 result = gen_reg_rtx (mode);
3646 /* Avoid conditional branches when they're expensive. */
3647 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3648 && optimize_insn_for_speed_p ())
3650 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3654 signmask = force_reg (mode, signmask);
3655 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3656 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3658 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3659 which instruction sequence to use. If logical right shifts
3660 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3661 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3663 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3664 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3665 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3666 > COSTS_N_INSNS (2)))
3668 temp = expand_binop (mode, xor_optab, op0, signmask,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, xor_optab, temp, signmask,
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 temp = expand_binop (mode, sub_optab, temp, signmask,
3677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3683 signmask = force_reg (mode, signmask);
3685 temp = expand_binop (mode, add_optab, op0, signmask,
3686 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3687 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 temp = expand_binop (mode, sub_optab, temp, signmask,
3690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3696 /* Mask contains the mode's signbit and the significant bits of the
3697 modulus. By including the signbit in the operation, many targets
3698 can avoid an explicit compare operation in the following comparison
3701 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3702 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3704 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3708 maskhigh = (HOST_WIDE_INT) -1
3709 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3711 temp = expand_binop (mode, and_optab, op0,
3712 immed_double_const (masklow, maskhigh, mode),
3713 result, 1, OPTAB_LIB_WIDEN);
3715 emit_move_insn (result, temp);
3717 label = gen_label_rtx ();
3718 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3720 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3721 0, OPTAB_LIB_WIDEN);
3722 masklow = (HOST_WIDE_INT) -1 << logd;
3724 temp = expand_binop (mode, ior_optab, temp,
3725 immed_double_const (masklow, maskhigh, mode),
3726 result, 1, OPTAB_LIB_WIDEN);
3727 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3728 0, OPTAB_LIB_WIDEN);
3730 emit_move_insn (result, temp);
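/* The branch-free sequence from the cheap-LSHIFTRT arm above, as a
   32-bit sketch for x % 8 with C's truncating semantics (helper name
   hypothetical; relies on arithmetic >> of negative values).  */
#if 0
#include <stdint.h>

static int32_t
smod8 (int32_t x)
{
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - 3);   /* 7 if x < 0 */
  return (int32_t) (((uint32_t) x + bias) & 7) - (int32_t) bias;
}
#endif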
3735 /* Expand signed division of OP0 by a power of two D in mode MODE.
3736 This routine is only called for positive values of D. */
3739 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3744 logd = floor_log2 (d);
3747 && BRANCH_COST (optimize_insn_for_speed_p (),
3750 temp = gen_reg_rtx (mode);
3751 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3752 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3753 0, OPTAB_LIB_WIDEN);
3754 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3757 #ifdef HAVE_conditional_move
3758 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3763 /* ??? emit_conditional_move forces a stack adjustment via
3764 compare_from_rtx so, if the sequence is discarded, it will
3765 be lost. Do it now instead. */
3766 do_pending_stack_adjust ();
3769 temp2 = copy_to_mode_reg (mode, op0);
3770 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3771 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3772 temp = force_reg (mode, temp);
3774 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3775 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3776 mode, temp, temp2, mode, 0);
3779 rtx seq = get_insns ();
3782 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3788 if (BRANCH_COST (optimize_insn_for_speed_p (),
3791 int ushift = GET_MODE_BITSIZE (mode) - logd;
3793 temp = gen_reg_rtx (mode);
3794 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3795 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3796 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3797 NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3800 ushift, NULL_RTX, 1);
3801 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3802 0, OPTAB_LIB_WIDEN);
3803 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3806 label = gen_label_rtx ();
3807 temp = copy_to_mode_reg (mode, op0);
3808 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
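/* Illustrative sketch, not part of GCC: the store-flag variant above in
   plain C for 32-bit ints (hypothetical helper).  Adding d - 1 to
   negative operands turns the flooring arithmetic shift into a
   round-toward-zero division.  */
#if 0
static int
sdiv_pow2_example (int x, int logd)
{
  int bias = (x >> 31) & ((1 << logd) - 1);	/* d - 1 if x < 0, else 0 */
  return (x + bias) >> logd;			/* e.g. -5 / 4 == -1 */
}
#endif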
3814 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3815 if that is convenient, and returning where the result is.
3816 You may request either the quotient or the remainder as the result;
3817 specify REM_FLAG nonzero to get the remainder.
3819 CODE is the expression code for which kind of division this is;
3820 it controls how rounding is done. MODE is the machine mode to use.
3821 UNSIGNEDP nonzero means do unsigned division. */
3823 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3824 and then correct it by or'ing in missing high bits
3825 if result of ANDI is nonzero.
3826 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3827 This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
3830 /* ??? For modulo, we don't actually need the highpart of the first product,
3831 the low part will do nicely. And for small divisors, the second multiply
3832 can also be a low-part only multiply or even be completely left out.
3833 E.g. to calculate the remainder of a division by 3 with a 32 bit
3834 multiply, multiply with 0x55555556 and extract the upper two bits;
3835 the result is exact for inputs up to 0x1fffffff.
3836 The input range can be reduced by using cross-sum rules.
3837 For odd divisors >= 3, the following table gives right shift counts
3838 so that if a number is shifted by an integer multiple of the given
3839 amount, the remainder stays the same:
3840 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3841 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3842 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3843 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3844 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3846 Cross-sum rules for even numbers can be derived by leaving as many bits
3847 to the right alone as the divisor has zeros to the right.
3848 E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
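/* Illustrative sketch, not part of GCC: the "multiply with 0x55555556"
   remainder-of-3 trick mentioned above, written out for a 32x32->32
   multiply (hypothetical helper).  */
#if 0
static unsigned int
umod3_example (unsigned int x)		/* exact for x <= 0x1fffffff */
{
  unsigned int t = x * 0x55555556u;	/* low half of the product */
  return t >> 30;			/* upper two bits hold x % 3 */
}
#endif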
rtx
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3854 rtx op0, rtx op1, rtx target, int unsignedp)
3856 enum machine_mode compute_mode;
3858 rtx quotient = 0, remainder = 0;
3862 optab optab1, optab2;
3863 int op1_is_constant, op1_is_pow2 = 0;
3864 int max_cost, extra_cost;
3865 static HOST_WIDE_INT last_div_const = 0;
3866 static HOST_WIDE_INT ext_op1;
3867 bool speed = optimize_insn_for_speed_p ();
3869 op1_is_constant = CONST_INT_P (op1);
3870 if (op1_is_constant)
3872 ext_op1 = INTVAL (op1);
      if (unsignedp)
	ext_op1 &= GET_MODE_MASK (mode);
3875 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3876 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3880 This is the structure of expand_divmod:
3882 First comes code to fix up the operands so we can perform the operations
3883 correctly and efficiently.
3885 Second comes a switch statement with code specific for each rounding mode.
3886 For some special operands this code emits all RTL for the desired
3887 operation, for other cases, it generates only a quotient and stores it in
3888 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3889 to indicate that it has not done anything.
3891 Last comes code that finishes the operation. If QUOTIENT is set and
3892 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3893 QUOTIENT is not set, it is computed using trunc rounding.
3895 We try to generate special code for division and remainder when OP1 is a
3896 constant. If |OP1| = 2**n we can use shifts and some other fast
3897 operations. For other values of OP1, we compute a carefully selected
   fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
   by m.
3901 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3902 half of the product. Different strategies for generating the product are
3903 implemented in expand_mult_highpart.
3905 If what we actually want is the remainder, we generate that by another
3906 by-constant multiplication and a subtraction. */
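/* Illustrative sketch, not part of GCC: what the choose_multiplier /
   expand_mult_highpart strategy expands to for unsigned division by 7
   in 32 bits.  The exact multiplier 0x124924925 needs 33 bits, so only
   its low 32 bits are used and one extra shift-and-add fixes the
   result up, mirroring the multiplier-overflow (mh != 0) path below
   (hypothetical helper).  */
#if 0
static unsigned int
udiv7_example (unsigned int n)
{
  unsigned int t1 = ((unsigned long long) n * 0x24924925u) >> 32;
  unsigned int t2 = (n - t1) >> 1;
  return (t1 + t2) >> 2;		/* post_shift == 3 */
}
#endif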
3908 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3909 code below will malfunction if we are, so check here and handle
3910 the special case if so. */
3911 if (op1 == const1_rtx)
3912 return rem_flag ? const0_rtx : op0;
3914 /* When dividing by -1, we could get an overflow.
3915 negv_optab can handle overflows. */
3916 if (! unsignedp && op1 == constm1_rtx)
    {
      if (rem_flag)
	return const0_rtx;
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3921 ? negv_optab : neg_optab, op0, target, 0);
    }

  if (target
      /* Don't use the function value register as a target
3926 since we have to read it as well as write it,
3927 and function-inlining gets confused by this. */
3928 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3929 /* Don't clobber an operand while doing a multi-step calculation. */
3930 || ((rem_flag || op1_is_constant)
3931 && (reg_mentioned_p (target, op0)
3932 || (MEM_P (op0) && MEM_P (target))))
3933 || reg_mentioned_p (target, op1)
	  || (MEM_P (op1) && MEM_P (target))))
    target = 0;
3937 /* Get the mode in which to perform this computation. Normally it will
3938 be MODE, but sometimes we can't do the desired operation in MODE.
3939 If so, pick a wider mode in which we can do the operation. Convert
3940 to that mode at the start to avoid repeated conversions.
3942 First see what operations we need. These depend on the expression
3943 we are evaluating. (We assume that divxx3 insns exist under the
3944 same conditions that modxx3 insns and that these insns don't normally
3945 fail. If these assumptions are not correct, we may generate less
3946 efficient code in some cases.)
3948 Then see if we find a mode in which we can open-code that operation
3949 (either a division, modulus, or shift). Finally, check for the smallest
3950 mode for which we can do the operation with a library call. */
3952 /* We might want to refine this now that we have division-by-constant
3953 optimization. Since expand_mult_highpart tries so many variants, it is
3954 not straightforward to generalize this. Maybe we should make an array
3955 of possible modes in init_expmed? Save this for GCC 2.7. */
3957 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3958 ? (unsignedp ? lshr_optab : ashr_optab)
3959 : (unsignedp ? udiv_optab : sdiv_optab));
3960 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3962 : (unsignedp ? udivmod_optab : sdivmod_optab));
3964 for (compute_mode = mode; compute_mode != VOIDmode;
3965 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3966 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3967 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3970 if (compute_mode == VOIDmode)
3971 for (compute_mode = mode; compute_mode != VOIDmode;
3972 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3973 if (optab_libfunc (optab1, compute_mode)
3974 || optab_libfunc (optab2, compute_mode))
  /* If we still couldn't find a mode, use MODE, but expand_binop will
     probably die.  */
3979 if (compute_mode == VOIDmode)
3980 compute_mode = mode;
3982 if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);
3987 size = GET_MODE_BITSIZE (compute_mode);
#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below.  */
  size = GET_MODE_BITSIZE (mode);
#endif
3995 /* Only deduct something for a REM if the last divide done was
     for a different constant.  Then set the constant of the last
     divide.  */
3998 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3999 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4000 && INTVAL (op1) == last_div_const))
4001 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4003 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4005 /* Now convert to the best mode to use. */
  if (compute_mode != mode)
    {
4008 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4009 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4011 /* convert_modes may have placed op1 into a register, so we
4012 must recompute the following. */
4013 op1_is_constant = CONST_INT_P (op1);
4014 op1_is_pow2 = (op1_is_constant
		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
			  || (! unsignedp
			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
    }
4020 /* If one of the operands is a volatile MEM, copy it into a register. */
4022 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4023 op0 = force_reg (compute_mode, op0);
4024 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4025 op1 = force_reg (compute_mode, op1);
4027 /* If we need the remainder or if OP1 is constant, we need to
4028 put OP0 in a register in case it has any queued subexpressions. */
4029 if (rem_flag || op1_is_constant)
4030 op0 = force_reg (compute_mode, op0);
4032 last = get_last_insn ();
4034 /* Promote floor rounding to trunc rounding for unsigned operations. */
4037 if (code == FLOOR_DIV_EXPR)
4038 code = TRUNC_DIV_EXPR;
4039 if (code == FLOOR_MOD_EXPR)
4040 code = TRUNC_MOD_EXPR;
4041 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4042 code = TRUNC_DIV_EXPR;
  if (op1 != const0_rtx)
    switch (code)
      {
4048 case TRUNC_MOD_EXPR:
4049 case TRUNC_DIV_EXPR:
4050 if (op1_is_constant)
4054 unsigned HOST_WIDE_INT mh;
4055 int pre_shift, post_shift;
4058 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4059 & GET_MODE_MASK (compute_mode));
4061 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063 pre_shift = floor_log2 (d);
4067 = expand_binop (compute_mode, and_optab, op0,
4068 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4072 return gen_lowpart (mode, remainder);
4074 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4075 pre_shift, tquotient, 1);
4077 else if (size <= HOST_BITS_PER_WIDE_INT)
4079 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
		    /* Most significant bit of divisor is set; emit an scc
		       insn.  */
4083 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4084 compute_mode, 1, 1);
4088 /* Find a suitable multiplier and right shift count
4089 instead of multiplying with D. */
4091 mh = choose_multiplier (d, size, size,
4092 &ml, &post_shift, &dummy);
4094 /* If the suggested multiplier is more than SIZE bits,
4095 we can do better for even divisors, using an
4096 initial right shift. */
4097 if (mh != 0 && (d & 1) == 0)
4099 pre_shift = floor_log2 (d & -d);
4100 mh = choose_multiplier (d >> pre_shift, size,
4102 &ml, &post_shift, &dummy);
4112 if (post_shift - 1 >= BITS_PER_WORD)
4116 = (shift_cost[speed][compute_mode][post_shift - 1]
4117 + shift_cost[speed][compute_mode][1]
4118 + 2 * add_cost[speed][compute_mode]);
4119 t1 = expand_mult_highpart (compute_mode, op0, ml,
4121 max_cost - extra_cost);
4124 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4127 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4128 t2, 1, NULL_RTX, 1);
4129 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4132 quotient = expand_shift
4133 (RSHIFT_EXPR, compute_mode, t4,
4134 post_shift - 1, tquotient, 1);
4140 if (pre_shift >= BITS_PER_WORD
4141 || post_shift >= BITS_PER_WORD)
4145 (RSHIFT_EXPR, compute_mode, op0,
4146 pre_shift, NULL_RTX, 1);
4148 = (shift_cost[speed][compute_mode][pre_shift]
4149 + shift_cost[speed][compute_mode][post_shift]);
4150 t2 = expand_mult_highpart (compute_mode, t1, ml,
4152 max_cost - extra_cost);
4155 quotient = expand_shift
4156 (RSHIFT_EXPR, compute_mode, t2,
4157 post_shift, tquotient, 1);
4161 else /* Too wide mode to use tricky code */
4164 insn = get_last_insn ();
4166 set_dst_reg_note (insn, REG_EQUAL,
4167 gen_rtx_UDIV (compute_mode, op0, op1),
4170 else /* TRUNC_DIV, signed */
4172 unsigned HOST_WIDE_INT ml;
4173 int lgup, post_shift;
4175 HOST_WIDE_INT d = INTVAL (op1);
4176 unsigned HOST_WIDE_INT abs_d;
4178 /* Since d might be INT_MIN, we have to cast to
4179 unsigned HOST_WIDE_INT before negating to avoid
4180 undefined signed overflow. */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
4183 : - (unsigned HOST_WIDE_INT) d);
4185 /* n rem d = n rem -d */
4186 if (rem_flag && d < 0)
	    {
	      d = abs_d;
	      op1 = gen_int_mode (abs_d, compute_mode);
	    }
4195 quotient = expand_unop (compute_mode, neg_optab, op0,
4197 else if (HOST_BITS_PER_WIDE_INT >= size
4198 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200 /* This case is not handled correctly below. */
4201 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4202 compute_mode, 1, 1);
4206 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4207 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4208 : sdiv_pow2_cheap[speed][compute_mode])
4209 /* We assume that cheap metric is true if the
4210 optab has an expander for this mode. */
4211 && ((optab_handler ((rem_flag ? smod_optab
						 : sdiv_optab),
				      compute_mode)
			!= CODE_FOR_nothing)
4215 || (optab_handler (sdivmod_optab,
			   != CODE_FOR_nothing)))
	    ;
4219 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
	    {
	      if (rem_flag)
		{
		  remainder = expand_smod_pow2 (compute_mode, op0, d);
		  if (remainder)
		    return gen_lowpart (mode, remainder);
		}
4228 if (sdiv_pow2_cheap[speed][compute_mode]
4229 && ((optab_handler (sdiv_optab, compute_mode)
4230 != CODE_FOR_nothing)
4231 || (optab_handler (sdivmod_optab, compute_mode)
4232 != CODE_FOR_nothing)))
4233 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4235 gen_int_mode (abs_d,
4239 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4241 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4242 negate the quotient. */
4245 insn = get_last_insn ();
4247 && abs_d < ((unsigned HOST_WIDE_INT) 1
4248 << (HOST_BITS_PER_WIDE_INT - 1)))
4249 set_dst_reg_note (insn, REG_EQUAL,
4250 gen_rtx_DIV (compute_mode, op0,
4256 quotient = expand_unop (compute_mode, neg_optab,
4257 quotient, quotient, 0);
4260 else if (size <= HOST_BITS_PER_WIDE_INT)
4262 choose_multiplier (abs_d, size, size - 1,
4263 &mlr, &post_shift, &lgup);
4264 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4265 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4269 if (post_shift >= BITS_PER_WORD
4270 || size - 1 >= BITS_PER_WORD)
4273 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4274 + shift_cost[speed][compute_mode][size - 1]
4275 + add_cost[speed][compute_mode]);
4276 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4278 max_cost - extra_cost);
4282 (RSHIFT_EXPR, compute_mode, t1,
4283 post_shift, NULL_RTX, 0);
4285 (RSHIFT_EXPR, compute_mode, op0,
4286 size - 1, NULL_RTX, 0);
4289 = force_operand (gen_rtx_MINUS (compute_mode,
4294 = force_operand (gen_rtx_MINUS (compute_mode,
4302 if (post_shift >= BITS_PER_WORD
4303 || size - 1 >= BITS_PER_WORD)
4306 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4307 mlr = gen_int_mode (ml, compute_mode);
4308 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4309 + shift_cost[speed][compute_mode][size - 1]
4310 + 2 * add_cost[speed][compute_mode]);
4311 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4313 max_cost - extra_cost);
4316 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4320 (RSHIFT_EXPR, compute_mode, t2,
4321 post_shift, NULL_RTX, 0);
4323 (RSHIFT_EXPR, compute_mode, op0,
4324 size - 1, NULL_RTX, 0);
4327 = force_operand (gen_rtx_MINUS (compute_mode,
4332 = force_operand (gen_rtx_MINUS (compute_mode,
4337 else /* Too wide mode to use tricky code */
4340 insn = get_last_insn ();
4342 set_dst_reg_note (insn, REG_EQUAL,
4343 gen_rtx_DIV (compute_mode, op0, op1),
4349 delete_insns_since (last);
4352 case FLOOR_DIV_EXPR:
4353 case FLOOR_MOD_EXPR:
4354 /* We will come here only for signed operations. */
4355 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4357 unsigned HOST_WIDE_INT mh;
4358 int pre_shift, lgup, post_shift;
4359 HOST_WIDE_INT d = INTVAL (op1);
4364 /* We could just as easily deal with negative constants here,
4365 but it does not seem worth the trouble for GCC 2.6. */
4366 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4368 pre_shift = floor_log2 (d);
4371 remainder = expand_binop (compute_mode, and_optab, op0,
4372 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4373 remainder, 0, OPTAB_LIB_WIDEN);
4375 return gen_lowpart (mode, remainder);
4377 quotient = expand_shift
4378 (RSHIFT_EXPR, compute_mode, op0,
4379 pre_shift, tquotient, 0);
4385 mh = choose_multiplier (d, size, size - 1,
4386 &ml, &post_shift, &lgup);
4389 if (post_shift < BITS_PER_WORD
4390 && size - 1 < BITS_PER_WORD)
4393 (RSHIFT_EXPR, compute_mode, op0,
4394 size - 1, NULL_RTX, 0);
4395 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4396 NULL_RTX, 0, OPTAB_WIDEN);
4397 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4398 + shift_cost[speed][compute_mode][size - 1]
4399 + 2 * add_cost[speed][compute_mode]);
4400 t3 = expand_mult_highpart (compute_mode, t2, ml,
4402 max_cost - extra_cost);
4406 (RSHIFT_EXPR, compute_mode, t3,
4407 post_shift, NULL_RTX, 1);
4408 quotient = expand_binop (compute_mode, xor_optab,
4409 t4, t1, tquotient, 0,
4417 rtx nsign, t1, t2, t3, t4;
4418 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4419 op0, constm1_rtx), NULL_RTX);
4420 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422 nsign = expand_shift
4423 (RSHIFT_EXPR, compute_mode, t2,
4424 size - 1, NULL_RTX, 0);
4425 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4432 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4443 delete_insns_since (last);
4445 /* Try using an instruction that produces both the quotient and
4446 remainder, using truncation. We can easily compensate the quotient
4447 or remainder to get floor rounding, once we have the remainder.
4448 Notice that we compute also the final remainder value here,
4449 and return the result right away. */
4450 if (target == 0 || GET_MODE (target) != compute_mode)
4451 target = gen_reg_rtx (compute_mode);
4456 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4457 quotient = gen_reg_rtx (compute_mode);
4462 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4463 remainder = gen_reg_rtx (compute_mode);
4466 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4467 quotient, remainder, 0))
4469 /* This could be computed with a branch-less sequence.
4470 Save that for later. */
4472 rtx label = gen_label_rtx ();
4473 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4474 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4475 NULL_RTX, 0, OPTAB_WIDEN);
4476 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4477 expand_dec (quotient, const1_rtx);
4478 expand_inc (remainder, op1);
		emit_label (label);
		return gen_lowpart (mode, rem_flag ? remainder : quotient);
	      }
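/* Illustrative sketch, not part of GCC: the compare-and-adjust sequence
   above implements floor rounding on top of a truncating divmod, as in
   this C fragment (hypothetical helper).  */
#if 0
static void
floor_divmod_example (int a, int b, int *q, int *r)
{
  *q = a / b;				/* truncating division */
  *r = a % b;
  if (*r != 0 && (a ^ b) < 0)		/* inexact and signs differ? */
    {
      *q -= 1;				/* e.g. -7 / 2: -3 -> -4 */
      *r += b;				/*      -7 % 2: -1 ->  1 */
    }
}
#endif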
4483 /* No luck with division elimination or divmod. Have to do it
4484 by conditionally adjusting op0 *and* the result. */
4486 rtx label1, label2, label3, label4, label5;
4490 quotient = gen_reg_rtx (compute_mode);
4491 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4492 label1 = gen_label_rtx ();
4493 label2 = gen_label_rtx ();
4494 label3 = gen_label_rtx ();
4495 label4 = gen_label_rtx ();
4496 label5 = gen_label_rtx ();
4497 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4498 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4499 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4500 quotient, 0, OPTAB_LIB_WIDEN);
4501 if (tem != quotient)
4502 emit_move_insn (quotient, tem);
4503 emit_jump_insn (gen_jump (label5));
4505 emit_label (label1);
4506 expand_inc (adjusted_op0, const1_rtx);
4507 emit_jump_insn (gen_jump (label4));
4509 emit_label (label2);
4510 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4511 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512 quotient, 0, OPTAB_LIB_WIDEN);
4513 if (tem != quotient)
4514 emit_move_insn (quotient, tem);
4515 emit_jump_insn (gen_jump (label5));
4517 emit_label (label3);
4518 expand_dec (adjusted_op0, const1_rtx);
4519 emit_label (label4);
4520 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521 quotient, 0, OPTAB_LIB_WIDEN);
4522 if (tem != quotient)
4523 emit_move_insn (quotient, tem);
4524 expand_dec (quotient, const1_rtx);
4525 emit_label (label5);
4533 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4536 unsigned HOST_WIDE_INT d = INTVAL (op1);
4537 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4538 floor_log2 (d), tquotient, 1);
4539 t2 = expand_binop (compute_mode, and_optab, op0,
4541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4542 t3 = gen_reg_rtx (compute_mode);
4543 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4544 compute_mode, 1, 1);
4548 lab = gen_label_rtx ();
4549 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4550 expand_inc (t1, const1_rtx);
4555 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4561 /* Try using an instruction that produces both the quotient and
4562 remainder, using truncation. We can easily compensate the
4563 quotient or remainder to get ceiling rounding, once we have the
4564 remainder. Notice that we compute also the final remainder
4565 value here, and return the result right away. */
4566 if (target == 0 || GET_MODE (target) != compute_mode)
4567 target = gen_reg_rtx (compute_mode);
4571 remainder = (REG_P (target)
4572 ? target : gen_reg_rtx (compute_mode));
4573 quotient = gen_reg_rtx (compute_mode);
4577 quotient = (REG_P (target)
4578 ? target : gen_reg_rtx (compute_mode));
4579 remainder = gen_reg_rtx (compute_mode);
4582 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4585 /* This could be computed with a branch-less sequence.
4586 Save that for later. */
4587 rtx label = gen_label_rtx ();
4588 do_cmp_and_jump (remainder, const0_rtx, EQ,
4589 compute_mode, label);
4590 expand_inc (quotient, const1_rtx);
4591 expand_dec (remainder, op1);
		emit_label (label);
		return gen_lowpart (mode, rem_flag ? remainder : quotient);
	      }
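/* Illustrative sketch, not part of GCC: the unsigned ceiling fixup
   above in plain C (hypothetical helper).  */
#if 0
static unsigned int
ceil_udiv_example (unsigned int a, unsigned int b)
{
  unsigned int q = a / b, r = a % b;
  return q + (r != 0);			/* bump the quotient when inexact */
}
#endif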
4596 /* No luck with division elimination or divmod. Have to do it
4597 by conditionally adjusting op0 *and* the result. */
4600 rtx adjusted_op0, tem;
4602 quotient = gen_reg_rtx (compute_mode);
4603 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4604 label1 = gen_label_rtx ();
4605 label2 = gen_label_rtx ();
4606 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4607 compute_mode, label1);
4608 emit_move_insn (quotient, const0_rtx);
4609 emit_jump_insn (gen_jump (label2));
4611 emit_label (label1);
4612 expand_dec (adjusted_op0, const1_rtx);
4613 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4614 quotient, 1, OPTAB_LIB_WIDEN);
4615 if (tem != quotient)
4616 emit_move_insn (quotient, tem);
4617 expand_inc (quotient, const1_rtx);
4618 emit_label (label2);
4623 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4624 && INTVAL (op1) >= 0)
4626 /* This is extremely similar to the code for the unsigned case
4627 above. For 2.7 we should merge these variants, but for
4628 2.6.1 I don't want to touch the code for unsigned since that
		 gets used in C.  The signed case will only be used by other
		 languages (Ada).  */
4633 unsigned HOST_WIDE_INT d = INTVAL (op1);
4634 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4635 floor_log2 (d), tquotient, 0);
4636 t2 = expand_binop (compute_mode, and_optab, op0,
4638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4639 t3 = gen_reg_rtx (compute_mode);
4640 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4641 compute_mode, 1, 1);
4645 lab = gen_label_rtx ();
4646 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4647 expand_inc (t1, const1_rtx);
4652 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4658 /* Try using an instruction that produces both the quotient and
4659 remainder, using truncation. We can easily compensate the
4660 quotient or remainder to get ceiling rounding, once we have the
4661 remainder. Notice that we compute also the final remainder
4662 value here, and return the result right away. */
4663 if (target == 0 || GET_MODE (target) != compute_mode)
4664 target = gen_reg_rtx (compute_mode);
	    remainder = (REG_P (target)
4668 ? target : gen_reg_rtx (compute_mode));
4669 quotient = gen_reg_rtx (compute_mode);
4673 quotient = (REG_P (target)
4674 ? target : gen_reg_rtx (compute_mode));
4675 remainder = gen_reg_rtx (compute_mode);
4678 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4681 /* This could be computed with a branch-less sequence.
4682 Save that for later. */
4684 rtx label = gen_label_rtx ();
4685 do_cmp_and_jump (remainder, const0_rtx, EQ,
4686 compute_mode, label);
4687 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4688 NULL_RTX, 0, OPTAB_WIDEN);
4689 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4690 expand_inc (quotient, const1_rtx);
4691 expand_dec (remainder, op1);
		emit_label (label);
		return gen_lowpart (mode, rem_flag ? remainder : quotient);
	      }
4696 /* No luck with division elimination or divmod. Have to do it
4697 by conditionally adjusting op0 *and* the result. */
4699 rtx label1, label2, label3, label4, label5;
4703 quotient = gen_reg_rtx (compute_mode);
4704 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4705 label1 = gen_label_rtx ();
4706 label2 = gen_label_rtx ();
4707 label3 = gen_label_rtx ();
4708 label4 = gen_label_rtx ();
4709 label5 = gen_label_rtx ();
4710 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4711 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4712 compute_mode, label1);
4713 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4714 quotient, 0, OPTAB_LIB_WIDEN);
4715 if (tem != quotient)
4716 emit_move_insn (quotient, tem);
4717 emit_jump_insn (gen_jump (label5));
4719 emit_label (label1);
4720 expand_dec (adjusted_op0, const1_rtx);
4721 emit_jump_insn (gen_jump (label4));
4723 emit_label (label2);
4724 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4725 compute_mode, label3);
4726 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4727 quotient, 0, OPTAB_LIB_WIDEN);
4728 if (tem != quotient)
4729 emit_move_insn (quotient, tem);
4730 emit_jump_insn (gen_jump (label5));
4732 emit_label (label3);
4733 expand_inc (adjusted_op0, const1_rtx);
4734 emit_label (label4);
4735 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4736 quotient, 0, OPTAB_LIB_WIDEN);
4737 if (tem != quotient)
4738 emit_move_insn (quotient, tem);
4739 expand_inc (quotient, const1_rtx);
4740 emit_label (label5);
4745 case EXACT_DIV_EXPR:
4746 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4748 HOST_WIDE_INT d = INTVAL (op1);
4749 unsigned HOST_WIDE_INT ml;
4753 pre_shift = floor_log2 (d & -d);
4754 ml = invert_mod2n (d >> pre_shift, size);
4755 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4756 pre_shift, NULL_RTX, unsignedp);
4757 quotient = expand_mult (compute_mode, t1,
4758 gen_int_mode (ml, compute_mode),
4761 insn = get_last_insn ();
4762 set_dst_reg_note (insn, REG_EQUAL,
4763 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
					    compute_mode, op0, op1),
			    quotient);
	}
      break;
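/* Illustrative sketch, not part of GCC: exact division by an odd
   constant multiplies by its inverse modulo 2**32, as computed by
   invert_mod2n above; e.g. the inverse of 3 is 0xaaaaaaab
   (hypothetical helper).  */
#if 0
static unsigned int
exact_div3_example (unsigned int x)	/* x must be a multiple of 3 */
{
  return x * 0xaaaaaaabu;		/* 3 * 0xaaaaaaab == 1 (mod 2**32) */
}
#endif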
4769 case ROUND_DIV_EXPR:
4770 case ROUND_MOD_EXPR:
4775 label = gen_label_rtx ();
4776 quotient = gen_reg_rtx (compute_mode);
4777 remainder = gen_reg_rtx (compute_mode);
4778 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4781 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4782 quotient, 1, OPTAB_LIB_WIDEN);
4783 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4784 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4785 remainder, 1, OPTAB_LIB_WIDEN);
4787 tem = plus_constant (op1, -1);
4788 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4789 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4790 expand_inc (quotient, const1_rtx);
4791 expand_dec (remainder, op1);
4796 rtx abs_rem, abs_op1, tem, mask;
4798 label = gen_label_rtx ();
4799 quotient = gen_reg_rtx (compute_mode);
4800 remainder = gen_reg_rtx (compute_mode);
4801 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4804 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4805 quotient, 0, OPTAB_LIB_WIDEN);
4806 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4807 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4808 remainder, 0, OPTAB_LIB_WIDEN);
4810 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4811 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4812 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4815 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4816 NULL_RTX, 0, OPTAB_WIDEN);
4817 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4818 size - 1, NULL_RTX, 0);
4819 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4820 NULL_RTX, 0, OPTAB_WIDEN);
4821 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4822 NULL_RTX, 0, OPTAB_WIDEN);
4823 expand_inc (quotient, tem);
4824 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4825 NULL_RTX, 0, OPTAB_WIDEN);
4826 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4827 NULL_RTX, 0, OPTAB_WIDEN);
4828 expand_dec (remainder, tem);
	    emit_label (label);
	  }
	return gen_lowpart (mode, rem_flag ? remainder : quotient);

      default:
	gcc_unreachable ();
      }
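/* Illustrative sketch, not part of GCC: the unsigned round-to-nearest
   fixup above compares the remainder against half the divisor
   (hypothetical helper).  */
#if 0
static unsigned int
round_udiv_example (unsigned int a, unsigned int b)
{
  unsigned int q = a / b, r = a % b;
  if (r > (b - 1) / 2)			/* more than half way up? */
    q++;				/* e.g. 7 / 4 rounds to 2 */
  return q;
}
#endif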
  if (quotient == 0)
    {
      if (target && GET_MODE (target) != compute_mode)
	target = 0;

      if (rem_flag)
	{
4844 /* Try to produce the remainder without producing the quotient.
4845 If we seem to have a divmod pattern that does not require widening,
4846 don't try widening here. We should really have a WIDEN argument
4847 to expand_twoval_binop, since what we'd really like to do here is
4848 1) try a mod insn in compute_mode
4849 2) try a divmod insn in compute_mode
4850 3) try a div insn in compute_mode and multiply-subtract to get
4852 4) try the same things with widening allowed. */
4854 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4857 ((optab_handler (optab2, compute_mode)
4858 != CODE_FOR_nothing)
4859 ? OPTAB_DIRECT : OPTAB_WIDEN));
4862 /* No luck there. Can we do remainder and divide at once
4863 without a library call? */
4864 remainder = gen_reg_rtx (compute_mode);
4865 if (! expand_twoval_binop ((unsignedp
4869 NULL_RTX, remainder, unsignedp))
4874 return gen_lowpart (mode, remainder);
4877 /* Produce the quotient. Try a quotient insn, but not a library call.
4878 If we have a divmod in this mode, use it in preference to widening
4879 the div (for this test we assume it will not fail). Note that optab2
4880 is set to the one of the two optabs that the call below will use. */
4882 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4883 op0, op1, rem_flag ? NULL_RTX : target,
4885 ((optab_handler (optab2, compute_mode)
4886 != CODE_FOR_nothing)
4887 ? OPTAB_DIRECT : OPTAB_WIDEN));
4891 /* No luck there. Try a quotient-and-remainder insn,
4892 keeping the quotient alone. */
4893 quotient = gen_reg_rtx (compute_mode);
4894 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896 quotient, NULL_RTX, unsignedp))
4900 /* Still no luck. If we are not computing the remainder,
4901 use a library call for the quotient. */
4902 quotient = sign_expand_binop (compute_mode,
4903 udiv_optab, sdiv_optab,
4905 unsignedp, OPTAB_LIB_WIDEN);
	    }
	}
    }

  if (rem_flag)
    {
      if (target && GET_MODE (target) != compute_mode)
	target = 0;

      if (quotient == 0)
	{
4917 /* No divide instruction either. Use library for remainder. */
4918 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920 unsignedp, OPTAB_LIB_WIDEN);
4921 /* No remainder function. Try a quotient-and-remainder
4922 function, keeping the remainder. */
4925 remainder = gen_reg_rtx (compute_mode);
4926 if (!expand_twoval_binop_libfunc
4927 (unsignedp ? udivmod_optab : sdivmod_optab,
4929 NULL_RTX, remainder,
4930 unsignedp ? UMOD : MOD))
4931 remainder = NULL_RTX;
4936 /* We divided. Now finish doing X - Y * (X / Y). */
4937 remainder = expand_mult (compute_mode, quotient, op1,
4938 NULL_RTX, unsignedp);
4939 remainder = expand_binop (compute_mode, sub_optab, op0,
4940 remainder, target, unsignedp,
4945 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4948 /* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
4950 X may be an expression, however we only support those expressions
4951 generated by loop.c. */
tree
make_tree (tree type, rtx x)
{
  tree t;
  switch (GET_CODE (x))
    {
    case CONST_INT:
      {
4962 HOST_WIDE_INT hi = 0;
	if (INTVAL (x) < 0
	    && !(TYPE_UNSIGNED (type)
		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
		     < HOST_BITS_PER_WIDE_INT)))
	  hi = -1;
	t = build_int_cst_wide (type, INTVAL (x), hi);

	return t;
      }

    case CONST_DOUBLE:
4976 if (GET_MODE (x) == VOIDmode)
4977 t = build_int_cst_wide (type,
4978 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
      else
	{
	  REAL_VALUE_TYPE d;

	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
	  t = build_real (type, d);
	}

      return t;

    case CONST_VECTOR:
      {
4991 int units = CONST_VECTOR_NUNITS (x);
4992 tree itype = TREE_TYPE (type);
4997 /* Build a tree with vector elements. */
4998 for (i = units - 1; i >= 0; --i)
5000 rtx elt = CONST_VECTOR_ELT (x, i);
5001 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5004 return build_vector (type, t);
5008 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5009 make_tree (type, XEXP (x, 1)));
5012 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5013 make_tree (type, XEXP (x, 1)));
5016 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5019 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5020 make_tree (type, XEXP (x, 1)));
5023 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5024 make_tree (type, XEXP (x, 1)));
5027 t = unsigned_type_for (type);
5028 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5029 make_tree (t, XEXP (x, 0)),
5030 make_tree (type, XEXP (x, 1))));
5033 t = signed_type_for (type);
5034 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5035 make_tree (t, XEXP (x, 0)),
5036 make_tree (type, XEXP (x, 1))));
5039 if (TREE_CODE (type) != REAL_TYPE)
5040 t = signed_type_for (type);
5044 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5045 make_tree (t, XEXP (x, 0)),
5046 make_tree (t, XEXP (x, 1))));
5048 t = unsigned_type_for (type);
5049 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5050 make_tree (t, XEXP (x, 0)),
5051 make_tree (t, XEXP (x, 1))));
5055 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5056 GET_CODE (x) == ZERO_EXTEND);
5057 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5060 return make_tree (type, XEXP (x, 0));
5063 t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
5066 /* else fall through. */
5069 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5071 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5072 address mode to pointer mode. */
5073 if (POINTER_TYPE_P (type))
5074 x = convert_memory_address_addr_space
5075 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5077 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5078 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
5085 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5086 and returning TARGET.
5088 If TARGET is 0, a pseudo-register or constant is returned. */
rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
5107 /* Helper function for emit_store_flag. */
static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5110 enum machine_mode mode, enum machine_mode compare_mode,
5111 int unsignedp, rtx x, rtx y, int normalizep,
5112 enum machine_mode target_mode)
5114 struct expand_operand ops[4];
5115 rtx op0, last, comparison, subtarget;
5116 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5118 last = get_last_insn ();
5119 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5120 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
5127 if (target_mode == VOIDmode)
5128 target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);
5132 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5134 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5135 create_fixed_operand (&ops[1], comparison);
5136 create_fixed_operand (&ops[2], x);
5137 create_fixed_operand (&ops[3], y);
5138 if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
5143 subtarget = ops[0].value;
5145 /* If we are converting to a wider mode, first convert to
5146 TARGET_MODE, then normalize. This produces better combining
5147 opportunities on machines that have a SIGN_EXTRACT when we are
5148 testing a single bit. This mostly benefits the 68k.
5150 If STORE_FLAG_VALUE does not have the sign bit set when
5151 interpreted in MODE, we can do this conversion as unsigned, which
5152 is usually more efficient. */
5153 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
5156 val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;
5169 /* Now normalize to the proper value in MODE. Sometimes we don't
5170 have to do anything. */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
5173 /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compile-time warning.  */
5175 else if (- normalizep == STORE_FLAG_VALUE)
5176 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5178 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5179 it hard to use a value of just the sign bit due to ANSI integer
5180 constant typing rules. */
5181 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5182 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5183 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5187 gcc_assert (STORE_FLAG_VALUE & 1);
5189 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5190 if (normalizep == -1)
5191 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5194 /* If we were converting to a smaller mode, do the conversion now. */
5195 if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5205 /* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5211 enum machine_mode mode, int unsignedp, int normalizep,
5212 enum machine_mode target_mode)
5215 enum insn_code icode;
5216 enum machine_mode compare_mode;
5217 enum mode_class mclass;
5218 enum rtx_code scode;
5222 code = unsigned_condition (code);
5223 scode = swap_condition (code);
5225 /* If one operand is constant, make it the second one. Only do this
5226 if the other operand is not constant as well. */
5228 if (swap_commutative_operands_p (op0, op1))
5233 code = swap_condition (code);
5236 if (mode == VOIDmode)
5237 mode = GET_MODE (op0);
5239 /* For some comparisons with 1 and -1, we can convert this to
5240 comparisons with zero. This will often produce more opportunities for
5241 store-flag insns. */
5246 if (op1 == const1_rtx)
5247 op1 = const0_rtx, code = LE;
5250 if (op1 == constm1_rtx)
5251 op1 = const0_rtx, code = LT;
5254 if (op1 == const1_rtx)
5255 op1 = const0_rtx, code = GT;
5258 if (op1 == constm1_rtx)
5259 op1 = const0_rtx, code = GE;
5262 if (op1 == const1_rtx)
5263 op1 = const0_rtx, code = NE;
5266 if (op1 == const1_rtx)
5267 op1 = const0_rtx, code = EQ;
5273 /* If we are comparing a double-word integer with zero or -1, we can
5274 convert the comparison into one involving a single word. */
5275 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5276 && GET_MODE_CLASS (mode) == MODE_INT
5277 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5279 if ((code == EQ || code == NE)
5280 && (op1 == const0_rtx || op1 == constm1_rtx))
	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
5286 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5287 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5288 tem = expand_binop (word_mode,
5289 op1 == const0_rtx ? ior_optab : and_optab,
5290 op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);
	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5295 unsignedp, normalizep);
5297 else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5303 subreg_highpart_offset (word_mode,
5305 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5306 unsignedp, normalizep);
5313 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);
5318 convert_move (target, tem,
5319 !val_signbit_known_set_p (word_mode,
5320 (normalizep ? normalizep
					     : STORE_FLAG_VALUE)));
	  return target;
	}
    }
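/* Illustrative sketch, not part of GCC: the double-word reductions
   above, for a 64-bit value split into two 32-bit words (hypothetical
   helpers).  */
#if 0
static int
dword_eq0_example (unsigned int lo, unsigned int hi)
{
  return (lo | hi) == 0;	/* zero iff the OR of the words is zero */
}

static int
dword_lt0_example (unsigned int lo ATTRIBUTE_UNUSED, unsigned int hi)
{
  return (int) hi < 0;		/* the sign lives in the high word only */
}
#endif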
5326 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5327 complement of A (for GE) and shifting the sign bit to the low bit. */
5328 if (op1 == const0_rtx && (code == LT || code == GE)
5329 && GET_MODE_CLASS (mode) == MODE_INT
5330 && (normalizep || STORE_FLAG_VALUE == 1
5331 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5338 /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
5341 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5343 op0 = convert_modes (target_mode, mode, op0, 0);
5347 if (target_mode != mode)
5351 op0 = expand_unop (mode, one_cmpl_optab, op0,
5352 ((STORE_FLAG_VALUE == 1 || normalizep)
5353 ? 0 : subtarget), 0);
5355 if (STORE_FLAG_VALUE == 1 || normalizep)
5356 /* If we are supposed to produce a 0/1 value, we want to do
5357 a logical shift from the sign bit to the low-order bit; for
5358 a -1/0 value, we do an arithmetic shift. */
5359 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5360 GET_MODE_BITSIZE (mode) - 1,
5361 subtarget, normalizep != -1);
5363 if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }
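/* Illustrative sketch, not part of GCC: the sign-bit forms of A < 0
   and A >= 0 produced above, for 32-bit ints (hypothetical helpers).  */
#if 0
static int
lt0_example (int x)
{
  return (unsigned int) x >> 31;	/* logical shift of the sign bit */
}

static int
ge0_example (int x)
{
  return (unsigned int) ~x >> 31;	/* one's complement first for GE */
}
#endif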
5369 mclass = GET_MODE_CLASS (mode);
5370 for (compare_mode = mode; compare_mode != VOIDmode;
5371 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5373 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5374 icode = optab_handler (cstore_optab, optab_mode);
5375 if (icode != CODE_FOR_nothing)
5377 do_pending_stack_adjust ();
5378 tem = emit_cstore (target, icode, code, mode, compare_mode,
5379 unsignedp, op0, op1, normalizep, target_mode);
5383 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5385 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5386 unsignedp, op1, op0, normalizep, target_mode);
5397 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5398 and storing in TARGET. Normally return TARGET.
5399 Return 0 if that cannot be done.
5401 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5402 it is VOIDmode, they cannot both be CONST_INT.
5404 UNSIGNEDP is for the case where we have to widen the operands
5405 to perform the operation. It says to use zero-extension.
5407 NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
5409 either zero or -1. If NORMALIZEP is zero, the result will be left
5410 "raw" out of the scc insn. */
rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5414 enum machine_mode mode, int unsignedp, int normalizep)
5416 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5417 enum rtx_code rcode;
5419 rtx tem, last, trueval;
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;
5426 /* If we reached here, we can't do this with a scc insn, however there
5427 are some comparisons that can be done in other ways. Don't do any
5428 of these cases if branches are very cheap. */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;
5432 /* See what we need to return. We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
5437 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5438 normalizep = STORE_FLAG_VALUE;
      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	normalizep = STORE_FLAG_VALUE;
      else
	return 0;
    }
5446 last = get_last_insn ();
5448 /* If optimizing, use different pseudo registers for each insn, instead
5449 of reusing the same pseudo. This leads to better CSE, but slows
5450 down the compiler, since there are more pseudos */
5451 subtarget = (!optimize
5452 && (target_mode == mode)) ? target : NULL_RTX;
5453 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455 /* For floating-point comparisons, try the reverse comparison or try
5456 changing the "orderedness" of the comparison. */
5457 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459 enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
5463 if (can_compare_p (rcode, mode, ccp_store_flag)
5464 && (code == ORDERED || code == UNORDERED
5465 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5466 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5469 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471 /* For the reverse comparison, use either an addition or a XOR. */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5474 optimize_insn_for_speed_p ()) == 0)
5476 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5477 STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
5480 GEN_INT (normalizep),
5481 target, 0, OPTAB_WIDEN);
	    }
	  else if (want_xor
		   && rtx_cost (trueval, XOR, 1,
5485 optimize_insn_for_speed_p ()) == 0)
5487 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5488 normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
5491 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5495 delete_insns_since (last);
5497 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5498 if (code == ORDERED || code == UNORDERED)
5501 and_them = split_comparison (code, mode, &first_code, &code);
5503 /* If there are no NaNs, the first comparison should always fall through.
5504 Effectively change the comparison to the other one. */
5505 if (!HONOR_NANS (mode))
5507 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5508 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5512 #ifdef HAVE_conditional_move
5513 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5514 conditional move. */
5515 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5516 normalizep, target_mode);
5521 tem = emit_conditional_move (target, code, op0, op1, mode,
5522 tem, const0_rtx, GET_MODE (tem), 0);
5524 tem = emit_conditional_move (target, code, op0, op1, mode,
5525 trueval, tem, GET_MODE (tem), 0);
5528 delete_insns_since (last);
5535 /* The remaining tricks only apply to integer comparisons. */
5537 if (GET_MODE_CLASS (mode) != MODE_INT)
5540 /* If this is an equality comparison of integers, we can try to exclusive-or
5541 (or subtract) the two operands and use a recursive call to try the
5542 comparison with zero. Don't do any of these cases if branches are
5545 if ((code == EQ || code == NE) && op1 != const0_rtx)
5547 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5551 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5554 tem = emit_store_flag (target, code, tem, const0_rtx,
5555 mode, unsignedp, normalizep);
5559 delete_insns_since (last);
5562 /* For integer comparisons, try the reverse comparison. However, for
5563 small X and if we'd have anyway to extend, implementing "X != 0"
5564 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5565 rcode = reverse_condition (code);
5566 if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
5569 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5570 && op1 == const0_rtx))
5572 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5573 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575 /* Again, for the reverse comparison, use either an addition or a XOR. */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5578 optimize_insn_for_speed_p ()) == 0)
5580 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5581 STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
5584 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
	}
      else if (want_xor
	       && rtx_cost (trueval, XOR, 1,
5588 optimize_insn_for_speed_p ()) == 0)
5590 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5591 normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5594 INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
5602 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5603 the constant zero. Reject all other comparisons at this point. Only
5604 do LE and GT if branches are expensive since they are expensive on
5605 2-operand machines. */
5607 if (op1 != const0_rtx
5608 || (code != EQ && code != NE
5609 && (BRANCH_COST (optimize_insn_for_speed_p (),
5610 false) <= 1 || (code != LE && code != GT))))
5613 /* Try to put the result of the comparison in the sign bit. Assume we can't
5614 do the necessary operation below. */
5618 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5619 the sign bit set. */
5623 /* This is destructive, so SUBTARGET can't be OP0. */
5624 if (rtx_equal_p (subtarget, op0))
5627 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			  OPTAB_WIDEN);
    }
  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5635 number of bits in the mode of OP0, minus one. */
5639 if (rtx_equal_p (subtarget, op0))
5642 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5643 GET_MODE_BITSIZE (mode) - 1,
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }
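/* Illustrative sketch, not part of GCC: the LE and GT sign-bit tricks
   above for 32-bit ints with arithmetic right shifts (hypothetical
   helpers).  */
#if 0
static int
le0_example (int x)
{
  return (unsigned int) (x | (x - 1)) >> 31;	/* A <= 0 */
}

static int
gt0_example (int x)
{
  return (unsigned int) ((x >> 31) - x) >> 31;	/* A > 0 */
}
#endif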
5649 if (code == EQ || code == NE)
5651 /* For EQ or NE, one way to do the comparison is to apply an operation
5652 that converts the operand into a positive number if it is nonzero
5653 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5654 for NE we negate. This puts the result in the sign bit. Then we
5655 normalize with a shift, if needed.
5657 Two operations that can do the above actions are ABS and FFS, so try
5658 them. If that doesn't work, and MODE is smaller than a full word,
5659 we can use zero-extension to the wider mode (an unsigned conversion)
5660 as the operation. */
5662 /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */
5666 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5667 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5668 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5669 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5670 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672 tem = convert_modes (word_mode, mode, op0, 1);
5679 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5682 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5685 /* If we couldn't do it that way, for NE we can "or" the two's complement
5686 of the value with itself. For EQ, we take the one's complement of
5687 that "or", which is an extra insn, so we only handle EQ if branches
5692 || BRANCH_COST (optimize_insn_for_speed_p (),
5695 if (rtx_equal_p (subtarget, op0))
5698 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5699 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5702 if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }
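/* Illustrative sketch, not part of GCC: the negate-and-or form of
   NE against zero for 32-bit ints; EQ is its complement (hypothetical
   helper).  */
#if 0
static int
ne0_example (int x)
{
  return (unsigned int) (-x | x) >> 31;	/* 1 iff x != 0 */
}
#endif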
5707 if (tem && normalizep)
5708 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5709 GET_MODE_BITSIZE (mode) - 1,
5710 subtarget, normalizep == 1);
5716 else if (GET_MODE (tem) != target_mode)
5718 convert_move (target, tem, 0);
5721 else if (!subtarget)
5723 emit_move_insn (target, tem);
5728 delete_insns_since (last);
5733 /* Like emit_store_flag, but always succeeds. */
rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5737 enum machine_mode mode, int unsignedp, int normalizep)
5740 rtx trueval, falseval;
5742 /* First see if emit_store_flag can do the job. */
5743 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5748 target = gen_reg_rtx (word_mode);
5750 /* If this failed, we have to do this with set/compare/jump/set code.
5751 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5752 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
5759 label = gen_label_rtx ();
5760 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5761 mode, NULL_RTX, NULL_RTX, label, -1);
5762 emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5769 target = gen_reg_rtx (GET_MODE (target));
5771 /* Jump in the right direction if the target cannot implement CODE
5772 but can jump on its reverse condition. */
5773 falseval = const0_rtx;
5774 if (! can_compare_p (code, mode, ccp_jump)
5775 && (! FLOAT_MODE_P (mode)
5776 || code == ORDERED || code == UNORDERED
5777 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5778 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5780 enum rtx_code rcode;
5781 if (FLOAT_MODE_P (mode))
5782 rcode = reverse_condition_maybe_unordered (code);
5784 rcode = reverse_condition (code);
5786 /* Canonicalize to UNORDERED for the libcall. */
5787 if (can_compare_p (rcode, mode, ccp_jump)
5788 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }
5796 emit_move_insn (target, trueval);
5797 label = gen_label_rtx ();
5798 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5799 NULL_RTX, label, -1);
5801 emit_move_insn (target, falseval);
5807 /* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5809 now a thin wrapper around do_compare_rtx_and_jump. */
static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
		 rtx label)
{
5815 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5816 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
			   NULL_RTX, NULL_RTX, label, -1);
}