[dragonfly.git] / contrib / gcc-5.0 / gcc / tree-ssa-reassoc.c
1/* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "hash-table.h"
25#include "tm.h"
26#include "rtl.h"
27#include "tm_p.h"
28#include "hash-set.h"
29#include "machmode.h"
30#include "vec.h"
31#include "double-int.h"
32#include "input.h"
33#include "alias.h"
34#include "symtab.h"
35#include "wide-int.h"
36#include "inchash.h"
37#include "tree.h"
38#include "fold-const.h"
39#include "stor-layout.h"
40#include "predict.h"
41#include "hard-reg-set.h"
42#include "function.h"
43#include "dominance.h"
44#include "cfg.h"
45#include "cfganal.h"
46#include "basic-block.h"
47#include "gimple-pretty-print.h"
48#include "tree-inline.h"
49#include "hash-map.h"
50#include "tree-ssa-alias.h"
51#include "internal-fn.h"
52#include "gimple-fold.h"
53#include "tree-eh.h"
54#include "gimple-expr.h"
55#include "is-a.h"
56#include "gimple.h"
57#include "gimple-iterator.h"
58#include "gimplify-me.h"
59#include "gimple-ssa.h"
60#include "tree-cfg.h"
61#include "tree-phinodes.h"
62#include "ssa-iterators.h"
63#include "stringpool.h"
64#include "tree-ssanames.h"
65#include "tree-ssa-loop-niter.h"
66#include "tree-ssa-loop.h"
67#include "hashtab.h"
68#include "flags.h"
69#include "statistics.h"
70#include "real.h"
71#include "fixed-value.h"
72#include "insn-config.h"
73#include "expmed.h"
74#include "dojump.h"
75#include "explow.h"
76#include "calls.h"
77#include "emit-rtl.h"
78#include "varasm.h"
79#include "stmt.h"
80#include "expr.h"
81#include "tree-dfa.h"
82#include "tree-ssa.h"
83#include "tree-iterator.h"
84#include "tree-pass.h"
85#include "alloc-pool.h"
86#include "langhooks.h"
87#include "cfgloop.h"
88#include "target.h"
89#include "params.h"
90#include "diagnostic-core.h"
91#include "builtins.h"
92#include "gimplify.h"
93#include "insn-codes.h"
94#include "optabs.h"
95
96/* This is a simple global reassociation pass. It is, in part, based
 97 on the LLVM pass of the same name (they do some things more or less
 98 than we do, and in different orders, etc.).
99
100 It consists of five steps:
101
102 1. Breaking up subtract operations into addition + negate, where
103 it would promote the reassociation of adds.
104
105 2. Left linearization of the expression trees, so that (A+B)+(C+D)
106 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
107 During linearization, we place the operands of the binary
108 expressions into a vector of operand_entry_t
109
110 3. Optimization of the operand lists, eliminating things like a +
111 -a, a & a, etc.
112
113 3a. Combine repeated factors with the same occurrence counts
114 into a __builtin_powi call that will later be optimized into
115 an optimal number of multiplies.
116
117 4. Rewrite the expression trees we linearized and optimized so
118 they are in proper rank order.
119
120 5. Repropagate negates, as nothing else will clean it up ATM.
121
122 A bit of theory on #4, since nobody seems to write anything down
123 about why it makes sense to do it the way they do it:
124
 125 We could do this in a theoretically nicer way, but don't (for reasons
 126 explained after the description of how to do it nicely).
127
128 In order to promote the most redundancy elimination, you want
129 binary expressions whose operands are the same rank (or
130 preferably, the same value) exposed to the redundancy eliminator,
131 for possible elimination.
132
133 So the way to do this if we really cared, is to build the new op
134 tree from the leaves to the roots, merging as you go, and putting the
135 new op on the end of the worklist, until you are left with one
136 thing on the worklist.
137
 138 I.e., if you have to rewrite the following set of operands (listed with
 139 rank in parentheses) with opcode PLUS_EXPR:
140
141 a (1), b (1), c (1), d (2), e (2)
142
143
144 We start with our merge worklist empty, and the ops list with all of
145 those on it.
146
147 You want to first merge all leaves of the same rank, as much as
148 possible.
149
150 So first build a binary op of
151
152 mergetmp = a + b, and put "mergetmp" on the merge worklist.
153
154 Because there is no three operand form of PLUS_EXPR, c is not going to
155 be exposed to redundancy elimination as a rank 1 operand.
156
157 So you might as well throw it on the merge worklist (you could also
158 consider it to now be a rank two operand, and merge it with d and e,
159 but in this case, you then have evicted e from a binary op. So at
160 least in this situation, you can't win.)
161
162 Then build a binary op of d + e
163 mergetmp2 = d + e
164
165 and put mergetmp2 on the merge worklist.
166
167 so merge worklist = {mergetmp, c, mergetmp2}
168
169 Continue building binary ops of these operations until you have only
170 one operation left on the worklist.
171
172 So we have
173
174 build binary op
175 mergetmp3 = mergetmp + c
176
177 worklist = {mergetmp2, mergetmp3}
178
179 mergetmp4 = mergetmp2 + mergetmp3
180
181 worklist = {mergetmp4}
182
183 because we have one operation left, we can now just set the original
184 statement equal to the result of that operation.
185
186 This will at least expose a + b and d + e to redundancy elimination
187 as binary operations.
188
189 For extra points, you can reuse the old statements to build the
190 mergetmps, since you shouldn't run out.
191
192 So why don't we do this?
193
 194 Because it's expensive, and it rarely helps. Most trees we are
 195 reassociating have 3 or fewer ops. If they have 2 ops, they will
 196 already be written as a single binary op. If you have 3 ops, a
 197 single simple check suffices to tell you whether the first two are of the
 198 same rank. If so, you know to order it
199
200 mergetmp = op1 + op2
201 newstmt = mergetmp + op3
202
203 instead of
204 mergetmp = op2 + op3
205 newstmt = mergetmp + op1
206
207 If all three are of the same rank, you can't expose them all in a
208 single binary operator anyway, so the above is *still* the best you
209 can do.
210
211 Thus, this is what we do. When we have three ops left, we check to see
212 what order to put them in, and call it a day. As a nod to vector sum
213 reduction, we check if any of the ops are really a phi node that is a
214 destructive update for the associating op, and keep the destructive
215 update together for vector sum reduction recognition. */
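
/* An illustrative sketch of the steps above (a worked example, not part
   of the pass logic): given source like

     e = a + b - c + d;

   step 1 rewrites the subtraction as the addition of a negate, step 2
   linearizes the tree into (((a + b) + (-c)) + d) and collects
   {a, b, -c, d} into a vector of operand_entry_t, steps 3/3a/4 optimize
   that list and re-emit the chain in rank order, and step 5
   re-propagates the negate if it is still there.  */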
216
217
218/* Statistics */
219static struct
220{
221 int linearized;
222 int constants_eliminated;
223 int ops_eliminated;
224 int rewritten;
225 int pows_encountered;
226 int pows_created;
227} reassociate_stats;
228
 229/* Operand, rank pair. */
230typedef struct operand_entry
231{
232 unsigned int rank;
233 int id;
234 tree op;
235 unsigned int count;
236} *operand_entry_t;
237
238static alloc_pool operand_entry_pool;
239
240/* This is used to assign a unique ID to each struct operand_entry
241 so that qsort results are identical on different hosts. */
242static int next_operand_entry_id;
243
244/* Starting rank number for a given basic block, so that we can rank
245 operations using unmovable instructions in that BB based on the bb
246 depth. */
247static long *bb_rank;
248
249/* Operand->rank hashtable. */
250static hash_map<tree, long> *operand_rank;
251
 252/* Vector of SSA_NAMEs for which, after reassociate_bb is done with
 253 all basic blocks, the CFG should be adjusted: the basic block is
 254 split right after that SSA_NAME's definition statement and before
 255 its only use, which must be a bit ior. */
256static vec<tree> reassoc_branch_fixups;
257
258/* Forward decls. */
259static long get_rank (tree);
260static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
261
262/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
263 possibly added by gsi_remove. */
264
265bool
266reassoc_remove_stmt (gimple_stmt_iterator *gsi)
267{
268 gimple stmt = gsi_stmt (*gsi);
269
270 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
271 return gsi_remove (gsi, true);
272
273 gimple_stmt_iterator prev = *gsi;
274 gsi_prev (&prev);
275 unsigned uid = gimple_uid (stmt);
276 basic_block bb = gimple_bb (stmt);
277 bool ret = gsi_remove (gsi, true);
278 if (!gsi_end_p (prev))
279 gsi_next (&prev);
280 else
281 prev = gsi_start_bb (bb);
282 gimple end_stmt = gsi_stmt (*gsi);
283 while ((stmt = gsi_stmt (prev)) != end_stmt)
284 {
285 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
286 gimple_set_uid (stmt, uid);
287 gsi_next (&prev);
288 }
289 return ret;
290}
291
292/* Bias amount for loop-carried phis. We want this to be larger than
293 the depth of any reassociation tree we can see, but not larger than
294 the rank difference between two blocks. */
295#define PHI_LOOP_BIAS (1 << 15)
296
297/* Rank assigned to a phi statement. If STMT is a loop-carried phi of
298 an innermost loop, and the phi has only a single use which is inside
299 the loop, then the rank is the block rank of the loop latch plus an
300 extra bias for the loop-carried dependence. This causes expressions
301 calculated into an accumulator variable to be independent for each
302 iteration of the loop. If STMT is some other phi, the rank is the
303 block rank of its containing block. */
304static long
305phi_rank (gimple stmt)
306{
307 basic_block bb = gimple_bb (stmt);
308 struct loop *father = bb->loop_father;
309 tree res;
310 unsigned i;
311 use_operand_p use;
312 gimple use_stmt;
313
314 /* We only care about real loops (those with a latch). */
315 if (!father->latch)
316 return bb_rank[bb->index];
317
318 /* Interesting phis must be in headers of innermost loops. */
319 if (bb != father->header
320 || father->inner)
321 return bb_rank[bb->index];
322
323 /* Ignore virtual SSA_NAMEs. */
324 res = gimple_phi_result (stmt);
325 if (virtual_operand_p (res))
326 return bb_rank[bb->index];
327
328 /* The phi definition must have a single use, and that use must be
329 within the loop. Otherwise this isn't an accumulator pattern. */
330 if (!single_imm_use (res, &use, &use_stmt)
331 || gimple_bb (use_stmt)->loop_father != father)
332 return bb_rank[bb->index];
333
334 /* Look for phi arguments from within the loop. If found, bias this phi. */
335 for (i = 0; i < gimple_phi_num_args (stmt); i++)
336 {
337 tree arg = gimple_phi_arg_def (stmt, i);
338 if (TREE_CODE (arg) == SSA_NAME
339 && !SSA_NAME_IS_DEFAULT_DEF (arg))
340 {
341 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
342 if (gimple_bb (def_stmt)->loop_father == father)
343 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
344 }
345 }
346
347 /* Must be an uninteresting phi. */
348 return bb_rank[bb->index];
349}
350
351/* If EXP is an SSA_NAME defined by a PHI statement that represents a
352 loop-carried dependence of an innermost loop, return TRUE; else
353 return FALSE. */
354static bool
355loop_carried_phi (tree exp)
356{
357 gimple phi_stmt;
358 long block_rank;
359
360 if (TREE_CODE (exp) != SSA_NAME
361 || SSA_NAME_IS_DEFAULT_DEF (exp))
362 return false;
363
364 phi_stmt = SSA_NAME_DEF_STMT (exp);
365
366 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
367 return false;
368
369 /* Non-loop-carried phis have block rank. Loop-carried phis have
370 an additional bias added in. If this phi doesn't have block rank,
371 it's biased and should not be propagated. */
372 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
373
374 if (phi_rank (phi_stmt) != block_rank)
375 return true;
376
377 return false;
378}
379
380/* Return the maximum of RANK and the rank that should be propagated
381 from expression OP. For most operands, this is just the rank of OP.
382 For loop-carried phis, the value is zero to avoid undoing the bias
383 in favor of the phi. */
384static long
385propagate_rank (long rank, tree op)
386{
387 long op_rank;
388
389 if (loop_carried_phi (op))
390 return rank;
391
392 op_rank = get_rank (op);
393
394 return MAX (rank, op_rank);
395}
396
397/* Look up the operand rank structure for expression E. */
398
399static inline long
400find_operand_rank (tree e)
401{
402 long *slot = operand_rank->get (e);
403 return slot ? *slot : -1;
404}
405
406/* Insert {E,RANK} into the operand rank hashtable. */
407
408static inline void
409insert_operand_rank (tree e, long rank)
410{
411 gcc_assert (rank > 0);
412 gcc_assert (!operand_rank->put (e, rank));
413}
414
415/* Given an expression E, return the rank of the expression. */
416
417static long
418get_rank (tree e)
419{
420 /* Constants have rank 0. */
421 if (is_gimple_min_invariant (e))
422 return 0;
423
 424 /* SSA_NAMEs have the rank of the expression they are the result
 425 of.
426 For globals and uninitialized values, the rank is 0.
427 For function arguments, use the pre-setup rank.
428 For PHI nodes, stores, asm statements, etc, we use the rank of
429 the BB.
430 For simple operations, the rank is the maximum rank of any of
431 its operands, or the bb_rank, whichever is less.
 432 I make no claim that this is optimal; however, it gives good
 433 results. */
434
435 /* We make an exception to the normal ranking system to break
436 dependences of accumulator variables in loops. Suppose we
437 have a simple one-block loop containing:
438
439 x_1 = phi(x_0, x_2)
440 b = a + x_1
441 c = b + d
442 x_2 = c + e
443
444 As shown, each iteration of the calculation into x is fully
445 dependent upon the iteration before it. We would prefer to
446 see this in the form:
447
448 x_1 = phi(x_0, x_2)
449 b = a + d
450 c = b + e
451 x_2 = c + x_1
452
453 If the loop is unrolled, the calculations of b and c from
454 different iterations can be interleaved.
455
456 To obtain this result during reassociation, we bias the rank
457 of the phi definition x_1 upward, when it is recognized as an
458 accumulator pattern. The artificial rank causes it to be
459 added last, providing the desired independence. */
460
461 if (TREE_CODE (e) == SSA_NAME)
462 {
463 gimple stmt;
464 long rank;
465 int i, n;
466 tree op;
467
468 if (SSA_NAME_IS_DEFAULT_DEF (e))
469 return find_operand_rank (e);
470
471 stmt = SSA_NAME_DEF_STMT (e);
472 if (gimple_code (stmt) == GIMPLE_PHI)
473 return phi_rank (stmt);
474
475 if (!is_gimple_assign (stmt)
476 || gimple_vdef (stmt))
477 return bb_rank[gimple_bb (stmt)->index];
478
479 /* If we already have a rank for this expression, use that. */
480 rank = find_operand_rank (e);
481 if (rank != -1)
482 return rank;
483
484 /* Otherwise, find the maximum rank for the operands. As an
485 exception, remove the bias from loop-carried phis when propagating
486 the rank so that dependent operations are not also biased. */
487 rank = 0;
488 if (gimple_assign_single_p (stmt))
489 {
490 tree rhs = gimple_assign_rhs1 (stmt);
491 n = TREE_OPERAND_LENGTH (rhs);
492 if (n == 0)
493 rank = propagate_rank (rank, rhs);
494 else
495 {
496 for (i = 0; i < n; i++)
497 {
498 op = TREE_OPERAND (rhs, i);
499
500 if (op != NULL_TREE)
501 rank = propagate_rank (rank, op);
502 }
503 }
504 }
505 else
506 {
507 n = gimple_num_ops (stmt);
508 for (i = 1; i < n; i++)
509 {
510 op = gimple_op (stmt, i);
511 gcc_assert (op);
512 rank = propagate_rank (rank, op);
513 }
514 }
515
516 if (dump_file && (dump_flags & TDF_DETAILS))
517 {
518 fprintf (dump_file, "Rank for ");
519 print_generic_expr (dump_file, e, 0);
520 fprintf (dump_file, " is %ld\n", (rank + 1));
521 }
522
523 /* Note the rank in the hashtable so we don't recompute it. */
524 insert_operand_rank (e, (rank + 1));
525 return (rank + 1);
526 }
527
528 /* Globals, etc, are rank 0 */
529 return 0;
530}
531
532
533/* We want integer ones to end up last no matter what, since they are
534 the ones we can do the most with. */
535#define INTEGER_CONST_TYPE 1 << 3
536#define FLOAT_CONST_TYPE 1 << 2
537#define OTHER_CONST_TYPE 1 << 1
538
539/* Classify an invariant tree into integer, float, or other, so that
540 we can sort them to be near other constants of the same type. */
541static inline int
542constant_type (tree t)
543{
544 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
545 return INTEGER_CONST_TYPE;
546 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
547 return FLOAT_CONST_TYPE;
548 else
549 return OTHER_CONST_TYPE;
550}
551
552/* qsort comparison function to sort operand entries PA and PB by rank
553 so that the sorted array is ordered by rank in decreasing order. */
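
/* Illustrative only: entries with ranks {0, 3, 1, 2} end up ordered as
   {3, 2, 1, 0}, so the rank-0 constants land at the end of the vector
   where the constant-merging and constant-elimination code expects
   them.  */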
554static int
555sort_by_operand_rank (const void *pa, const void *pb)
556{
557 const operand_entry_t oea = *(const operand_entry_t *)pa;
558 const operand_entry_t oeb = *(const operand_entry_t *)pb;
559
560 /* It's nicer for optimize_expression if constants that are likely
 561 to fold when added/multiplied/whatever are put next to each
562 other. Since all constants have rank 0, order them by type. */
563 if (oeb->rank == 0 && oea->rank == 0)
564 {
565 if (constant_type (oeb->op) != constant_type (oea->op))
566 return constant_type (oeb->op) - constant_type (oea->op);
567 else
568 /* To make sorting result stable, we use unique IDs to determine
569 order. */
570 return oeb->id - oea->id;
571 }
572
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if ((oeb->rank - oea->rank == 0)
576 && TREE_CODE (oea->op) == SSA_NAME
577 && TREE_CODE (oeb->op) == SSA_NAME)
578 {
 579 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 580 versions of removed SSA_NAMEs, so if possible, prefer to sort
 581 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
582 See PR60418. */
583 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
584 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
585 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
586 {
587 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
588 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
589 basic_block bba = gimple_bb (stmta);
590 basic_block bbb = gimple_bb (stmtb);
591 if (bbb != bba)
592 {
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return bb_rank[bbb->index] - bb_rank[bba->index];
595 }
596 else
597 {
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
602 }
603 }
604
605 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
606 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
607 else
608 return oeb->id - oea->id;
609 }
610
611 if (oeb->rank != oea->rank)
612 return oeb->rank - oea->rank;
613 else
614 return oeb->id - oea->id;
615}
616
617/* Add an operand entry to *OPS for the tree operand OP. */
618
619static void
620add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
621{
622 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
623
624 oe->op = op;
625 oe->rank = get_rank (op);
626 oe->id = next_operand_entry_id++;
627 oe->count = 1;
628 ops->safe_push (oe);
629}
630
631/* Add an operand entry to *OPS for the tree operand OP with repeat
632 count REPEAT. */
633
634static void
635add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
636 HOST_WIDE_INT repeat)
637{
638 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
639
640 oe->op = op;
641 oe->rank = get_rank (op);
642 oe->id = next_operand_entry_id++;
643 oe->count = repeat;
644 ops->safe_push (oe);
645
646 reassociate_stats.pows_encountered++;
647}
648
 649/* Return true if STMT is a reassociable operation containing a binary
650 operation with tree code CODE, and is inside LOOP. */
651
652static bool
653is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
654{
655 basic_block bb = gimple_bb (stmt);
656
657 if (gimple_bb (stmt) == NULL)
658 return false;
659
660 if (!flow_bb_inside_loop_p (loop, bb))
661 return false;
662
663 if (is_gimple_assign (stmt)
664 && gimple_assign_rhs_code (stmt) == code
665 && has_single_use (gimple_assign_lhs (stmt)))
666 return true;
667
668 return false;
669}
670
671
 672/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 673 operand of that operation. Otherwise, return NULL_TREE. */
674
675static tree
676get_unary_op (tree name, enum tree_code opcode)
677{
678 gimple stmt = SSA_NAME_DEF_STMT (name);
679
680 if (!is_gimple_assign (stmt))
681 return NULL_TREE;
682
683 if (gimple_assign_rhs_code (stmt) == opcode)
684 return gimple_assign_rhs1 (stmt);
685 return NULL_TREE;
686}
687
688/* If CURR and LAST are a pair of ops that OPCODE allows us to
689 eliminate through equivalences, do so, remove them from OPS, and
690 return true. Otherwise, return false. */
691
692static bool
693eliminate_duplicate_pair (enum tree_code opcode,
694 vec<operand_entry_t> *ops,
695 bool *all_done,
696 unsigned int i,
697 operand_entry_t curr,
698 operand_entry_t last)
699{
700
701 /* If we have two of the same op, and the opcode is & |, min, or max,
702 we can eliminate one of them.
703 If we have two of the same op, and the opcode is ^, we can
704 eliminate both of them. */
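
  /* Illustratively: in x & x & y one of the two x entries is dropped,
     leaving x & y; in x ^ x ^ y both x entries are dropped, leaving
     just y (or a plain 0 when nothing else remains).  */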
705
706 if (last && last->op == curr->op)
707 {
708 switch (opcode)
709 {
710 case MAX_EXPR:
711 case MIN_EXPR:
712 case BIT_IOR_EXPR:
713 case BIT_AND_EXPR:
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 {
716 fprintf (dump_file, "Equivalence: ");
717 print_generic_expr (dump_file, curr->op, 0);
718 fprintf (dump_file, " [&|minmax] ");
719 print_generic_expr (dump_file, last->op, 0);
720 fprintf (dump_file, " -> ");
721 print_generic_stmt (dump_file, last->op, 0);
722 }
723
724 ops->ordered_remove (i);
725 reassociate_stats.ops_eliminated ++;
726
727 return true;
728
729 case BIT_XOR_EXPR:
730 if (dump_file && (dump_flags & TDF_DETAILS))
731 {
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, curr->op, 0);
734 fprintf (dump_file, " ^ ");
735 print_generic_expr (dump_file, last->op, 0);
736 fprintf (dump_file, " -> nothing\n");
737 }
738
739 reassociate_stats.ops_eliminated += 2;
740
741 if (ops->length () == 2)
742 {
743 ops->create (0);
744 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
745 *all_done = true;
746 }
747 else
748 {
749 ops->ordered_remove (i-1);
750 ops->ordered_remove (i-1);
751 }
752
753 return true;
754
755 default:
756 break;
757 }
758 }
759 return false;
760}
761
762static vec<tree> plus_negates;
763
 764/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise-not
765 expression, look in OPS for a corresponding positive operation to cancel
766 it out. If we find one, remove the other from OPS, replace
767 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
768 return false. */
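
/* For example (illustrative): with PLUS_EXPR operands {-a, b, a}, the
   -a entry cancels against a; both are removed and a 0 is appended,
   leaving {b, 0}.  Likewise a ~a entry combines with a into -1, since
   a + ~a == -1 in two's complement.  */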
769
770static bool
771eliminate_plus_minus_pair (enum tree_code opcode,
772 vec<operand_entry_t> *ops,
773 unsigned int currindex,
774 operand_entry_t curr)
775{
776 tree negateop;
777 tree notop;
778 unsigned int i;
779 operand_entry_t oe;
780
781 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
782 return false;
783
784 negateop = get_unary_op (curr->op, NEGATE_EXPR);
785 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
786 if (negateop == NULL_TREE && notop == NULL_TREE)
787 return false;
788
789 /* Any non-negated version will have a rank that is one less than
790 the current rank. So once we hit those ranks, if we don't find
791 one, we can stop. */
792
793 for (i = currindex + 1;
794 ops->iterate (i, &oe)
795 && oe->rank >= curr->rank - 1 ;
796 i++)
797 {
798 if (oe->op == negateop)
799 {
800
801 if (dump_file && (dump_flags & TDF_DETAILS))
802 {
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, negateop, 0);
805 fprintf (dump_file, " + -");
806 print_generic_expr (dump_file, oe->op, 0);
807 fprintf (dump_file, " -> 0\n");
808 }
809
810 ops->ordered_remove (i);
811 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
812 ops->ordered_remove (currindex);
813 reassociate_stats.ops_eliminated ++;
814
815 return true;
816 }
817 else if (oe->op == notop)
818 {
819 tree op_type = TREE_TYPE (oe->op);
820
821 if (dump_file && (dump_flags & TDF_DETAILS))
822 {
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, notop, 0);
825 fprintf (dump_file, " + ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 fprintf (dump_file, " -> -1\n");
828 }
829
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
834
835 return true;
836 }
837 }
838
839 /* CURR->OP is a negate expr in a plus expr: save it for later
840 inspection in repropagate_negates(). */
841 if (negateop != NULL_TREE)
842 plus_negates.safe_push (curr->op);
843
844 return false;
845}
846
 847/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
 848 bitwise-not expression, look in OPS for a corresponding operand to
 849 cancel it out. If we find one, the whole expression collapses to a
 850 constant: replace OPS with a single 0 (for &) or -1 (for |) and
 851 return true. Otherwise, return false. */
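
/* For example (illustrative): a & b & ~a collapses to 0 and
   a | b | ~a collapses to -1 (all ones); in both cases the entire
   operand list is replaced by that single constant.  */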
852
853static bool
854eliminate_not_pairs (enum tree_code opcode,
855 vec<operand_entry_t> *ops,
856 unsigned int currindex,
857 operand_entry_t curr)
858{
859 tree notop;
860 unsigned int i;
861 operand_entry_t oe;
862
863 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864 || TREE_CODE (curr->op) != SSA_NAME)
865 return false;
866
867 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868 if (notop == NULL_TREE)
869 return false;
870
871 /* Any non-not version will have a rank that is one less than
872 the current rank. So once we hit those ranks, if we don't find
873 one, we can stop. */
874
875 for (i = currindex + 1;
876 ops->iterate (i, &oe)
877 && oe->rank >= curr->rank - 1;
878 i++)
879 {
880 if (oe->op == notop)
881 {
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 {
884 fprintf (dump_file, "Equivalence: ");
885 print_generic_expr (dump_file, notop, 0);
886 if (opcode == BIT_AND_EXPR)
887 fprintf (dump_file, " & ~");
888 else if (opcode == BIT_IOR_EXPR)
889 fprintf (dump_file, " | ~");
890 print_generic_expr (dump_file, oe->op, 0);
891 if (opcode == BIT_AND_EXPR)
892 fprintf (dump_file, " -> 0\n");
893 else if (opcode == BIT_IOR_EXPR)
894 fprintf (dump_file, " -> -1\n");
895 }
896
897 if (opcode == BIT_AND_EXPR)
898 oe->op = build_zero_cst (TREE_TYPE (oe->op));
899 else if (opcode == BIT_IOR_EXPR)
900 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901
902 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oe);
905 return true;
906 }
907 }
908
909 return false;
910}
911
 912/* Use the constant value that may be present in OPS to try to eliminate
913 operands. Note that this function is only really used when we've
914 eliminated ops for other reasons, or merged constants. Across
915 single statements, fold already does all of this, plus more. There
916 is little point in duplicating logic, so I've only included the
917 identities that I could ever construct testcases to trigger. */
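
/* Illustratively, the identities handled below are:
     x & 0  -> 0        x & -1 -> x
     x | -1 -> -1       x | 0  -> x
     x * 0  -> 0        x * 1  -> x   (with the usual FP caveats)
     x + 0  -> x        x ^ 0  -> x.  */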
918
919static void
920eliminate_using_constants (enum tree_code opcode,
921 vec<operand_entry_t> *ops)
922{
923 operand_entry_t oelast = ops->last ();
924 tree type = TREE_TYPE (oelast->op);
925
926 if (oelast->rank == 0
927 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 {
929 switch (opcode)
930 {
931 case BIT_AND_EXPR:
932 if (integer_zerop (oelast->op))
933 {
934 if (ops->length () != 1)
935 {
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found & 0, removing all other ops\n");
938
939 reassociate_stats.ops_eliminated += ops->length () - 1;
940
941 ops->truncate (0);
942 ops->quick_push (oelast);
943 return;
944 }
945 }
946 else if (integer_all_onesp (oelast->op))
947 {
948 if (ops->length () != 1)
949 {
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & -1, removing\n");
952 ops->pop ();
953 reassociate_stats.ops_eliminated++;
954 }
955 }
956 break;
957 case BIT_IOR_EXPR:
958 if (integer_all_onesp (oelast->op))
959 {
960 if (ops->length () != 1)
961 {
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "Found | -1, removing all other ops\n");
964
965 reassociate_stats.ops_eliminated += ops->length () - 1;
966
967 ops->truncate (0);
968 ops->quick_push (oelast);
969 return;
970 }
971 }
972 else if (integer_zerop (oelast->op))
973 {
974 if (ops->length () != 1)
975 {
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | 0, removing\n");
978 ops->pop ();
979 reassociate_stats.ops_eliminated++;
980 }
981 }
982 break;
983 case MULT_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_NANS (type)
987 && !HONOR_SIGNED_ZEROS (type)
988 && real_zerop (oelast->op)))
989 {
990 if (ops->length () != 1)
991 {
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found * 0, removing all other ops\n");
994
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (1);
997 ops->quick_push (oelast);
998 return;
999 }
1000 }
1001 else if (integer_onep (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && !HONOR_SNANS (type)
1004 && real_onep (oelast->op)))
1005 {
1006 if (ops->length () != 1)
1007 {
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 1, removing\n");
1010 ops->pop ();
1011 reassociate_stats.ops_eliminated++;
1012 return;
1013 }
1014 }
1015 break;
1016 case BIT_XOR_EXPR:
1017 case PLUS_EXPR:
1018 case MINUS_EXPR:
1019 if (integer_zerop (oelast->op)
1020 || (FLOAT_TYPE_P (type)
1021 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022 && fold_real_zero_addition_p (type, oelast->op,
1023 opcode == MINUS_EXPR)))
1024 {
1025 if (ops->length () != 1)
1026 {
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "Found [|^+] 0, removing\n");
1029 ops->pop ();
1030 reassociate_stats.ops_eliminated++;
1031 return;
1032 }
1033 }
1034 break;
1035 default:
1036 break;
1037 }
1038 }
1039}
1040
1041
1042static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1043 bool, bool);
1044
1045/* Structure for tracking and counting operands. */
1046typedef struct oecount_s {
1047 int cnt;
1048 int id;
1049 enum tree_code oecode;
1050 tree op;
1051} oecount;
1052
1053
1054/* The heap for the oecount hashtable and the sorted list of operands. */
1055static vec<oecount> cvec;
1056
1057
1058/* Oecount hashtable helpers. */
1059
1060struct oecount_hasher
1061{
1062 typedef int value_type;
1063 typedef int compare_type;
1064 typedef int store_values_directly;
1065 static inline hashval_t hash (const value_type &);
1066 static inline bool equal (const value_type &, const compare_type &);
1067 static bool is_deleted (int &v) { return v == 1; }
1068 static void mark_deleted (int &e) { e = 1; }
1069 static bool is_empty (int &v) { return v == 0; }
1070 static void mark_empty (int &e) { e = 0; }
1071 static void remove (int &) {}
1072};
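
/* Values stored in the table are indices into CVEC biased by 42 (see
   undistribute_ops_list), so a stored value can never collide with the
   0 and 1 used above as the empty and deleted markers.  */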
1073
1074/* Hash function for oecount. */
1075
1076inline hashval_t
1077oecount_hasher::hash (const value_type &p)
1078{
1079 const oecount *c = &cvec[p - 42];
1080 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1081}
1082
1083/* Comparison function for oecount. */
1084
1085inline bool
1086oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1087{
1088 const oecount *c1 = &cvec[p1 - 42];
1089 const oecount *c2 = &cvec[p2 - 42];
1090 return (c1->oecode == c2->oecode
1091 && c1->op == c2->op);
1092}
1093
1094/* Comparison function for qsort sorting oecount elements by count. */
1095
1096static int
1097oecount_cmp (const void *p1, const void *p2)
1098{
1099 const oecount *c1 = (const oecount *)p1;
1100 const oecount *c2 = (const oecount *)p2;
1101 if (c1->cnt != c2->cnt)
1102 return c1->cnt - c2->cnt;
1103 else
1104 /* If counts are identical, use unique IDs to stabilize qsort. */
1105 return c1->id - c2->id;
1106}
1107
1108/* Return TRUE iff STMT represents a builtin call that raises OP
1109 to some exponent. */
1110
1111static bool
1112stmt_is_power_of_op (gimple stmt, tree op)
1113{
1114 tree fndecl;
1115
1116 if (!is_gimple_call (stmt))
1117 return false;
1118
1119 fndecl = gimple_call_fndecl (stmt);
1120
1121 if (!fndecl
1122 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1123 return false;
1124
1125 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1126 {
1127 CASE_FLT_FN (BUILT_IN_POW):
1128 CASE_FLT_FN (BUILT_IN_POWI):
1129 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1130
1131 default:
1132 return false;
1133 }
1134}
1135
1136/* Given STMT which is a __builtin_pow* call, decrement its exponent
1137 in place and return the result. Assumes that stmt_is_power_of_op
1138 was previously called for STMT and returned TRUE. */
1139
1140static HOST_WIDE_INT
1141decrement_power (gimple stmt)
1142{
1143 REAL_VALUE_TYPE c, cint;
1144 HOST_WIDE_INT power;
1145 tree arg1;
1146
1147 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1148 {
1149 CASE_FLT_FN (BUILT_IN_POW):
1150 arg1 = gimple_call_arg (stmt, 1);
1151 c = TREE_REAL_CST (arg1);
1152 power = real_to_integer (&c) - 1;
1153 real_from_integer (&cint, VOIDmode, power, SIGNED);
1154 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1155 return power;
1156
1157 CASE_FLT_FN (BUILT_IN_POWI):
1158 arg1 = gimple_call_arg (stmt, 1);
1159 power = TREE_INT_CST_LOW (arg1) - 1;
1160 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1161 return power;
1162
1163 default:
1164 gcc_unreachable ();
1165 }
1166}
1167
1168/* Find the single immediate use of STMT's LHS, and replace it
1169 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1170 replace *DEF with OP as well. */
1171
1172static void
1173propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1174{
1175 tree lhs;
1176 gimple use_stmt;
1177 use_operand_p use;
1178 gimple_stmt_iterator gsi;
1179
1180 if (is_gimple_call (stmt))
1181 lhs = gimple_call_lhs (stmt);
1182 else
1183 lhs = gimple_assign_lhs (stmt);
1184
1185 gcc_assert (has_single_use (lhs));
1186 single_imm_use (lhs, &use, &use_stmt);
1187 if (lhs == *def)
1188 *def = op;
1189 SET_USE (use, op);
1190 if (TREE_CODE (op) != SSA_NAME)
1191 update_stmt (use_stmt);
1192 gsi = gsi_for_stmt (stmt);
1193 unlink_stmt_vdef (stmt);
1194 reassoc_remove_stmt (&gsi);
1195 release_defs (stmt);
1196}
1197
1198/* Walks the linear chain with result *DEF searching for an operation
1199 with operand OP and code OPCODE removing that from the chain. *DEF
1200 is updated if there is only one operand but no operation left. */
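
/* For example (illustrative): with the chain t1 = a * c; t2 = t1 * b and
   *DEF being t2, zero_one_operation (&def, MULT_EXPR, c) finds the
   statement t1 = a * c, propagates a into t1's single use so that t2
   becomes a * b, and removes the t1 statement.  */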
1201
1202static void
1203zero_one_operation (tree *def, enum tree_code opcode, tree op)
1204{
1205 gimple stmt = SSA_NAME_DEF_STMT (*def);
1206
1207 do
1208 {
1209 tree name;
1210
1211 if (opcode == MULT_EXPR
1212 && stmt_is_power_of_op (stmt, op))
1213 {
1214 if (decrement_power (stmt) == 1)
1215 propagate_op_to_single_use (op, stmt, def);
1216 return;
1217 }
1218
1219 name = gimple_assign_rhs1 (stmt);
1220
1221 /* If this is the operation we look for and one of the operands
1222 is ours simply propagate the other operand into the stmts
1223 single use. */
1224 if (gimple_assign_rhs_code (stmt) == opcode
1225 && (name == op
1226 || gimple_assign_rhs2 (stmt) == op))
1227 {
1228 if (name == op)
1229 name = gimple_assign_rhs2 (stmt);
1230 propagate_op_to_single_use (name, stmt, def);
1231 return;
1232 }
1233
1234 /* We might have a multiply of two __builtin_pow* calls, and
1235 the operand might be hiding in the rightmost one. */
1236 if (opcode == MULT_EXPR
1237 && gimple_assign_rhs_code (stmt) == opcode
1238 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1239 && has_single_use (gimple_assign_rhs2 (stmt)))
1240 {
1241 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1242 if (stmt_is_power_of_op (stmt2, op))
1243 {
1244 if (decrement_power (stmt2) == 1)
1245 propagate_op_to_single_use (op, stmt2, def);
1246 return;
1247 }
1248 }
1249
1250 /* Continue walking the chain. */
1251 gcc_assert (name != op
1252 && TREE_CODE (name) == SSA_NAME);
1253 stmt = SSA_NAME_DEF_STMT (name);
1254 }
1255 while (1);
1256}
1257
1258/* Returns true if statement S1 dominates statement S2. Like
1259 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1260
1261static bool
1262reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1263{
1264 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1265
1266 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1267 SSA_NAME. Assume it lives at the beginning of function and
1268 thus dominates everything. */
1269 if (!bb1 || s1 == s2)
1270 return true;
1271
1272 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1273 if (!bb2)
1274 return false;
1275
1276 if (bb1 == bb2)
1277 {
1278 /* PHIs in the same basic block are assumed to be
 1279 executed all in parallel; if only one stmt is a PHI,
1280 it dominates the other stmt in the same basic block. */
1281 if (gimple_code (s1) == GIMPLE_PHI)
1282 return true;
1283
1284 if (gimple_code (s2) == GIMPLE_PHI)
1285 return false;
1286
1287 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1288
1289 if (gimple_uid (s1) < gimple_uid (s2))
1290 return true;
1291
1292 if (gimple_uid (s1) > gimple_uid (s2))
1293 return false;
1294
1295 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1296 unsigned int uid = gimple_uid (s1);
1297 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1298 {
1299 gimple s = gsi_stmt (gsi);
1300 if (gimple_uid (s) != uid)
1301 break;
1302 if (s == s2)
1303 return true;
1304 }
1305
1306 return false;
1307 }
1308
1309 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1310}
1311
1312/* Insert STMT after INSERT_POINT. */
1313
1314static void
1315insert_stmt_after (gimple stmt, gimple insert_point)
1316{
1317 gimple_stmt_iterator gsi;
1318 basic_block bb;
1319
1320 if (gimple_code (insert_point) == GIMPLE_PHI)
1321 bb = gimple_bb (insert_point);
1322 else if (!stmt_ends_bb_p (insert_point))
1323 {
1324 gsi = gsi_for_stmt (insert_point);
1325 gimple_set_uid (stmt, gimple_uid (insert_point));
1326 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1327 return;
1328 }
1329 else
1330 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1331 thus if it must end a basic block, it should be a call that can
1332 throw, or some assignment that can throw. If it throws, the LHS
1333 of it will not be initialized though, so only valid places using
1334 the SSA_NAME should be dominated by the fallthru edge. */
1335 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1336 gsi = gsi_after_labels (bb);
1337 if (gsi_end_p (gsi))
1338 {
1339 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1340 gimple_set_uid (stmt,
1341 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 }
1343 else
1344 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1345 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1346}
1347
1348/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1349 the result. Places the statement after the definition of either
1350 OP1 or OP2. Returns the new statement. */
1351
1352static gimple
1353build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1354{
1355 gimple op1def = NULL, op2def = NULL;
1356 gimple_stmt_iterator gsi;
1357 tree op;
1358 gassign *sum;
1359
1360 /* Create the addition statement. */
1361 op = make_ssa_name (type);
1362 sum = gimple_build_assign (op, opcode, op1, op2);
1363
1364 /* Find an insertion place and insert. */
1365 if (TREE_CODE (op1) == SSA_NAME)
1366 op1def = SSA_NAME_DEF_STMT (op1);
1367 if (TREE_CODE (op2) == SSA_NAME)
1368 op2def = SSA_NAME_DEF_STMT (op2);
1369 if ((!op1def || gimple_nop_p (op1def))
1370 && (!op2def || gimple_nop_p (op2def)))
1371 {
1372 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1373 if (gsi_end_p (gsi))
1374 {
1375 gimple_stmt_iterator gsi2
1376 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1377 gimple_set_uid (sum,
1378 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1379 }
1380 else
1381 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1382 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1383 }
1384 else
1385 {
1386 gimple insert_point;
1387 if ((!op1def || gimple_nop_p (op1def))
1388 || (op2def && !gimple_nop_p (op2def)
1389 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1390 insert_point = op2def;
1391 else
1392 insert_point = op1def;
1393 insert_stmt_after (sum, insert_point);
1394 }
1395 update_stmt (sum);
1396
1397 return sum;
1398}
1399
1400/* Perform un-distribution of divisions and multiplications.
1401 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1402 to (A + B) / X for real X.
1403
1404 The algorithm is organized as follows.
1405
1406 - First we walk the addition chain *OPS looking for summands that
1407 are defined by a multiplication or a real division. This results
1408 in the candidates bitmap with relevant indices into *OPS.
1409
1410 - Second we build the chains of multiplications or divisions for
1411 these candidates, counting the number of occurrences of (operand, code)
1412 pairs in all of the candidates chains.
1413
1414 - Third we sort the (operand, code) pairs by number of occurrence and
1415 process them starting with the pair with the most uses.
1416
1417 * For each such pair we walk the candidates again to build a
1418 second candidate bitmap noting all multiplication/division chains
1419 that have at least one occurrence of (operand, code).
1420
1421 * We build an alternate addition chain only covering these
1422 candidates with one (operand, code) operation removed from their
1423 multiplication/division chain.
1424
1425 * The first candidate gets replaced by the alternate addition chain
1426 multiplied/divided by the operand.
1427
1428 * All candidate chains get disabled for further processing and
1429 processing of (operand, code) pairs continues.
1430
1431 The alternate addition chains built are re-processed by the main
1432 reassociation algorithm which allows optimizing a * x * y + b * y * x
 1433 to (a + b) * x * y in one invocation of the reassociation pass. */
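
/* A small worked example (illustrative): for the chain a*x + b*x + c,
   the candidates are the a*x and b*x summands and the (x, MULT_EXPR)
   pair is counted twice.  x is stripped from both chains, the alternate
   chain a + b is built, the first candidate becomes (a + b) * x, the
   second becomes 0, and the list a*x + b*x + c has turned into
   (a + b) * x + 0 + c for the rest of the pass to finish off.  */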
1434
1435static bool
1436undistribute_ops_list (enum tree_code opcode,
1437 vec<operand_entry_t> *ops, struct loop *loop)
1438{
1439 unsigned int length = ops->length ();
1440 operand_entry_t oe1;
1441 unsigned i, j;
1442 sbitmap candidates, candidates2;
1443 unsigned nr_candidates, nr_candidates2;
1444 sbitmap_iterator sbi0;
1445 vec<operand_entry_t> *subops;
1446 bool changed = false;
1447 int next_oecount_id = 0;
1448
1449 if (length <= 1
1450 || opcode != PLUS_EXPR)
1451 return false;
1452
1453 /* Build a list of candidates to process. */
1454 candidates = sbitmap_alloc (length);
1455 bitmap_clear (candidates);
1456 nr_candidates = 0;
1457 FOR_EACH_VEC_ELT (*ops, i, oe1)
1458 {
1459 enum tree_code dcode;
1460 gimple oe1def;
1461
1462 if (TREE_CODE (oe1->op) != SSA_NAME)
1463 continue;
1464 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1465 if (!is_gimple_assign (oe1def))
1466 continue;
1467 dcode = gimple_assign_rhs_code (oe1def);
1468 if ((dcode != MULT_EXPR
1469 && dcode != RDIV_EXPR)
1470 || !is_reassociable_op (oe1def, dcode, loop))
1471 continue;
1472
1473 bitmap_set_bit (candidates, i);
1474 nr_candidates++;
1475 }
1476
1477 if (nr_candidates < 2)
1478 {
1479 sbitmap_free (candidates);
1480 return false;
1481 }
1482
1483 if (dump_file && (dump_flags & TDF_DETAILS))
1484 {
1485 fprintf (dump_file, "searching for un-distribute opportunities ");
1486 print_generic_expr (dump_file,
1487 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1488 fprintf (dump_file, " %d\n", nr_candidates);
1489 }
1490
1491 /* Build linearized sub-operand lists and the counting table. */
1492 cvec.create (0);
1493
1494 hash_table<oecount_hasher> ctable (15);
1495
1496 /* ??? Macro arguments cannot have multi-argument template types in
 1497 them. This typedef is needed to work around that limitation. */
1498 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1499 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1500 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1501 {
1502 gimple oedef;
1503 enum tree_code oecode;
1504 unsigned j;
1505
1506 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1507 oecode = gimple_assign_rhs_code (oedef);
1508 linearize_expr_tree (&subops[i], oedef,
1509 associative_tree_code (oecode), false);
1510
1511 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1512 {
1513 oecount c;
1514 int *slot;
1515 int idx;
1516 c.oecode = oecode;
1517 c.cnt = 1;
1518 c.id = next_oecount_id++;
1519 c.op = oe1->op;
1520 cvec.safe_push (c);
1521 idx = cvec.length () + 41;
1522 slot = ctable.find_slot (idx, INSERT);
1523 if (!*slot)
1524 {
1525 *slot = idx;
1526 }
1527 else
1528 {
1529 cvec.pop ();
1530 cvec[*slot - 42].cnt++;
1531 }
1532 }
1533 }
1534
1535 /* Sort the counting table. */
1536 cvec.qsort (oecount_cmp);
1537
1538 if (dump_file && (dump_flags & TDF_DETAILS))
1539 {
1540 oecount *c;
1541 fprintf (dump_file, "Candidates:\n");
1542 FOR_EACH_VEC_ELT (cvec, j, c)
1543 {
1544 fprintf (dump_file, " %u %s: ", c->cnt,
1545 c->oecode == MULT_EXPR
1546 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1547 print_generic_expr (dump_file, c->op, 0);
1548 fprintf (dump_file, "\n");
1549 }
1550 }
1551
1552 /* Process the (operand, code) pairs in order of most occurrence. */
1553 candidates2 = sbitmap_alloc (length);
1554 while (!cvec.is_empty ())
1555 {
1556 oecount *c = &cvec.last ();
1557 if (c->cnt < 2)
1558 break;
1559
1560 /* Now collect the operands in the outer chain that contain
1561 the common operand in their inner chain. */
1562 bitmap_clear (candidates2);
1563 nr_candidates2 = 0;
1564 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1565 {
1566 gimple oedef;
1567 enum tree_code oecode;
1568 unsigned j;
1569 tree op = (*ops)[i]->op;
1570
1571 /* If we undistributed in this chain already this may be
1572 a constant. */
1573 if (TREE_CODE (op) != SSA_NAME)
1574 continue;
1575
1576 oedef = SSA_NAME_DEF_STMT (op);
1577 oecode = gimple_assign_rhs_code (oedef);
1578 if (oecode != c->oecode)
1579 continue;
1580
1581 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1582 {
1583 if (oe1->op == c->op)
1584 {
1585 bitmap_set_bit (candidates2, i);
1586 ++nr_candidates2;
1587 break;
1588 }
1589 }
1590 }
1591
1592 if (nr_candidates2 >= 2)
1593 {
1594 operand_entry_t oe1, oe2;
1595 gimple prod;
1596 int first = bitmap_first_set_bit (candidates2);
1597
1598 /* Build the new addition chain. */
1599 oe1 = (*ops)[first];
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1601 {
1602 fprintf (dump_file, "Building (");
1603 print_generic_expr (dump_file, oe1->op, 0);
1604 }
1605 zero_one_operation (&oe1->op, c->oecode, c->op);
1606 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1607 {
1608 gimple sum;
1609 oe2 = (*ops)[i];
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1611 {
1612 fprintf (dump_file, " + ");
1613 print_generic_expr (dump_file, oe2->op, 0);
1614 }
1615 zero_one_operation (&oe2->op, c->oecode, c->op);
1616 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1617 oe1->op, oe2->op, opcode);
1618 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1619 oe2->rank = 0;
1620 oe1->op = gimple_get_lhs (sum);
1621 }
1622
1623 /* Apply the multiplication/division. */
1624 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1625 oe1->op, c->op, c->oecode);
1626 if (dump_file && (dump_flags & TDF_DETAILS))
1627 {
1628 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1629 print_generic_expr (dump_file, c->op, 0);
1630 fprintf (dump_file, "\n");
1631 }
1632
1633 /* Record it in the addition chain and disable further
1634 undistribution with this op. */
1635 oe1->op = gimple_assign_lhs (prod);
1636 oe1->rank = get_rank (oe1->op);
1637 subops[first].release ();
1638
1639 changed = true;
1640 }
1641
1642 cvec.pop ();
1643 }
1644
1645 for (i = 0; i < ops->length (); ++i)
1646 subops[i].release ();
1647 free (subops);
1648 cvec.release ();
1649 sbitmap_free (candidates);
1650 sbitmap_free (candidates2);
1651
1652 return changed;
1653}
1654
1655/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1656 expression, examine the other OPS to see if any of them are comparisons
1657 of the same values, which we may be able to combine or eliminate.
1658 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1659
1660static bool
1661eliminate_redundant_comparison (enum tree_code opcode,
1662 vec<operand_entry_t> *ops,
1663 unsigned int currindex,
1664 operand_entry_t curr)
1665{
1666 tree op1, op2;
1667 enum tree_code lcode, rcode;
1668 gimple def1, def2;
1669 int i;
1670 operand_entry_t oe;
1671
1672 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1673 return false;
1674
1675 /* Check that CURR is a comparison. */
1676 if (TREE_CODE (curr->op) != SSA_NAME)
1677 return false;
1678 def1 = SSA_NAME_DEF_STMT (curr->op);
1679 if (!is_gimple_assign (def1))
1680 return false;
1681 lcode = gimple_assign_rhs_code (def1);
1682 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1683 return false;
1684 op1 = gimple_assign_rhs1 (def1);
1685 op2 = gimple_assign_rhs2 (def1);
1686
1687 /* Now look for a similar comparison in the remaining OPS. */
1688 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1689 {
1690 tree t;
1691
1692 if (TREE_CODE (oe->op) != SSA_NAME)
1693 continue;
1694 def2 = SSA_NAME_DEF_STMT (oe->op);
1695 if (!is_gimple_assign (def2))
1696 continue;
1697 rcode = gimple_assign_rhs_code (def2);
1698 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1699 continue;
1700
1701 /* If we got here, we have a match. See if we can combine the
1702 two comparisons. */
1703 if (opcode == BIT_IOR_EXPR)
1704 t = maybe_fold_or_comparisons (lcode, op1, op2,
1705 rcode, gimple_assign_rhs1 (def2),
1706 gimple_assign_rhs2 (def2));
1707 else
1708 t = maybe_fold_and_comparisons (lcode, op1, op2,
1709 rcode, gimple_assign_rhs1 (def2),
1710 gimple_assign_rhs2 (def2));
1711 if (!t)
1712 continue;
1713
1714 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1715 always give us a boolean_type_node value back. If the original
1716 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1717 we need to convert. */
1718 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1719 t = fold_convert (TREE_TYPE (curr->op), t);
1720
1721 if (TREE_CODE (t) != INTEGER_CST
1722 && !operand_equal_p (t, curr->op, 0))
1723 {
1724 enum tree_code subcode;
1725 tree newop1, newop2;
1726 if (!COMPARISON_CLASS_P (t))
1727 continue;
1728 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729 STRIP_USELESS_TYPE_CONVERSION (newop1);
1730 STRIP_USELESS_TYPE_CONVERSION (newop2);
1731 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1732 continue;
1733 }
1734
1735 if (dump_file && (dump_flags & TDF_DETAILS))
1736 {
1737 fprintf (dump_file, "Equivalence: ");
1738 print_generic_expr (dump_file, curr->op, 0);
1739 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1740 print_generic_expr (dump_file, oe->op, 0);
1741 fprintf (dump_file, " -> ");
1742 print_generic_expr (dump_file, t, 0);
1743 fprintf (dump_file, "\n");
1744 }
1745
1746 /* Now we can delete oe, as it has been subsumed by the new combined
1747 expression t. */
1748 ops->ordered_remove (i);
1749 reassociate_stats.ops_eliminated ++;
1750
1751 /* If t is the same as curr->op, we're done. Otherwise we must
1752 replace curr->op with t. Special case is if we got a constant
1753 back, in which case we add it to the end instead of in place of
1754 the current entry. */
1755 if (TREE_CODE (t) == INTEGER_CST)
1756 {
1757 ops->ordered_remove (currindex);
1758 add_to_ops_vec (ops, t);
1759 }
1760 else if (!operand_equal_p (t, curr->op, 0))
1761 {
1762 gimple sum;
1763 enum tree_code subcode;
1764 tree newop1;
1765 tree newop2;
1766 gcc_assert (COMPARISON_CLASS_P (t));
1767 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1768 STRIP_USELESS_TYPE_CONVERSION (newop1);
1769 STRIP_USELESS_TYPE_CONVERSION (newop2);
1770 gcc_checking_assert (is_gimple_val (newop1)
1771 && is_gimple_val (newop2));
1772 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1773 curr->op = gimple_get_lhs (sum);
1774 }
1775 return true;
1776 }
1777
1778 return false;
1779}
1780
1781/* Perform various identities and other optimizations on the list of
1782 operand entries, stored in OPS. The tree code for the binary
1783 operation between all the operands is OPCODE. */
1784
1785static void
1786optimize_ops_list (enum tree_code opcode,
1787 vec<operand_entry_t> *ops)
1788{
1789 unsigned int length = ops->length ();
1790 unsigned int i;
1791 operand_entry_t oe;
1792 operand_entry_t oelast = NULL;
1793 bool iterate = false;
1794
1795 if (length == 1)
1796 return;
1797
1798 oelast = ops->last ();
1799
1800 /* If the last two are constants, pop the constants off, merge them
1801 and try the next two. */
1802 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1803 {
1804 operand_entry_t oelm1 = (*ops)[length - 2];
1805
1806 if (oelm1->rank == 0
1807 && is_gimple_min_invariant (oelm1->op)
1808 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1809 TREE_TYPE (oelast->op)))
1810 {
1811 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1812 oelm1->op, oelast->op);
1813
1814 if (folded && is_gimple_min_invariant (folded))
1815 {
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1817 fprintf (dump_file, "Merging constants\n");
1818
1819 ops->pop ();
1820 ops->pop ();
1821
1822 add_to_ops_vec (ops, folded);
1823 reassociate_stats.constants_eliminated++;
1824
1825 optimize_ops_list (opcode, ops);
1826 return;
1827 }
1828 }
1829 }
1830
1831 eliminate_using_constants (opcode, ops);
1832 oelast = NULL;
1833
1834 for (i = 0; ops->iterate (i, &oe);)
1835 {
1836 bool done = false;
1837
1838 if (eliminate_not_pairs (opcode, ops, i, oe))
1839 return;
1840 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1841 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1842 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1843 {
1844 if (done)
1845 return;
1846 iterate = true;
1847 oelast = NULL;
1848 continue;
1849 }
1850 oelast = oe;
1851 i++;
1852 }
1853
1854 length = ops->length ();
1855 oelast = ops->last ();
1856
1857 if (iterate)
1858 optimize_ops_list (opcode, ops);
1859}
1860
1861/* The following functions are subroutines to optimize_range_tests and allow
1862 it to try to change a logical combination of comparisons into a range
1863 test.
1864
1865 For example, both
1866 X == 2 || X == 5 || X == 3 || X == 4
1867 and
1868 X >= 2 && X <= 5
1869 are converted to
1870 (unsigned) (X - 2) <= 3
1871
1872 For more information see comments above fold_test_range in fold-const.c,
1873 this implementation is for GIMPLE. */
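
/* The single unsigned comparison works because subtracting the low
   bound maps the in-range values [2, 5] onto [0, 3], while anything
   below 2 wraps around to a very large unsigned value, so one <= test
   checks both bounds at once.  */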
1874
1875struct range_entry
1876{
1877 tree exp;
1878 tree low;
1879 tree high;
1880 bool in_p;
1881 bool strict_overflow_p;
1882 unsigned int idx, next;
1883};
1884
1885/* This is similar to make_range in fold-const.c, but on top of
1886 GIMPLE instead of trees. If EXP is non-NULL, it should be
1887 an SSA_NAME and STMT argument is ignored, otherwise STMT
1888 argument should be a GIMPLE_COND. */
1889
1890static void
1891init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1892{
1893 int in_p;
1894 tree low, high;
1895 bool is_bool, strict_overflow_p;
1896
1897 r->exp = NULL_TREE;
1898 r->in_p = false;
1899 r->strict_overflow_p = false;
1900 r->low = NULL_TREE;
1901 r->high = NULL_TREE;
1902 if (exp != NULL_TREE
1903 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1904 return;
1905
1906 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1907 and see if we can refine the range. Some of the cases below may not
1908 happen, but it doesn't seem worth worrying about this. We "continue"
1909 the outer loop when we've changed something; otherwise we "break"
1910 the switch, which will "break" the while. */
1911 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1912 high = low;
1913 in_p = 0;
1914 strict_overflow_p = false;
1915 is_bool = false;
1916 if (exp == NULL_TREE)
1917 is_bool = true;
1918 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1919 {
1920 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1921 is_bool = true;
1922 else
1923 return;
1924 }
1925 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1926 is_bool = true;
1927
1928 while (1)
1929 {
1930 enum tree_code code;
1931 tree arg0, arg1, exp_type;
1932 tree nexp;
1933 location_t loc;
1934
1935 if (exp != NULL_TREE)
1936 {
1937 if (TREE_CODE (exp) != SSA_NAME
1938 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1939 break;
1940
1941 stmt = SSA_NAME_DEF_STMT (exp);
1942 if (!is_gimple_assign (stmt))
1943 break;
1944
1945 code = gimple_assign_rhs_code (stmt);
1946 arg0 = gimple_assign_rhs1 (stmt);
1947 arg1 = gimple_assign_rhs2 (stmt);
1948 exp_type = TREE_TYPE (exp);
1949 }
1950 else
1951 {
1952 code = gimple_cond_code (stmt);
1953 arg0 = gimple_cond_lhs (stmt);
1954 arg1 = gimple_cond_rhs (stmt);
1955 exp_type = boolean_type_node;
1956 }
1957
1958 if (TREE_CODE (arg0) != SSA_NAME)
1959 break;
1960 loc = gimple_location (stmt);
1961 switch (code)
1962 {
1963 case BIT_NOT_EXPR:
1964 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1965 /* Ensure the range is either +[-,0], +[0,0],
1966 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1967 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1968 or similar expression of unconditional true or
1969 false, it should not be negated. */
1970 && ((high && integer_zerop (high))
1971 || (low && integer_onep (low))))
1972 {
1973 in_p = !in_p;
1974 exp = arg0;
1975 continue;
1976 }
1977 break;
1978 case SSA_NAME:
1979 exp = arg0;
1980 continue;
1981 CASE_CONVERT:
1982 if (is_bool)
1983 goto do_default;
1984 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1985 {
1986 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1987 is_bool = true;
1988 else
1989 return;
1990 }
1991 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1992 is_bool = true;
1993 goto do_default;
1994 case EQ_EXPR:
1995 case NE_EXPR:
1996 case LT_EXPR:
1997 case LE_EXPR:
1998 case GE_EXPR:
1999 case GT_EXPR:
2000 is_bool = true;
2001 /* FALLTHRU */
2002 default:
2003 if (!is_bool)
2004 return;
2005 do_default:
2006 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2007 &low, &high, &in_p,
2008 &strict_overflow_p);
2009 if (nexp != NULL_TREE)
2010 {
2011 exp = nexp;
2012 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2013 continue;
2014 }
2015 break;
2016 }
2017 break;
2018 }
2019 if (is_bool)
2020 {
2021 r->exp = exp;
2022 r->in_p = in_p;
2023 r->low = low;
2024 r->high = high;
2025 r->strict_overflow_p = strict_overflow_p;
2026 }
2027}
2028
2029/* Comparison function for qsort. Sort entries
2030 without SSA_NAME exp first, then with SSA_NAMEs sorted
2031 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2032 by increasing ->low and if ->low is the same, by increasing
2033 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2034 maximum. */
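/* E.g. entries whose exp is not an SSA_NAME (including a NULL exp) sort
   first; then, for one and the same SSA_NAME (say x_5, illustrative only),
   +[-, 10] sorts before +[3, 7], which sorts before +[3, -], because a NULL
   low means minimum and a NULL high means maximum.  */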
2035
2036static int
2037range_entry_cmp (const void *a, const void *b)
2038{
2039 const struct range_entry *p = (const struct range_entry *) a;
2040 const struct range_entry *q = (const struct range_entry *) b;
2041
2042 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2043 {
2044 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2045 {
2046 /* Group range_entries for the same SSA_NAME together. */
2047 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2048 return -1;
2049 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2050 return 1;
2051 /* If ->low is different, NULL low goes first, then by
2052 ascending low. */
2053 if (p->low != NULL_TREE)
2054 {
2055 if (q->low != NULL_TREE)
2056 {
2057 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2058 p->low, q->low);
2059 if (tem && integer_onep (tem))
2060 return -1;
2061 tem = fold_binary (GT_EXPR, boolean_type_node,
2062 p->low, q->low);
2063 if (tem && integer_onep (tem))
2064 return 1;
2065 }
2066 else
2067 return 1;
2068 }
2069 else if (q->low != NULL_TREE)
2070 return -1;
2071 /* If ->high is different, NULL high goes last, before that by
2072 ascending high. */
2073 if (p->high != NULL_TREE)
2074 {
2075 if (q->high != NULL_TREE)
2076 {
2077 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2078 p->high, q->high);
2079 if (tem && integer_onep (tem))
2080 return -1;
2081 tem = fold_binary (GT_EXPR, boolean_type_node,
2082 p->high, q->high);
2083 if (tem && integer_onep (tem))
2084 return 1;
2085 }
2086 else
2087 return -1;
2088 }
2089 else if (q->high != NULL_TREE)
2090 return 1;
2091 /* If both ranges are the same, sort below by ascending idx. */
2092 }
2093 else
2094 return 1;
2095 }
2096 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2097 return -1;
2098
2099 if (p->idx < q->idx)
2100 return -1;
2101 else
2102 {
2103 gcc_checking_assert (p->idx > q->idx);
2104 return 1;
2105 }
2106}
2107
2108/* Helper routine of optimize_range_tests.
2109 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2110 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2111 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2112 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2113 an array of COUNT pointers to other ranges. Return
2114 true if the range merge has been successful.
2115 If OPCODE is ERROR_MARK, this is called from within
2116 maybe_optimize_range_tests and is performing inter-bb range optimization.
2117 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2118 oe->rank. */
2119
2120static bool
2121update_range_test (struct range_entry *range, struct range_entry *otherrange,
2122 struct range_entry **otherrangep,
2123 unsigned int count, enum tree_code opcode,
2124 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2125 bool in_p, tree low, tree high, bool strict_overflow_p)
2126{
2127 operand_entry_t oe = (*ops)[range->idx];
2128 tree op = oe->op;
2129 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2130 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2131 location_t loc = gimple_location (stmt);
2132 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2133 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2134 in_p, low, high);
2135 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2136 gimple_stmt_iterator gsi;
2137 unsigned int i;
2138
2139 if (tem == NULL_TREE)
2140 return false;
2141
2142 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2143 warning_at (loc, OPT_Wstrict_overflow,
2144 "assuming signed overflow does not occur "
2145 "when simplifying range test");
2146
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 {
2149 struct range_entry *r;
2150 fprintf (dump_file, "Optimizing range tests ");
2151 print_generic_expr (dump_file, range->exp, 0);
2152 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2153 print_generic_expr (dump_file, range->low, 0);
2154 fprintf (dump_file, ", ");
2155 print_generic_expr (dump_file, range->high, 0);
2156 fprintf (dump_file, "]");
2157 for (i = 0; i < count; i++)
2158 {
2159 if (otherrange)
2160 r = otherrange + i;
2161 else
2162 r = otherrangep[i];
2163 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2164 print_generic_expr (dump_file, r->low, 0);
2165 fprintf (dump_file, ", ");
2166 print_generic_expr (dump_file, r->high, 0);
2167 fprintf (dump_file, "]");
2168 }
2169 fprintf (dump_file, "\n into ");
2170 print_generic_expr (dump_file, tem, 0);
2171 fprintf (dump_file, "\n");
2172 }
2173
2174 if (opcode == BIT_IOR_EXPR
2175 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2176 tem = invert_truthvalue_loc (loc, tem);
2177
2178 tem = fold_convert_loc (loc, optype, tem);
2179 gsi = gsi_for_stmt (stmt);
2180 unsigned int uid = gimple_uid (stmt);
2181 /* In rare cases range->exp can be equal to lhs of stmt.
2182 In that case we have to insert after the stmt rather than before
2183 it. If stmt is a PHI, insert it at the start of the basic block. */
2184 if (op != range->exp)
2185 {
2186 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2187 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2188 GSI_SAME_STMT);
2189 gsi_prev (&gsi);
2190 }
2191 else if (gimple_code (stmt) != GIMPLE_PHI)
2192 {
2193 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2194 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2195 GSI_CONTINUE_LINKING);
2196 }
2197 else
2198 {
2199 gsi = gsi_after_labels (gimple_bb (stmt));
2200 if (!gsi_end_p (gsi))
2201 uid = gimple_uid (gsi_stmt (gsi));
2202 else
2203 {
2204 gsi = gsi_start_bb (gimple_bb (stmt));
2205 uid = 1;
2206 while (!gsi_end_p (gsi))
2207 {
2208 uid = gimple_uid (gsi_stmt (gsi));
2209 gsi_next (&gsi);
2210 }
2211 }
2212 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2213 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2214 GSI_SAME_STMT);
2215 if (gsi_end_p (gsi))
2216 gsi = gsi_last_bb (gimple_bb (stmt));
2217 else
2218 gsi_prev (&gsi);
2219 }
2220 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2221 if (gimple_uid (gsi_stmt (gsi)))
2222 break;
2223 else
2224 gimple_set_uid (gsi_stmt (gsi), uid);
2225
2226 oe->op = tem;
2227 range->exp = exp;
2228 range->low = low;
2229 range->high = high;
2230 range->in_p = in_p;
2231 range->strict_overflow_p = false;
2232
2233 for (i = 0; i < count; i++)
2234 {
2235 if (otherrange)
2236 range = otherrange + i;
2237 else
2238 range = otherrangep[i];
2239 oe = (*ops)[range->idx];
2240 /* Now change all the other range test immediate uses, so that
2241 those tests will be optimized away. */
2242 if (opcode == ERROR_MARK)
2243 {
2244 if (oe->op)
2245 oe->op = build_int_cst (TREE_TYPE (oe->op),
2246 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2247 else
2248 oe->op = (oe->rank == BIT_IOR_EXPR
2249 ? boolean_false_node : boolean_true_node);
2250 }
2251 else
2252 oe->op = error_mark_node;
2253 range->exp = NULL_TREE;
2254 }
2255 return true;
2256}
2257
2258/* Optimize X == CST1 || X == CST2
2259 if popcount (CST1 ^ CST2) == 1 into
2260 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2261 Similarly for ranges. E.g.
2262 X != 2 && X != 3 && X != 10 && X != 11
2263 will be transformed by the previous optimization into
2264 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2265 and this loop can transform that into
2266 !(((X & ~8) - 2U) <= 1U). */
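/* Working through the example above: lowi = 2, highi = 3, lowj = 10,
   highj = 11, so lowi ^ lowj == highi ^ highj == 8, a single bit.  With
   tem = ~8, both 10 & ~8 and 11 & ~8 fold back onto 2 and 3, so testing
   X & ~8 against the single range [2, 3] covers both original ranges.  */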
2267
2268static bool
2269optimize_range_tests_xor (enum tree_code opcode, tree type,
2270 tree lowi, tree lowj, tree highi, tree highj,
2271 vec<operand_entry_t> *ops,
2272 struct range_entry *rangei,
2273 struct range_entry *rangej)
2274{
2275 tree lowxor, highxor, tem, exp;
2276 /* Check lowi ^ lowj == highi ^ highj and
2277 popcount (lowi ^ lowj) == 1. */
2278 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2279 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2280 return false;
2281 if (!integer_pow2p (lowxor))
2282 return false;
2283 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2284 if (!tree_int_cst_equal (lowxor, highxor))
2285 return false;
2286
2287 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2288 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2289 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2290 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2291 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2292 NULL, rangei->in_p, lowj, highj,
2293 rangei->strict_overflow_p
2294 || rangej->strict_overflow_p))
2295 return true;
2296 return false;
2297}
2298
2299/* Optimize X == CST1 || X == CST2
2300 if popcount (CST2 - CST1) == 1 into
2301 ((X - CST1) & ~(CST2 - CST1)) == 0.
2302 Similarly for ranges. E.g.
2303 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2304 || X == 75 || X == 45
2305 will be transformed by the previous optimization into
2306 (X - 43U) <= 3U || (X - 75U) <= 3U
2307 and this loop can transform that into
2308 ((X - 43U) & ~(75U - 43U)) <= 3U. */
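/* Working through the example above: lowi = 43, highi = 46, lowj = 75,
   highj = 78, so highi - lowi == highj - lowj == 3 and lowj - lowi == 32,
   a single bit.  Masking that bit off X - 43 maps 75 .. 78 (offsets
   32 .. 35) onto the same 0 .. 3 offsets as 43 .. 46, hence the single
   test ((X - 43U) & ~32U) <= 3U.  */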
2309static bool
2310optimize_range_tests_diff (enum tree_code opcode, tree type,
2311 tree lowi, tree lowj, tree highi, tree highj,
2312 vec<operand_entry_t> *ops,
2313 struct range_entry *rangei,
2314 struct range_entry *rangej)
2315{
2316 tree tem1, tem2, mask;
2317 /* Check highi - lowi == highj - lowj. */
2318 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2319 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2320 return false;
2321 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2322 if (!tree_int_cst_equal (tem1, tem2))
2323 return false;
2324 /* Check popcount (lowj - lowi) == 1. */
2325 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2326 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2327 return false;
2328 if (!integer_pow2p (tem1))
2329 return false;
2330
2331 type = unsigned_type_for (type);
2332 tem1 = fold_convert (type, tem1);
2333 tem2 = fold_convert (type, tem2);
2334 lowi = fold_convert (type, lowi);
2335 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2336 tem1 = fold_binary (MINUS_EXPR, type,
2337 fold_convert (type, rangei->exp), lowi);
2338 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2339 lowj = build_int_cst (type, 0);
2340 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2341 NULL, rangei->in_p, lowj, tem2,
2342 rangei->strict_overflow_p
2343 || rangej->strict_overflow_p))
2344 return true;
2345 return false;
2346}
2347
2348/* Perform the checks common to optimize_range_tests_xor and
2349 optimize_range_tests_diff.
2350 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2351 otherwise call optimize_range_tests_diff. */
2352
2353static bool
2354optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2355 bool optimize_xor, vec<operand_entry_t> *ops,
2356 struct range_entry *ranges)
2357{
2358 int i, j;
2359 bool any_changes = false;
2360 for (i = first; i < length; i++)
2361 {
2362 tree lowi, highi, lowj, highj, type, tem;
2363
2364 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2365 continue;
2366 type = TREE_TYPE (ranges[i].exp);
2367 if (!INTEGRAL_TYPE_P (type))
2368 continue;
2369 lowi = ranges[i].low;
2370 if (lowi == NULL_TREE)
2371 lowi = TYPE_MIN_VALUE (type);
2372 highi = ranges[i].high;
2373 if (highi == NULL_TREE)
2374 continue;
2375 for (j = i + 1; j < length && j < i + 64; j++)
2376 {
2377 bool changes;
2378 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2379 continue;
2380 lowj = ranges[j].low;
2381 if (lowj == NULL_TREE)
2382 continue;
2383 highj = ranges[j].high;
2384 if (highj == NULL_TREE)
2385 highj = TYPE_MAX_VALUE (type);
2386 /* Check lowj > highi. */
2387 tem = fold_binary (GT_EXPR, boolean_type_node,
2388 lowj, highi);
2389 if (tem == NULL_TREE || !integer_onep (tem))
2390 continue;
2391 if (optimize_xor)
2392 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2393 highi, highj, ops,
2394 ranges + i, ranges + j);
2395 else
2396 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2397 highi, highj, ops,
2398 ranges + i, ranges + j);
2399 if (changes)
2400 {
2401 any_changes = true;
2402 break;
2403 }
2404 }
2405 }
2406 return any_changes;
2407}
2408
2409/* Helper function of optimize_range_tests_to_bit_test. Handle a single
2410 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2411 EXP on success, NULL otherwise. */
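/* E.g. (illustrative values) for LOW = 43, HIGH = 46 and TOTALLOW = 43 this
   computes max = 4 and *MASK = 0xf (bits 0 .. 3); for a later single-value
   range LOW = HIGH = 49 with the same TOTALLOW it computes a one-bit mask
   and shifts it left by 49 - 43, giving 0x40.  */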
2412
2413static tree
2414extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2415 wide_int *mask, tree *totallowp)
2416{
2417 tree tem = int_const_binop (MINUS_EXPR, high, low);
2418 if (tem == NULL_TREE
2419 || TREE_CODE (tem) != INTEGER_CST
2420 || TREE_OVERFLOW (tem)
2421 || tree_int_cst_sgn (tem) == -1
2422 || compare_tree_int (tem, prec) != -1)
2423 return NULL_TREE;
2424
2425 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2426 *mask = wi::shifted_mask (0, max, false, prec);
2427 if (TREE_CODE (exp) == BIT_AND_EXPR
2428 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2429 {
2430 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2431 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2432 if (wi::popcount (msk) == 1
2433 && wi::ltu_p (msk, prec - max))
2434 {
2435 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2436 max += msk.to_uhwi ();
2437 exp = TREE_OPERAND (exp, 0);
2438 if (integer_zerop (low)
2439 && TREE_CODE (exp) == PLUS_EXPR
2440 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2441 {
2442 tree ret = TREE_OPERAND (exp, 0);
2443 STRIP_NOPS (ret);
2444 widest_int bias
2445 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2446 TYPE_PRECISION (TREE_TYPE (low))));
2447 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2448 if (totallowp)
2449 {
2450 *totallowp = tbias;
2451 return ret;
2452 }
2453 else if (!tree_int_cst_lt (totallow, tbias))
2454 return NULL_TREE;
2455 bias = wi::to_widest (tbias);
2456 bias -= wi::to_widest (totallow);
2457 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2458 {
2459 *mask = wi::lshift (*mask, bias);
2460 return ret;
2461 }
2462 }
2463 }
2464 }
2465 if (totallowp)
2466 return exp;
2467 if (!tree_int_cst_lt (totallow, low))
2468 return exp;
2469 tem = int_const_binop (MINUS_EXPR, low, totallow);
2470 if (tem == NULL_TREE
2471 || TREE_CODE (tem) != INTEGER_CST
2472 || TREE_OVERFLOW (tem)
2473 || compare_tree_int (tem, prec - max) == 1)
2474 return NULL_TREE;
2475
2476 *mask = wi::lshift (*mask, wi::to_widest (tem));
2477 return exp;
2478}
2479
2480/* Attempt to optimize small range tests using bit test.
2481 E.g.
2482 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2483 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2484 has been optimized by earlier optimizations into:
2485 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2486 As the whole 43 through 82 range spans fewer than 64 values,
2487 for 64-bit word targets optimize that into:
2488 (X - 43U) > 39U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
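/* In the example above the mask has bit X - 43 set for each tested value X:
   43 .. 46 and 49 give bits 0 .. 3 and 6 (0x4f), 75 .. 78 give bits 32 .. 35
   (0xf00000000) and 82 gives bit 39 (0x8000000000), which together form the
   0x8F0000004FULL constant.  */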
2489
2490static bool
2491optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2492 vec<operand_entry_t> *ops,
2493 struct range_entry *ranges)
2494{
2495 int i, j;
2496 bool any_changes = false;
2497 int prec = GET_MODE_BITSIZE (word_mode);
2498 auto_vec<struct range_entry *, 64> candidates;
2499
2500 for (i = first; i < length - 2; i++)
2501 {
2502 tree lowi, highi, lowj, highj, type;
2503
2504 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2505 continue;
2506 type = TREE_TYPE (ranges[i].exp);
2507 if (!INTEGRAL_TYPE_P (type))
2508 continue;
2509 lowi = ranges[i].low;
2510 if (lowi == NULL_TREE)
2511 lowi = TYPE_MIN_VALUE (type);
2512 highi = ranges[i].high;
2513 if (highi == NULL_TREE)
2514 continue;
2515 wide_int mask;
2516 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2517 highi, &mask, &lowi);
2518 if (exp == NULL_TREE)
2519 continue;
2520 bool strict_overflow_p = ranges[i].strict_overflow_p;
2521 candidates.truncate (0);
2522 int end = MIN (i + 64, length);
2523 for (j = i + 1; j < end; j++)
2524 {
2525 tree exp2;
2526 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2527 continue;
2528 if (ranges[j].exp == exp)
2529 ;
2530 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2531 {
2532 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2533 if (exp2 == exp)
2534 ;
2535 else if (TREE_CODE (exp2) == PLUS_EXPR)
2536 {
2537 exp2 = TREE_OPERAND (exp2, 0);
2538 STRIP_NOPS (exp2);
2539 if (exp2 != exp)
2540 continue;
2541 }
2542 else
2543 continue;
2544 }
2545 else
2546 continue;
2547 lowj = ranges[j].low;
2548 if (lowj == NULL_TREE)
2549 continue;
2550 highj = ranges[j].high;
2551 if (highj == NULL_TREE)
2552 highj = TYPE_MAX_VALUE (type);
2553 wide_int mask2;
2554 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2555 highj, &mask2, NULL);
2556 if (exp2 != exp)
2557 continue;
2558 mask |= mask2;
2559 strict_overflow_p |= ranges[j].strict_overflow_p;
2560 candidates.safe_push (&ranges[j]);
2561 }
2562
2563 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2564 if (candidates.length () >= 2)
2565 {
2566 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2567 wi::to_widest (lowi)
2568 + prec - 1 - wi::clz (mask));
2569 operand_entry_t oe = (*ops)[ranges[i].idx];
2570 tree op = oe->op;
2571 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2572 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2573 location_t loc = gimple_location (stmt);
2574 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2575
2576 /* See if it isn't cheaper to pretend the minimum value of the
2577 range is 0, if the maximum value is small enough.
2578 We can then avoid the subtraction of the minimum value, but the
2579 mask constant could perhaps be more expensive. */
2580 if (compare_tree_int (lowi, 0) > 0
2581 && compare_tree_int (high, prec) < 0)
2582 {
2583 int cost_diff;
2584 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2585 rtx reg = gen_raw_REG (word_mode, 10000);
2586 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2587 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2588 GEN_INT (-m)), speed_p);
2589 rtx r = immed_wide_int_const (mask, word_mode);
2590 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2591 speed_p);
2592 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2593 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2594 speed_p);
2595 if (cost_diff > 0)
2596 {
2597 mask = wi::lshift (mask, m);
2598 lowi = build_zero_cst (TREE_TYPE (lowi));
2599 }
2600 }
2601
2602 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2603 false, lowi, high);
2604 if (tem == NULL_TREE || is_gimple_val (tem))
2605 continue;
2606 tree etype = unsigned_type_for (TREE_TYPE (exp));
2607 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2608 fold_convert_loc (loc, etype, exp),
2609 fold_convert_loc (loc, etype, lowi));
2610 exp = fold_convert_loc (loc, integer_type_node, exp);
2611 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2612 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2613 build_int_cst (word_type, 1), exp);
2614 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2615 wide_int_to_tree (word_type, mask));
2616 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2617 build_zero_cst (word_type));
2618 if (is_gimple_val (exp))
2619 continue;
2620
2621 /* The shift might have undefined behavior if TEM is true,
2622 but reassociate_bb isn't prepared to have basic blocks
2623 split when it is running. So, temporarily emit code
2624 with BIT_IOR_EXPR instead of a short-circuit form, and fix it up in
2625 branch_fixup. */
2626 gimple_seq seq;
2627 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2628 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2629 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2630 gimple_seq seq2;
2631 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2632 gimple_seq_add_seq_without_update (&seq, seq2);
2633 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2634 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2635 gimple g = gimple_build_assign (make_ssa_name (optype),
2636 BIT_IOR_EXPR, tem, exp);
2637 gimple_set_location (g, loc);
2638 gimple_seq_add_stmt_without_update (&seq, g);
2639 exp = gimple_assign_lhs (g);
2640 tree val = build_zero_cst (optype);
2641 if (update_range_test (&ranges[i], NULL, candidates.address (),
2642 candidates.length (), opcode, ops, exp,
2643 seq, false, val, val, strict_overflow_p))
2644 {
2645 any_changes = true;
2646 reassoc_branch_fixups.safe_push (tem);
2647 }
2648 else
2649 gimple_seq_discard (seq);
2650 }
2651 }
2652 return any_changes;
2653}
2654
2655/* Optimize range tests, similarly to how fold_range_test optimizes
2656 them on trees. The tree code for the binary
2657 operation between all the operands is OPCODE.
2658 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2659 maybe_optimize_range_tests for inter-bb range optimization.
2660 In that case if oe->op is NULL, oe->id is bb->index whose
2661 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2662 the actual opcode. */
2663
2664static bool
2665optimize_range_tests (enum tree_code opcode,
2666 vec<operand_entry_t> *ops)
2667{
2668 unsigned int length = ops->length (), i, j, first;
2669 operand_entry_t oe;
2670 struct range_entry *ranges;
2671 bool any_changes = false;
2672
2673 if (length == 1)
2674 return false;
2675
2676 ranges = XNEWVEC (struct range_entry, length);
2677 for (i = 0; i < length; i++)
2678 {
2679 oe = (*ops)[i];
2680 ranges[i].idx = i;
2681 init_range_entry (ranges + i, oe->op,
2682 oe->op ? NULL :
2683 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2684 /* For |, invert it now; we will invert it again before emitting
2685 the optimized expression. */
2686 if (opcode == BIT_IOR_EXPR
2687 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2688 ranges[i].in_p = !ranges[i].in_p;
2689 }
2690
2691 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2692 for (i = 0; i < length; i++)
2693 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2694 break;
2695
2696 /* Try to merge ranges. */
2697 for (first = i; i < length; i++)
2698 {
2699 tree low = ranges[i].low;
2700 tree high = ranges[i].high;
2701 int in_p = ranges[i].in_p;
2702 bool strict_overflow_p = ranges[i].strict_overflow_p;
2703 int update_fail_count = 0;
2704
2705 for (j = i + 1; j < length; j++)
2706 {
2707 if (ranges[i].exp != ranges[j].exp)
2708 break;
2709 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2710 ranges[j].in_p, ranges[j].low, ranges[j].high))
2711 break;
2712 strict_overflow_p |= ranges[j].strict_overflow_p;
2713 }
2714
2715 if (j == i + 1)
2716 continue;
2717
2718 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2719 opcode, ops, ranges[i].exp, NULL, in_p,
2720 low, high, strict_overflow_p))
2721 {
2722 i = j - 1;
2723 any_changes = true;
2724 }
2725 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2726 while update_range_test would fail. */
2727 else if (update_fail_count == 64)
2728 i = j - 1;
2729 else
2730 ++update_fail_count;
2731 }
2732
2733 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2734 ops, ranges);
2735
2736 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2737 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2738 ops, ranges);
2739 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2740 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2741 ops, ranges);
2742
2743 if (any_changes && opcode != ERROR_MARK)
2744 {
2745 j = 0;
2746 FOR_EACH_VEC_ELT (*ops, i, oe)
2747 {
2748 if (oe->op == error_mark_node)
2749 continue;
2750 else if (i != j)
2751 (*ops)[j] = oe;
2752 j++;
2753 }
2754 ops->truncate (j);
2755 }
2756
2757 XDELETEVEC (ranges);
2758 return any_changes;
2759}
2760
2761/* Return true if STMT is a cast like:
2762 <bb N>:
2763 ...
2764 _123 = (int) _234;
2765
2766 <bb M>:
2767 # _345 = PHI <_123(N), 1(...), 1(...)>
2768 where _234 has bool type, _123 has a single use and
2769 bb N has a single successor M. This is commonly used in
2770 the last block of a range test. */
2771
2772static bool
2773final_range_test_p (gimple stmt)
2774{
2775 basic_block bb, rhs_bb;
2776 edge e;
2777 tree lhs, rhs;
2778 use_operand_p use_p;
2779 gimple use_stmt;
2780
2781 if (!gimple_assign_cast_p (stmt))
2782 return false;
2783 bb = gimple_bb (stmt);
2784 if (!single_succ_p (bb))
2785 return false;
2786 e = single_succ_edge (bb);
2787 if (e->flags & EDGE_COMPLEX)
2788 return false;
2789
2790 lhs = gimple_assign_lhs (stmt);
2791 rhs = gimple_assign_rhs1 (stmt);
2792 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2793 || TREE_CODE (rhs) != SSA_NAME
2794 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2795 return false;
2796
2797 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2798 if (!single_imm_use (lhs, &use_p, &use_stmt))
2799 return false;
2800
2801 if (gimple_code (use_stmt) != GIMPLE_PHI
2802 || gimple_bb (use_stmt) != e->dest)
2803 return false;
2804
2805 /* And that the rhs is defined in the same loop. */
2806 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2807 if (rhs_bb == NULL
2808 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2809 return false;
2810
2811 return true;
2812}
2813
2814/* Return true if BB is a suitable basic block for inter-bb range test
2815 optimization. If BACKWARD is true, BB should be the only predecessor
2816 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2817 or is compared against to find a common basic block to which all the
2818 conditions branch when true resp. false. If BACKWARD is false, TEST_BB should
2819 be the only predecessor of BB. */
2820
2821static bool
2822suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2823 bool backward)
2824{
2825 edge_iterator ei, ei2;
2826 edge e, e2;
2827 gimple stmt;
2828 gphi_iterator gsi;
2829 bool other_edge_seen = false;
2830 bool is_cond;
2831
2832 if (test_bb == bb)
2833 return false;
2834 /* Check last stmt first. */
2835 stmt = last_stmt (bb);
2836 if (stmt == NULL
2837 || (gimple_code (stmt) != GIMPLE_COND
2838 && (backward || !final_range_test_p (stmt)))
2839 || gimple_visited_p (stmt)
2840 || stmt_could_throw_p (stmt)
2841 || *other_bb == bb)
2842 return false;
2843 is_cond = gimple_code (stmt) == GIMPLE_COND;
2844 if (is_cond)
2845 {
2846 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2847 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2848 to *OTHER_BB (if not set yet, try to find it out). */
2849 if (EDGE_COUNT (bb->succs) != 2)
2850 return false;
2851 FOR_EACH_EDGE (e, ei, bb->succs)
2852 {
2853 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2854 return false;
2855 if (e->dest == test_bb)
2856 {
2857 if (backward)
2858 continue;
2859 else
2860 return false;
2861 }
2862 if (e->dest == bb)
2863 return false;
2864 if (*other_bb == NULL)
2865 {
2866 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2867 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2868 return false;
2869 else if (e->dest == e2->dest)
2870 *other_bb = e->dest;
2871 if (*other_bb == NULL)
2872 return false;
2873 }
2874 if (e->dest == *other_bb)
2875 other_edge_seen = true;
2876 else if (backward)
2877 return false;
2878 }
2879 if (*other_bb == NULL || !other_edge_seen)
2880 return false;
2881 }
2882 else if (single_succ (bb) != *other_bb)
2883 return false;
2884
2885 /* Now check all PHIs of *OTHER_BB. */
2886 e = find_edge (bb, *other_bb);
2887 e2 = find_edge (test_bb, *other_bb);
2888 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2889 {
2890 gphi *phi = gsi.phi ();
2891 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2892 corresponding to BB and TEST_BB predecessor must be the same. */
2893 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2894 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2895 {
2896 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2897 one of the PHIs should have the lhs of the last stmt in
2898 that block as PHI arg and that PHI should have 0 or 1
2899 corresponding to it in all other range test basic blocks
2900 considered. */
2901 if (!is_cond)
2902 {
2903 if (gimple_phi_arg_def (phi, e->dest_idx)
2904 == gimple_assign_lhs (stmt)
2905 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2906 || integer_onep (gimple_phi_arg_def (phi,
2907 e2->dest_idx))))
2908 continue;
2909 }
2910 else
2911 {
2912 gimple test_last = last_stmt (test_bb);
2913 if (gimple_code (test_last) != GIMPLE_COND
2914 && gimple_phi_arg_def (phi, e2->dest_idx)
2915 == gimple_assign_lhs (test_last)
2916 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2917 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2918 continue;
2919 }
2920
2921 return false;
2922 }
2923 }
2924 return true;
2925}
2926
2927/* Return true if BB doesn't have side-effects that would disallow
2928 range test optimization, all SSA_NAMEs set in the bb are consumed
2929 in the bb and there are no PHIs. */
2930
2931static bool
2932no_side_effect_bb (basic_block bb)
2933{
2934 gimple_stmt_iterator gsi;
2935 gimple last;
2936
2937 if (!gimple_seq_empty_p (phi_nodes (bb)))
2938 return false;
2939 last = last_stmt (bb);
2940 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2941 {
2942 gimple stmt = gsi_stmt (gsi);
2943 tree lhs;
2944 imm_use_iterator imm_iter;
2945 use_operand_p use_p;
2946
2947 if (is_gimple_debug (stmt))
2948 continue;
2949 if (gimple_has_side_effects (stmt))
2950 return false;
2951 if (stmt == last)
2952 return true;
2953 if (!is_gimple_assign (stmt))
2954 return false;
2955 lhs = gimple_assign_lhs (stmt);
2956 if (TREE_CODE (lhs) != SSA_NAME)
2957 return false;
2958 if (gimple_assign_rhs_could_trap_p (stmt))
2959 return false;
2960 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2961 {
2962 gimple use_stmt = USE_STMT (use_p);
2963 if (is_gimple_debug (use_stmt))
2964 continue;
2965 if (gimple_bb (use_stmt) != bb)
2966 return false;
2967 }
2968 }
2969 return false;
2970}
2971
2972/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2973 return true and fill in *OPS recursively. */
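/* E.g. (illustrative SSA names) for _9 = _5 | _7, where _5 and _7 are
   single-use comparison results defined in the same loop, a call with
   VAR = _9 and CODE = BIT_IOR_EXPR should push operand entries for _5 and
   _7 into *OPS and mark the defining statement of _9 visited.  */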
2974
2975static bool
2976get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2977 struct loop *loop)
2978{
2979 gimple stmt = SSA_NAME_DEF_STMT (var);
2980 tree rhs[2];
2981 int i;
2982
2983 if (!is_reassociable_op (stmt, code, loop))
2984 return false;
2985
2986 rhs[0] = gimple_assign_rhs1 (stmt);
2987 rhs[1] = gimple_assign_rhs2 (stmt);
2988 gimple_set_visited (stmt, true);
2989 for (i = 0; i < 2; i++)
2990 if (TREE_CODE (rhs[i]) == SSA_NAME
2991 && !get_ops (rhs[i], code, ops, loop)
2992 && has_single_use (rhs[i]))
2993 {
2994 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2995
2996 oe->op = rhs[i];
2997 oe->rank = code;
2998 oe->id = 0;
2999 oe->count = 1;
3000 ops->safe_push (oe);
3001 }
3002 return true;
3003}
3004
3005/* Find the ops that were added by get_ops starting from VAR, see if
3006 they were changed during update_range_test and if so, create new
3007 stmts. */
3008
3009static tree
3010update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
3011 unsigned int *pidx, struct loop *loop)
3012{
3013 gimple stmt = SSA_NAME_DEF_STMT (var);
3014 tree rhs[4];
3015 int i;
3016
3017 if (!is_reassociable_op (stmt, code, loop))
3018 return NULL;
3019
3020 rhs[0] = gimple_assign_rhs1 (stmt);
3021 rhs[1] = gimple_assign_rhs2 (stmt);
3022 rhs[2] = rhs[0];
3023 rhs[3] = rhs[1];
3024 for (i = 0; i < 2; i++)
3025 if (TREE_CODE (rhs[i]) == SSA_NAME)
3026 {
3027 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3028 if (rhs[2 + i] == NULL_TREE)
3029 {
3030 if (has_single_use (rhs[i]))
3031 rhs[2 + i] = ops[(*pidx)++]->op;
3032 else
3033 rhs[2 + i] = rhs[i];
3034 }
3035 }
3036 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3037 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3038 {
3039 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3040 var = make_ssa_name (TREE_TYPE (var));
3041 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3042 rhs[2], rhs[3]);
3043 gimple_set_uid (g, gimple_uid (stmt));
3044 gimple_set_visited (g, true);
3045 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3046 }
3047 return var;
3048}
3049
3050/* Structure to track the initial value passed to get_ops and
3051 the range in the ops vector for each basic block. */
3052
3053struct inter_bb_range_test_entry
3054{
3055 tree op;
3056 unsigned int first_idx, last_idx;
3057};
3058
3059/* Inter-bb range test optimization. */
3060
3061static void
3062maybe_optimize_range_tests (gimple stmt)
3063{
3064 basic_block first_bb = gimple_bb (stmt);
3065 basic_block last_bb = first_bb;
3066 basic_block other_bb = NULL;
3067 basic_block bb;
3068 edge_iterator ei;
3069 edge e;
3070 auto_vec<operand_entry_t> ops;
3071 auto_vec<inter_bb_range_test_entry> bbinfo;
3072 bool any_changes = false;
3073
3074 /* Consider only basic blocks that end with GIMPLE_COND or
3075 a cast statement satisfying final_range_test_p. All
3076 but the last bb in the first_bb .. last_bb range
3077 should end with GIMPLE_COND. */
3078 if (gimple_code (stmt) == GIMPLE_COND)
3079 {
3080 if (EDGE_COUNT (first_bb->succs) != 2)
3081 return;
3082 }
3083 else if (final_range_test_p (stmt))
3084 other_bb = single_succ (first_bb);
3085 else
3086 return;
3087
3088 if (stmt_could_throw_p (stmt))
3089 return;
3090
3091 /* As relative ordering of post-dominator sons isn't fixed,
3092 maybe_optimize_range_tests can be called first on any
3093 bb in the range we want to optimize. So, start searching
3094 backwards, if first_bb can be set to a predecessor. */
3095 while (single_pred_p (first_bb))
3096 {
3097 basic_block pred_bb = single_pred (first_bb);
3098 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3099 break;
3100 if (!no_side_effect_bb (first_bb))
3101 break;
3102 first_bb = pred_bb;
3103 }
3104 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3105 Before starting forward search in last_bb successors, find
3106 out the other_bb. */
3107 if (first_bb == last_bb)
3108 {
3109 other_bb = NULL;
3110 /* As a non-GIMPLE_COND last stmt always terminates the range, the forward
3111 search cannot extend it; if the backward search found nothing, just give up. */
3112 if (gimple_code (stmt) != GIMPLE_COND)
3113 return;
3114 /* Look at both successors. Either it ends with a GIMPLE_COND
3115 and satisfies suitable_cond_bb, or ends with a cast and
3116 other_bb is that cast's successor. */
3117 FOR_EACH_EDGE (e, ei, first_bb->succs)
3118 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3119 || e->dest == first_bb)
3120 return;
3121 else if (single_pred_p (e->dest))
3122 {
3123 stmt = last_stmt (e->dest);
3124 if (stmt
3125 && gimple_code (stmt) == GIMPLE_COND
3126 && EDGE_COUNT (e->dest->succs) == 2)
3127 {
3128 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3129 break;
3130 else
3131 other_bb = NULL;
3132 }
3133 else if (stmt
3134 && final_range_test_p (stmt)
3135 && find_edge (first_bb, single_succ (e->dest)))
3136 {
3137 other_bb = single_succ (e->dest);
3138 if (other_bb == first_bb)
3139 other_bb = NULL;
3140 }
3141 }
3142 if (other_bb == NULL)
3143 return;
3144 }
3145 /* Now do the forward search, moving last_bb to successor bbs
3146 that aren't other_bb. */
3147 while (EDGE_COUNT (last_bb->succs) == 2)
3148 {
3149 FOR_EACH_EDGE (e, ei, last_bb->succs)
3150 if (e->dest != other_bb)
3151 break;
3152 if (e == NULL)
3153 break;
3154 if (!single_pred_p (e->dest))
3155 break;
3156 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3157 break;
3158 if (!no_side_effect_bb (e->dest))
3159 break;
3160 last_bb = e->dest;
3161 }
3162 if (first_bb == last_bb)
3163 return;
3164 /* Here basic blocks first_bb through last_bb's predecessor
3165 end with GIMPLE_COND, all of them have one of the edges to
3166 other_bb and another to another block in the range,
3167 all blocks except first_bb don't have side-effects and
3168 last_bb ends with either GIMPLE_COND, or cast satisfying
3169 final_range_test_p. */
3170 for (bb = last_bb; ; bb = single_pred (bb))
3171 {
3172 enum tree_code code;
3173 tree lhs, rhs;
3174 inter_bb_range_test_entry bb_ent;
3175
3176 bb_ent.op = NULL_TREE;
3177 bb_ent.first_idx = ops.length ();
3178 bb_ent.last_idx = bb_ent.first_idx;
3179 e = find_edge (bb, other_bb);
3180 stmt = last_stmt (bb);
3181 gimple_set_visited (stmt, true);
3182 if (gimple_code (stmt) != GIMPLE_COND)
3183 {
3184 use_operand_p use_p;
3185 gimple phi;
3186 edge e2;
3187 unsigned int d;
3188
3189 lhs = gimple_assign_lhs (stmt);
3190 rhs = gimple_assign_rhs1 (stmt);
3191 gcc_assert (bb == last_bb);
3192
3193 /* stmt is
3194 _123 = (int) _234;
3195
3196 followed by:
3197 <bb M>:
3198 # _345 = PHI <_123(N), 1(...), 1(...)>
3199
3200 or 0 instead of 1. If it is 0, the _234
3201 range test is anded together with all the
3202 other range tests, if it is 1, it is ored with
3203 them. */
3204 single_imm_use (lhs, &use_p, &phi);
3205 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3206 e2 = find_edge (first_bb, other_bb);
3207 d = e2->dest_idx;
3208 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3209 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3210 code = BIT_AND_EXPR;
3211 else
3212 {
3213 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3214 code = BIT_IOR_EXPR;
3215 }
3216
3217 /* If _234 SSA_NAME_DEF_STMT is
3218 _234 = _567 | _789;
3219 (or &, corresponding to 1/0 in the phi arguments),
3220 push into ops the individual range test arguments
3221 of the bitwise or resp. and, recursively. */
3222 if (!get_ops (rhs, code, &ops,
3223 loop_containing_stmt (stmt))
3224 && has_single_use (rhs))
3225 {
3226 /* Otherwise, push the _234 range test itself. */
3227 operand_entry_t oe
3228 = (operand_entry_t) pool_alloc (operand_entry_pool);
3229
3230 oe->op = rhs;
3231 oe->rank = code;
3232 oe->id = 0;
3233 oe->count = 1;
3234 ops.safe_push (oe);
3235 bb_ent.last_idx++;
3236 }
3237 else
3238 bb_ent.last_idx = ops.length ();
3239 bb_ent.op = rhs;
3240 bbinfo.safe_push (bb_ent);
3241 continue;
3242 }
3243 /* Otherwise stmt is GIMPLE_COND. */
3244 code = gimple_cond_code (stmt);
3245 lhs = gimple_cond_lhs (stmt);
3246 rhs = gimple_cond_rhs (stmt);
3247 if (TREE_CODE (lhs) == SSA_NAME
3248 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3249 && ((code != EQ_EXPR && code != NE_EXPR)
3250 || rhs != boolean_false_node
3251 /* Either push into ops the individual bitwise
3252 or resp. and operands, depending on which
3253 edge is other_bb. */
3254 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3255 ^ (code == EQ_EXPR))
3256 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3257 loop_containing_stmt (stmt))))
3258 {
3259 /* Or push the GIMPLE_COND stmt itself. */
3260 operand_entry_t oe
3261 = (operand_entry_t) pool_alloc (operand_entry_pool);
3262
3263 oe->op = NULL;
3264 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3265 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3266 /* oe->op = NULL signals that there is no SSA_NAME
3267 for the range test, and oe->id instead is the
3268 number of the basic block at whose end the GIMPLE_COND
3269 is. */
3270 oe->id = bb->index;
3271 oe->count = 1;
3272 ops.safe_push (oe);
3273 bb_ent.op = NULL;
3274 bb_ent.last_idx++;
3275 }
3276 else if (ops.length () > bb_ent.first_idx)
3277 {
3278 bb_ent.op = lhs;
3279 bb_ent.last_idx = ops.length ();
3280 }
3281 bbinfo.safe_push (bb_ent);
3282 if (bb == first_bb)
3283 break;
3284 }
3285 if (ops.length () > 1)
3286 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3287 if (any_changes)
3288 {
3289 unsigned int idx;
3290 /* update_ops relies on has_single_use predicates returning the
3291 same values as they did during get_ops earlier. Additionally it
3292 never removes statements, only adds new ones and it should walk
3293 from the single imm use and check the predicate already before
3294 making those changes.
3295 On the other hand, the handling of GIMPLE_COND directly can turn
3296 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3297 it needs to be done in a separate loop afterwards. */
3298 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3299 {
3300 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3301 && bbinfo[idx].op != NULL_TREE)
3302 {
3303 tree new_op;
3304
3305 stmt = last_stmt (bb);
3306 new_op = update_ops (bbinfo[idx].op,
3307 (enum tree_code)
3308 ops[bbinfo[idx].first_idx]->rank,
3309 ops, &bbinfo[idx].first_idx,
3310 loop_containing_stmt (stmt));
3311 if (new_op == NULL_TREE)
3312 {
3313 gcc_assert (bb == last_bb);
3314 new_op = ops[bbinfo[idx].first_idx++]->op;
3315 }
3316 if (bbinfo[idx].op != new_op)
3317 {
3318 imm_use_iterator iter;
3319 use_operand_p use_p;
3320 gimple use_stmt, cast_stmt = NULL;
3321
3322 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3323 if (is_gimple_debug (use_stmt))
3324 continue;
3325 else if (gimple_code (use_stmt) == GIMPLE_COND
3326 || gimple_code (use_stmt) == GIMPLE_PHI)
3327 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3328 SET_USE (use_p, new_op);
3329 else if (gimple_assign_cast_p (use_stmt))
3330 cast_stmt = use_stmt;
3331 else
3332 gcc_unreachable ();
3333 if (cast_stmt)
3334 {
3335 gcc_assert (bb == last_bb);
3336 tree lhs = gimple_assign_lhs (cast_stmt);
3337 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3338 enum tree_code rhs_code
3339 = gimple_assign_rhs_code (cast_stmt);
3340 gassign *g;
3341 if (is_gimple_min_invariant (new_op))
3342 {
3343 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3344 g = gimple_build_assign (new_lhs, new_op);
3345 }
3346 else
3347 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3348 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3349 gimple_set_uid (g, gimple_uid (cast_stmt));
3350 gimple_set_visited (g, true);
3351 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3352 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3353 if (is_gimple_debug (use_stmt))
3354 continue;
3355 else if (gimple_code (use_stmt) == GIMPLE_COND
3356 || gimple_code (use_stmt) == GIMPLE_PHI)
3357 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3358 SET_USE (use_p, new_lhs);
3359 else
3360 gcc_unreachable ();
3361 }
3362 }
3363 }
3364 if (bb == first_bb)
3365 break;
3366 }
3367 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3368 {
3369 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3370 && bbinfo[idx].op == NULL_TREE
3371 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3372 {
3373 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3374 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3375 gimple_cond_make_false (cond_stmt);
3376 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3377 gimple_cond_make_true (cond_stmt);
3378 else
3379 {
3380 gimple_cond_set_code (cond_stmt, NE_EXPR);
3381 gimple_cond_set_lhs (cond_stmt,
3382 ops[bbinfo[idx].first_idx]->op);
3383 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3384 }
3385 update_stmt (cond_stmt);
3386 }
3387 if (bb == first_bb)
3388 break;
3389 }
3390 }
3391}
3392
3393/* Return true if OPERAND is defined by a PHI node which uses the LHS
3394 of STMT in its operands. This is also known as a "destructive
3395 update" operation. */
3396
3397static bool
3398is_phi_for_stmt (gimple stmt, tree operand)
3399{
3400 gimple def_stmt;
3401 gphi *def_phi;
3402 tree lhs;
3403 use_operand_p arg_p;
3404 ssa_op_iter i;
3405
3406 if (TREE_CODE (operand) != SSA_NAME)
3407 return false;
3408
3409 lhs = gimple_assign_lhs (stmt);
3410
3411 def_stmt = SSA_NAME_DEF_STMT (operand);
3412 def_phi = dyn_cast <gphi *> (def_stmt);
3413 if (!def_phi)
3414 return false;
3415
3416 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3417 if (lhs == USE_FROM_PTR (arg_p))
3418 return true;
3419 return false;
3420}
3421
3422/* Remove def stmt of VAR if VAR has zero uses and recurse
3423 on rhs1 operand if so. */
3424
3425static void
3426remove_visited_stmt_chain (tree var)
3427{
3428 gimple stmt;
3429 gimple_stmt_iterator gsi;
3430
3431 while (1)
3432 {
3433 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3434 return;
3435 stmt = SSA_NAME_DEF_STMT (var);
3436 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3437 {
3438 var = gimple_assign_rhs1 (stmt);
3439 gsi = gsi_for_stmt (stmt);
3440 reassoc_remove_stmt (&gsi);
3441 release_defs (stmt);
3442 }
3443 else
3444 return;
3445 }
3446}
3447
3448/* This function checks three consecutive operands in
3449 the passed operands vector OPS starting from OPINDEX and
3450 swaps two operands if it is profitable for the binary operation
3451 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3452
3453 We pair ops with the same rank if possible.
3454
3455 The alternative we try is to see if STMT is a destructive
3456 update style statement, which is like:
3457 b = phi (a, ...)
3458 a = c + b;
3459 In that case, we want to use the destructive update form to
3460 expose the possible vectorizer sum reduction opportunity.
3461 In that case, the third operand will be the phi node. This
3462 check is not performed if STMT is null.
3463
3464 We could, of course, try to be better as noted above, and do a
3465 lot of work to try to find these opportunities in >3 operand
3466 cases, but it is unlikely to be worth it. */
3467
3468static void
3469swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3470 unsigned int opindex, gimple stmt)
3471{
3472 operand_entry_t oe1, oe2, oe3;
3473
3474 oe1 = ops[opindex];
3475 oe2 = ops[opindex + 1];
3476 oe3 = ops[opindex + 2];
3477
3478 if ((oe1->rank == oe2->rank
3479 && oe2->rank != oe3->rank)
3480 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3481 && !is_phi_for_stmt (stmt, oe1->op)
3482 && !is_phi_for_stmt (stmt, oe2->op)))
3483 {
3484 struct operand_entry temp = *oe3;
3485 oe3->op = oe1->op;
3486 oe3->rank = oe1->rank;
3487 oe1->op = temp.op;
3488 oe1->rank = temp.rank;
3489 }
3490 else if ((oe1->rank == oe3->rank
3491 && oe2->rank != oe3->rank)
3492 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3493 && !is_phi_for_stmt (stmt, oe1->op)
3494 && !is_phi_for_stmt (stmt, oe3->op)))
3495 {
3496 struct operand_entry temp = *oe2;
3497 oe2->op = oe1->op;
3498 oe2->rank = oe1->rank;
3499 oe1->op = temp.op;
3500 oe1->rank = temp.rank;
3501 }
3502}
3503
3504/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3505 two definitions, otherwise return STMT. */
3506
3507static inline gimple
3508find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3509{
3510 if (TREE_CODE (rhs1) == SSA_NAME
3511 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3512 stmt = SSA_NAME_DEF_STMT (rhs1);
3513 if (TREE_CODE (rhs2) == SSA_NAME
3514 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3515 stmt = SSA_NAME_DEF_STMT (rhs2);
3516 return stmt;
3517}
3518
3519/* Recursively rewrite our linearized statements so that the operators
3520 match those in OPS[OPINDEX], putting the computation in rank
3521 order. Return new lhs. */
3522
3523static tree
3524rewrite_expr_tree (gimple stmt, unsigned int opindex,
3525 vec<operand_entry_t> ops, bool changed)
3526{
3527 tree rhs1 = gimple_assign_rhs1 (stmt);
3528 tree rhs2 = gimple_assign_rhs2 (stmt);
3529 tree lhs = gimple_assign_lhs (stmt);
3530 operand_entry_t oe;
3531
3532 /* The final recursion case for this function is that you have
3533 exactly two operations left.
3534 If we had exactly one op in the entire list to start with, we
3535 would have never called this function, and the tail recursion
3536 rewrites them one at a time. */
3537 if (opindex + 2 == ops.length ())
3538 {
3539 operand_entry_t oe1, oe2;
3540
3541 oe1 = ops[opindex];
3542 oe2 = ops[opindex + 1];
3543
3544 if (rhs1 != oe1->op || rhs2 != oe2->op)
3545 {
3546 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3547 unsigned int uid = gimple_uid (stmt);
3548
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3550 {
3551 fprintf (dump_file, "Transforming ");
3552 print_gimple_stmt (dump_file, stmt, 0, 0);
3553 }
3554
3555 /* Even when changed is false, reassociation could have e.g. removed
3556 some redundant operations, so unless we are just swapping the
3557 arguments or unless there is no change at all (then we just
3558 return lhs), force creation of a new SSA_NAME. */
3559 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3560 {
3561 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3562 lhs = make_ssa_name (TREE_TYPE (lhs));
3563 stmt
3564 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3565 oe1->op, oe2->op);
3566 gimple_set_uid (stmt, uid);
3567 gimple_set_visited (stmt, true);
3568 if (insert_point == gsi_stmt (gsi))
3569 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3570 else
3571 insert_stmt_after (stmt, insert_point);
3572 }
3573 else
3574 {
3575 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3576 == stmt);
3577 gimple_assign_set_rhs1 (stmt, oe1->op);
3578 gimple_assign_set_rhs2 (stmt, oe2->op);
3579 update_stmt (stmt);
3580 }
3581
3582 if (rhs1 != oe1->op && rhs1 != oe2->op)
3583 remove_visited_stmt_chain (rhs1);
3584
3585 if (dump_file && (dump_flags & TDF_DETAILS))
3586 {
3587 fprintf (dump_file, " into ");
3588 print_gimple_stmt (dump_file, stmt, 0, 0);
3589 }
3590 }
3591 return lhs;
3592 }
3593
3594 /* If we hit here, we should have 3 or more ops left. */
3595 gcc_assert (opindex + 2 < ops.length ());
3596
3597 /* Rewrite the next operator. */
3598 oe = ops[opindex];
3599
3600 /* Recurse on the LHS of the binary operator, which is guaranteed to
3601 be the non-leaf side. */
3602 tree new_rhs1
3603 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3604 changed || oe->op != rhs2);
3605
3606 if (oe->op != rhs2 || new_rhs1 != rhs1)
3607 {
3608 if (dump_file && (dump_flags & TDF_DETAILS))
3609 {
3610 fprintf (dump_file, "Transforming ");
3611 print_gimple_stmt (dump_file, stmt, 0, 0);
3612 }
3613
3614 /* If changed is false, this is either opindex == 0
3615 or all outer rhs2's were equal to corresponding oe->op,
3616 and powi_result is NULL.
3617 That means lhs is equivalent before and after reassociation.
3618 Otherwise ensure the old lhs SSA_NAME is not reused and
3619 create a new stmt as well, so that any debug stmts will be
3620 properly adjusted. */
3621 if (changed)
3622 {
3623 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3624 unsigned int uid = gimple_uid (stmt);
3625 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3626
3627 lhs = make_ssa_name (TREE_TYPE (lhs));
3628 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3629 new_rhs1, oe->op);
3630 gimple_set_uid (stmt, uid);
3631 gimple_set_visited (stmt, true);
3632 if (insert_point == gsi_stmt (gsi))
3633 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3634 else
3635 insert_stmt_after (stmt, insert_point);
3636 }
3637 else
3638 {
3639 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3640 == stmt);
3641 gimple_assign_set_rhs1 (stmt, new_rhs1);
3642 gimple_assign_set_rhs2 (stmt, oe->op);
3643 update_stmt (stmt);
3644 }
3645
3646 if (dump_file && (dump_flags & TDF_DETAILS))
3647 {
3648 fprintf (dump_file, " into ");
3649 print_gimple_stmt (dump_file, stmt, 0, 0);
3650 }
3651 }
3652 return lhs;
3653}
3654
3655/* Find out how many cycles we need to compute a statement chain.
3656 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3657 maximum number of independent statements we may execute per cycle. */
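/* For instance, with OPS_NUM = 10 and CPU_WIDTH = 2 the model below gives
   10 / 4 = 2 cycles of reducing by the cpu width, leaves 10 - 2 * 2 = 6
   operands and then needs ceil (log2 (6)) = 3 more halving cycles, i.e.
   5 cycles in total.  */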
3658
3659static int
3660get_required_cycles (int ops_num, int cpu_width)
3661{
3662 int res;
3663 int elog;
3664 unsigned int rest;
3665
3666 /* While we have more than 2 * cpu_width operands
3667 we may reduce the number of operands by cpu_width
3668 per cycle. */
3669 res = ops_num / (2 * cpu_width);
3670
3671 /* The remaining operand count may be halved each cycle
3672 until we have only one operand. */
3673 rest = (unsigned)(ops_num - res * cpu_width);
3674 elog = exact_log2 (rest);
3675 if (elog >= 0)
3676 res += elog;
3677 else
3678 res += floor_log2 (rest) + 1;
3679
3680 return res;
3681}
3682
3683/* Returns an optimal number of registers to use for computation of
3684 given statements. */
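/* For instance, assuming the target reports a width of 4 for the operation,
   4 operands need get_required_cycles (4, 4) == 2 cycles, and since
   get_required_cycles (4, 2) is also 2, the binary search below settles on
   a width of 2, saving registers at no cost in cycles.  */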
3685
3686static int
3687get_reassociation_width (int ops_num, enum tree_code opc,
3688 machine_mode mode)
3689{
3690 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3691 int width;
3692 int width_min;
3693 int cycles_best;
3694
3695 if (param_width > 0)
3696 width = param_width;
3697 else
3698 width = targetm.sched.reassociation_width (opc, mode);
3699
3700 if (width == 1)
3701 return width;
3702
3703 /* Get the minimal time required for sequence computation. */
3704 cycles_best = get_required_cycles (ops_num, width);
3705
3706 /* Check if we may use a smaller width and still compute the sequence
3707 in the same time. That will allow us to reduce register usage.
3708 get_required_cycles is monotonically increasing as the width decreases,
3709 so we can perform a binary search for the minimal width that still
3710 results in the optimal cycle count. */
3711 width_min = 1;
3712 while (width > width_min)
3713 {
3714 int width_mid = (width + width_min) / 2;
3715
3716 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3717 width = width_mid;
3718 else if (width_min < width_mid)
3719 width_min = width_mid;
3720 else
3721 break;
3722 }
3723
3724 return width;
3725}
3726
3727/* Recursively rewrite our linearized statements so that the operators
3728 match those in OPS[OPINDEX], putting the computation in rank
3729 order and trying to allow operations to be executed in
3730 parallel. */
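/* E.g. (illustrative) for a chain computing a + b + c + d with a width of 2,
   instead of the serial form t1 = a + b; t2 = t1 + c; t3 = t2 + d; this
   emits t1 = a + b; t2 = c + d; t3 = t1 + t2; so the first two additions
   have no dependency on each other and can execute in the same cycle.  */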
3731
3732static void
3733rewrite_expr_tree_parallel (gassign *stmt, int width,
3734 vec<operand_entry_t> ops)
3735{
3736 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3737 int op_num = ops.length ();
3738 int stmt_num = op_num - 1;
3739 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3740 int op_index = op_num - 1;
3741 int stmt_index = 0;
3742 int ready_stmts_end = 0;
3743 int i = 0;
3744 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3745
3746 /* We start expression rewriting from the top statements.
3747 So, in this loop we create a full list of statements
3748 we will work with. */
3749 stmts[stmt_num - 1] = stmt;
3750 for (i = stmt_num - 2; i >= 0; i--)
3751 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3752
3753 for (i = 0; i < stmt_num; i++)
3754 {
3755 tree op1, op2;
3756
3757 /* Determine whether we should use results of
3758 already handled statements or not. */
3759 if (ready_stmts_end == 0
3760 && (i - stmt_index >= width || op_index < 1))
3761 ready_stmts_end = i;
3762
3763      /* Now we choose operands for the next statement.  A non-zero
3764	 value in ready_stmts_end means that we should use the results
3765	 of already generated statements as new operands.  */
3766 if (ready_stmts_end > 0)
3767 {
3768 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3769 if (ready_stmts_end > stmt_index)
3770 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3771 else if (op_index >= 0)
3772 op2 = ops[op_index--]->op;
3773 else
3774 {
3775 gcc_assert (stmt_index < i);
3776 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3777 }
3778
3779 if (stmt_index >= ready_stmts_end)
3780 ready_stmts_end = 0;
3781 }
3782 else
3783 {
3784 if (op_index > 1)
3785 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3786 op2 = ops[op_index--]->op;
3787 op1 = ops[op_index--]->op;
3788 }
3789
3790      /* If we are about to emit the last statement, put the remaining
3791	 operands into the original statement.  This also terminates
3792	 the loop.  */
3793 if (op_index < 0 && stmt_index == i)
3794 i = stmt_num - 1;
3795
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 {
3798 fprintf (dump_file, "Transforming ");
3799 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3800 }
3801
3802      /* We keep the original statement only for the last one.  All
3803	 others are recreated.  */
3804 if (i == stmt_num - 1)
3805 {
3806 gimple_assign_set_rhs1 (stmts[i], op1);
3807 gimple_assign_set_rhs2 (stmts[i], op2);
3808 update_stmt (stmts[i]);
3809 }
3810 else
3811 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3812
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3814 {
3815 fprintf (dump_file, " into ");
3816 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3817 }
3818 }
3819
3820 remove_visited_stmt_chain (last_rhs1);
3821}
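/* Sketch of the effect (SSA names invented for illustration): rewriting a
   linearized chain that computes a + b + c + d with width 2 yields roughly
     _t1 = a + b;
     _t2 = c + d;
     lhs = _t1 + _t2;
   so the first two additions are independent and can issue in the same
   cycle, instead of the fully serial ((a + b) + c) + d form.  */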
3822
3823/* Transform STMT, which is really (A + B) + (C + D), into the
3824   left-linear form ((A + B) + C) + D.
3825   Recurse on D if necessary.  */
3826
3827static void
3828linearize_expr (gimple stmt)
3829{
3830 gimple_stmt_iterator gsi;
3831 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3832 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3833 gimple oldbinrhs = binrhs;
3834 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3835 gimple newbinrhs = NULL;
3836 struct loop *loop = loop_containing_stmt (stmt);
3837 tree lhs = gimple_assign_lhs (stmt);
3838
3839 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3840 && is_reassociable_op (binrhs, rhscode, loop));
3841
3842 gsi = gsi_for_stmt (stmt);
3843
3844 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3845 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3846 gimple_assign_rhs_code (binrhs),
3847 gimple_assign_lhs (binlhs),
3848 gimple_assign_rhs2 (binrhs));
3849 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3850 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3851 gimple_set_uid (binrhs, gimple_uid (stmt));
3852
3853 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3854 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3855
3856 if (dump_file && (dump_flags & TDF_DETAILS))
3857 {
3858 fprintf (dump_file, "Linearized: ");
3859 print_gimple_stmt (dump_file, stmt, 0, 0);
3860 }
3861
3862 reassociate_stats.linearized++;
3863 update_stmt (stmt);
3864
3865 gsi = gsi_for_stmt (oldbinrhs);
3866 reassoc_remove_stmt (&gsi);
3867 release_defs (oldbinrhs);
3868
3869 gimple_set_visited (stmt, true);
3870 gimple_set_visited (binlhs, true);
3871 gimple_set_visited (binrhs, true);
3872
3873 /* Tail recurse on the new rhs if it still needs reassociation. */
3874 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3875 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3876 want to change the algorithm while converting to tuples. */
3877 linearize_expr (stmt);
3878}
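/* For illustration (SSA names invented): given
     s = a + b;  t = c + d;  x = s + t;
   a fresh statement u = s + <one operand of t> is inserted before x, and x
   is rewritten to add u and t's remaining operand, producing a left-linear
   chain; t's definition, now dead, is removed.  */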
3879
3880/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3881 it. Otherwise, return NULL. */
3882
3883static gimple
3884get_single_immediate_use (tree lhs)
3885{
3886 use_operand_p immuse;
3887 gimple immusestmt;
3888
3889 if (TREE_CODE (lhs) == SSA_NAME
3890 && single_imm_use (lhs, &immuse, &immusestmt)
3891 && is_gimple_assign (immusestmt))
3892 return immusestmt;
3893
3894 return NULL;
3895}
3896
3897/* Recursively negate the value of TONEGATE, and return the SSA_NAME
3898 representing the negated value. Insertions of any necessary
3899 instructions go before GSI.
3900 This function is recursive in that, if you hand it "a_5" as the
3901 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3902 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3903
3904static tree
3905negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3906{
3907 gimple negatedefstmt = NULL;
3908 tree resultofnegate;
3909 gimple_stmt_iterator gsi;
3910 unsigned int uid;
3911
3912 /* If we are trying to negate a name, defined by an add, negate the
3913 add operands instead. */
3914 if (TREE_CODE (tonegate) == SSA_NAME)
3915 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3916 if (TREE_CODE (tonegate) == SSA_NAME
3917 && is_gimple_assign (negatedefstmt)
3918 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3919 && has_single_use (gimple_assign_lhs (negatedefstmt))
3920 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3921 {
3922 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3923 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3924 tree lhs = gimple_assign_lhs (negatedefstmt);
3925 gimple g;
3926
3927 gsi = gsi_for_stmt (negatedefstmt);
3928 rhs1 = negate_value (rhs1, &gsi);
3929
3930 gsi = gsi_for_stmt (negatedefstmt);
3931 rhs2 = negate_value (rhs2, &gsi);
3932
3933 gsi = gsi_for_stmt (negatedefstmt);
3934 lhs = make_ssa_name (TREE_TYPE (lhs));
3935 gimple_set_visited (negatedefstmt, true);
3936 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3937 gimple_set_uid (g, gimple_uid (negatedefstmt));
3938 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3939 return lhs;
3940 }
3941
3942 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3943 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3944 NULL_TREE, true, GSI_SAME_STMT);
3945 gsi = *gsip;
3946 uid = gimple_uid (gsi_stmt (gsi));
3947 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3948 {
3949 gimple stmt = gsi_stmt (gsi);
3950 if (gimple_uid (stmt) != 0)
3951 break;
3952 gimple_set_uid (stmt, uid);
3953 }
3954 return resultofnegate;
3955}
3956
3957/* Return true if we should break up the subtract in STMT into an add
3958   with negate.  This is true when the subtract operands are really
3959 adds, or the subtract itself is used in an add expression. In
3960 either case, breaking up the subtract into an add with negate
3961 exposes the adds to reassociation. */
3962
3963static bool
3964should_break_up_subtract (gimple stmt)
3965{
3966 tree lhs = gimple_assign_lhs (stmt);
3967 tree binlhs = gimple_assign_rhs1 (stmt);
3968 tree binrhs = gimple_assign_rhs2 (stmt);
3969 gimple immusestmt;
3970 struct loop *loop = loop_containing_stmt (stmt);
3971
3972 if (TREE_CODE (binlhs) == SSA_NAME
3973 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3974 return true;
3975
3976 if (TREE_CODE (binrhs) == SSA_NAME
3977 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3978 return true;
3979
3980 if (TREE_CODE (lhs) == SSA_NAME
3981 && (immusestmt = get_single_immediate_use (lhs))
3982 && is_gimple_assign (immusestmt)
3983 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3984 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3985 return true;
3986 return false;
3987}
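/* For instance (invented example), in
     x = a - b;
     y = x + c;
   the subtract is worth breaking up: x has a single use, and that use is an
   addition, so rewriting it as x = a + -b exposes a three-operand add chain
   to reassociation.  */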
3988
3989/* Transform STMT from A - B into A + -B. */
3990
3991static void
3992break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3993{
3994 tree rhs1 = gimple_assign_rhs1 (stmt);
3995 tree rhs2 = gimple_assign_rhs2 (stmt);
3996
3997 if (dump_file && (dump_flags & TDF_DETAILS))
3998 {
3999 fprintf (dump_file, "Breaking up subtract ");
4000 print_gimple_stmt (dump_file, stmt, 0, 0);
4001 }
4002
4003 rhs2 = negate_value (rhs2, gsip);
4004 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4005 update_stmt (stmt);
4006}
4007
4008/* Determine whether STMT is a builtin call that raises an SSA name
4009 to an integer power and has only one use. If so, and this is early
4010 reassociation and unsafe math optimizations are permitted, place
4011 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4012 If any of these conditions does not hold, return FALSE. */
4013
4014static bool
4015acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
4016{
4017 tree fndecl, arg1;
4018 REAL_VALUE_TYPE c, cint;
4019
4020 if (!first_pass_instance
4021 || !flag_unsafe_math_optimizations
4022 || !is_gimple_call (stmt)
4023 || !has_single_use (gimple_call_lhs (stmt)))
4024 return false;
4025
4026 fndecl = gimple_call_fndecl (stmt);
4027
4028 if (!fndecl
4029 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
4030 return false;
4031
4032 switch (DECL_FUNCTION_CODE (fndecl))
4033 {
4034 CASE_FLT_FN (BUILT_IN_POW):
4035 if (flag_errno_math)
4036 return false;
4037
4038 *base = gimple_call_arg (stmt, 0);
4039 arg1 = gimple_call_arg (stmt, 1);
4040
4041 if (TREE_CODE (arg1) != REAL_CST)
4042 return false;
4043
4044 c = TREE_REAL_CST (arg1);
4045
4046 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4047 return false;
4048
4049 *exponent = real_to_integer (&c);
4050 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4051 if (!real_identical (&c, &cint))
4052 return false;
4053
4054 break;
4055
4056 CASE_FLT_FN (BUILT_IN_POWI):
4057 *base = gimple_call_arg (stmt, 0);
4058 arg1 = gimple_call_arg (stmt, 1);
4059
4060 if (!tree_fits_shwi_p (arg1))
4061 return false;
4062
4063 *exponent = tree_to_shwi (arg1);
4064 break;
4065
4066 default:
4067 return false;
4068 }
4069
4070 /* Expanding negative exponents is generally unproductive, so we don't
4071 complicate matters with those. Exponents of zero and one should
4072 have been handled by expression folding. */
4073 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4074 return false;
4075
4076 return true;
4077}
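/* Example (assuming -funsafe-math-optimizations, -fno-math-errno and the
   first pass instance): a call t = __builtin_pow (x_1, 3.0) whose result
   has a single use is accepted with *BASE = x_1 and *EXPONENT = 3, while
   __builtin_pow (x_1, 2.5) or any exponent below 2 is rejected.  */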
4078
4079/* Recursively linearize a binary expression that is the RHS of STMT.
4080 Place the operands of the expression tree in the vector named OPS. */
4081
4082static void
4083linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4084 bool is_associative, bool set_visited)
4085{
4086 tree binlhs = gimple_assign_rhs1 (stmt);
4087 tree binrhs = gimple_assign_rhs2 (stmt);
4088 gimple binlhsdef = NULL, binrhsdef = NULL;
4089 bool binlhsisreassoc = false;
4090 bool binrhsisreassoc = false;
4091 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4092 struct loop *loop = loop_containing_stmt (stmt);
4093 tree base = NULL_TREE;
4094 HOST_WIDE_INT exponent = 0;
4095
4096 if (set_visited)
4097 gimple_set_visited (stmt, true);
4098
4099 if (TREE_CODE (binlhs) == SSA_NAME)
4100 {
4101 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4102 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4103 && !stmt_could_throw_p (binlhsdef));
4104 }
4105
4106 if (TREE_CODE (binrhs) == SSA_NAME)
4107 {
4108 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4109 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4110 && !stmt_could_throw_p (binrhsdef));
4111 }
4112
4113 /* If the LHS is not reassociable, but the RHS is, we need to swap
4114 them. If neither is reassociable, there is nothing we can do, so
4115 just put them in the ops vector. If the LHS is reassociable,
4116 linearize it. If both are reassociable, then linearize the RHS
4117 and the LHS. */
4118
4119 if (!binlhsisreassoc)
4120 {
4121 tree temp;
4122
4123      /* If this is not an associative operation, such as division, give up.  */
4124 if (!is_associative)
4125 {
4126 add_to_ops_vec (ops, binrhs);
4127 return;
4128 }
4129
4130 if (!binrhsisreassoc)
4131 {
4132 if (rhscode == MULT_EXPR
4133 && TREE_CODE (binrhs) == SSA_NAME
4134 && acceptable_pow_call (binrhsdef, &base, &exponent))
4135 {
4136 add_repeat_to_ops_vec (ops, base, exponent);
4137 gimple_set_visited (binrhsdef, true);
4138 }
4139 else
4140 add_to_ops_vec (ops, binrhs);
4141
4142 if (rhscode == MULT_EXPR
4143 && TREE_CODE (binlhs) == SSA_NAME
4144 && acceptable_pow_call (binlhsdef, &base, &exponent))
4145 {
4146 add_repeat_to_ops_vec (ops, base, exponent);
4147 gimple_set_visited (binlhsdef, true);
4148 }
4149 else
4150 add_to_ops_vec (ops, binlhs);
4151
4152 return;
4153 }
4154
4155 if (dump_file && (dump_flags & TDF_DETAILS))
4156 {
4157 fprintf (dump_file, "swapping operands of ");
4158 print_gimple_stmt (dump_file, stmt, 0, 0);
4159 }
4160
4161 swap_ssa_operands (stmt,
4162 gimple_assign_rhs1_ptr (stmt),
4163 gimple_assign_rhs2_ptr (stmt));
4164 update_stmt (stmt);
4165
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 {
4168 fprintf (dump_file, " is now ");
4169 print_gimple_stmt (dump_file, stmt, 0, 0);
4170 }
4171
4172 /* We want to make it so the lhs is always the reassociative op,
4173 so swap. */
4174 temp = binlhs;
4175 binlhs = binrhs;
4176 binrhs = temp;
4177 }
4178 else if (binrhsisreassoc)
4179 {
4180 linearize_expr (stmt);
4181 binlhs = gimple_assign_rhs1 (stmt);
4182 binrhs = gimple_assign_rhs2 (stmt);
4183 }
4184
4185 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4186 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4187 rhscode, loop));
4188 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4189 is_associative, set_visited);
4190
4191 if (rhscode == MULT_EXPR
4192 && TREE_CODE (binrhs) == SSA_NAME
4193 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4194 {
4195 add_repeat_to_ops_vec (ops, base, exponent);
4196 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4197 }
4198 else
4199 add_to_ops_vec (ops, binrhs);
4200}
4201
4202/* Repropagate the negates back into subtracts, since no other pass
4203 currently does it. */
4204
4205static void
4206repropagate_negates (void)
4207{
4208 unsigned int i = 0;
4209 tree negate;
4210
4211 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4212 {
4213 gimple user = get_single_immediate_use (negate);
4214
4215 if (!user || !is_gimple_assign (user))
4216 continue;
4217
4218 /* The negate operand can be either operand of a PLUS_EXPR
4219 (it can be the LHS if the RHS is a constant for example).
4220
4221 Force the negate operand to the RHS of the PLUS_EXPR, then
4222 transform the PLUS_EXPR into a MINUS_EXPR. */
4223 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4224 {
4225 /* If the negated operand appears on the LHS of the
4226 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4227 to force the negated operand to the RHS of the PLUS_EXPR. */
4228 if (gimple_assign_rhs1 (user) == negate)
4229 {
4230 swap_ssa_operands (user,
4231 gimple_assign_rhs1_ptr (user),
4232 gimple_assign_rhs2_ptr (user));
4233 }
4234
4235 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4236 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4237 if (gimple_assign_rhs2 (user) == negate)
4238 {
4239 tree rhs1 = gimple_assign_rhs1 (user);
4240 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4241 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4242 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4243 update_stmt (user);
4244 }
4245 }
4246 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4247 {
4248 if (gimple_assign_rhs1 (user) == negate)
4249 {
4250 /* We have
4251 x = -a
4252 y = x - b
4253 which we transform into
4254 x = a + b
4255 y = -x .
4256 This pushes down the negate which we possibly can merge
4257 into some other operation, hence insert it into the
4258 plus_negates vector. */
4259 gimple feed = SSA_NAME_DEF_STMT (negate);
4260 tree a = gimple_assign_rhs1 (feed);
4261 tree b = gimple_assign_rhs2 (user);
4262 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4263 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4264 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4265 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4266 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4267 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4268 user = gsi_stmt (gsi2);
4269 update_stmt (user);
4270 reassoc_remove_stmt (&gsi);
4271 release_defs (feed);
4272 plus_negates.safe_push (gimple_assign_lhs (user));
4273 }
4274 else
4275 {
4276 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4277 rid of one operation. */
4278 gimple feed = SSA_NAME_DEF_STMT (negate);
4279 tree a = gimple_assign_rhs1 (feed);
4280 tree rhs1 = gimple_assign_rhs1 (user);
4281 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4282 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4283 update_stmt (gsi_stmt (gsi));
4284 }
4285 }
4286 }
4287}
4288
4289/* Returns true if OP is of a type for which we can do reassociation:
4290   integral types whose overflow wraps, non-saturating fixed-point types,
4291   and floating-point types when associative-math is enabled.  */
4292
4293static bool
4294can_reassociate_p (tree op)
4295{
4296 tree type = TREE_TYPE (op);
4297 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4298 || NON_SAT_FIXED_POINT_TYPE_P (type)
4299 || (flag_associative_math && FLOAT_TYPE_P (type)))
4300 return true;
4301 return false;
4302}
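/* For example: an unsigned int operand qualifies (its overflow wraps), a
   signed int operand does not when signed overflow is undefined, and a
   double operand qualifies only under -fassociative-math.  */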
4303
4304/* Break up subtract operations in block BB.
4305
4306 We do this top down because we don't know whether the subtract is
4307 part of a possible chain of reassociation except at the top.
4308
4309 IE given
4310 d = f + g
4311 c = a + e
4312 b = c - d
4313 q = b - r
4314 k = t - q
4315
4316 we want to break up k = t - q, but we won't until we've transformed q
4317 = b - r, which won't be broken up until we transform b = c - d.
4318
4319 En passant, clear the GIMPLE visited flag on every statement
4320 and set UIDs within each basic block. */
4321
4322static void
4323break_up_subtract_bb (basic_block bb)
4324{
4325 gimple_stmt_iterator gsi;
4326 basic_block son;
4327 unsigned int uid = 1;
4328
4329 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4330 {
4331 gimple stmt = gsi_stmt (gsi);
4332 gimple_set_visited (stmt, false);
4333 gimple_set_uid (stmt, uid++);
4334
4335 if (!is_gimple_assign (stmt)
4336 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4337 continue;
4338
4339 /* Look for simple gimple subtract operations. */
4340 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4341 {
4342 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4343 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4344 continue;
4345
4346 /* Check for a subtract used only in an addition. If this
4347 is the case, transform it into add of a negate for better
4348 reassociation. IE transform C = A-B into C = A + -B if C
4349 is only used in an addition. */
4350 if (should_break_up_subtract (stmt))
4351 break_up_subtract (stmt, &gsi);
4352 }
4353 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4354 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4355 plus_negates.safe_push (gimple_assign_lhs (stmt));
4356 }
4357 for (son = first_dom_son (CDI_DOMINATORS, bb);
4358 son;
4359 son = next_dom_son (CDI_DOMINATORS, son))
4360 break_up_subtract_bb (son);
4361}
4362
4363/* Used for repeated factor analysis. */
4364struct repeat_factor_d
4365{
4366 /* An SSA name that occurs in a multiply chain. */
4367 tree factor;
4368
4369 /* Cached rank of the factor. */
4370 unsigned rank;
4371
4372 /* Number of occurrences of the factor in the chain. */
4373 HOST_WIDE_INT count;
4374
4375 /* An SSA name representing the product of this factor and
4376 all factors appearing later in the repeated factor vector. */
4377 tree repr;
4378};
4379
4380typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4381typedef const struct repeat_factor_d *const_repeat_factor_t;
4382
4383
4384static vec<repeat_factor> repeat_factor_vec;
4385
4386/* Used for sorting the repeat factor vector. Sort primarily by
4387 ascending occurrence count, secondarily by descending rank. */
4388
4389static int
4390compare_repeat_factors (const void *x1, const void *x2)
4391{
4392 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4393 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4394
4395 if (rf1->count != rf2->count)
4396 return rf1->count - rf2->count;
4397
4398 return rf2->rank - rf1->rank;
4399}
4400
4401/* Look for repeated operands in OPS in the multiply tree rooted at
4402 STMT. Replace them with an optimal sequence of multiplies and powi
4403 builtin calls, and remove the used operands from OPS. Return an
4404 SSA name representing the value of the replacement sequence. */
4405
4406static tree
4407attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4408{
4409 unsigned i, j, vec_len;
4410 int ii;
4411 operand_entry_t oe;
4412 repeat_factor_t rf1, rf2;
4413 repeat_factor rfnew;
4414 tree result = NULL_TREE;
4415 tree target_ssa, iter_result;
4416 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4417 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4418 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4419 gimple mul_stmt, pow_stmt;
4420
4421 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4422 target. */
4423 if (!powi_fndecl)
4424 return NULL_TREE;
4425
4426 /* Allocate the repeated factor vector. */
4427 repeat_factor_vec.create (10);
4428
4429 /* Scan the OPS vector for all SSA names in the product and build
4430 up a vector of occurrence counts for each factor. */
4431 FOR_EACH_VEC_ELT (*ops, i, oe)
4432 {
4433 if (TREE_CODE (oe->op) == SSA_NAME)
4434 {
4435 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4436 {
4437 if (rf1->factor == oe->op)
4438 {
4439 rf1->count += oe->count;
4440 break;
4441 }
4442 }
4443
4444 if (j >= repeat_factor_vec.length ())
4445 {
4446 rfnew.factor = oe->op;
4447 rfnew.rank = oe->rank;
4448 rfnew.count = oe->count;
4449 rfnew.repr = NULL_TREE;
4450 repeat_factor_vec.safe_push (rfnew);
4451 }
4452 }
4453 }
4454
4455 /* Sort the repeated factor vector by (a) increasing occurrence count,
4456 and (b) decreasing rank. */
4457 repeat_factor_vec.qsort (compare_repeat_factors);
4458
4459 /* It is generally best to combine as many base factors as possible
4460 into a product before applying __builtin_powi to the result.
4461 However, the sort order chosen for the repeated factor vector
4462 allows us to cache partial results for the product of the base
4463 factors for subsequent use. When we already have a cached partial
4464 result from a previous iteration, it is best to make use of it
4465 before looking for another __builtin_pow opportunity.
4466
4467 As an example, consider x * x * y * y * y * z * z * z * z.
4468 We want to first compose the product x * y * z, raise it to the
4469 second power, then multiply this by y * z, and finally multiply
4470 by z. This can be done in 5 multiplies provided we cache y * z
4471 for use in both expressions:
4472
4473 t1 = y * z
4474 t2 = t1 * x
4475 t3 = t2 * t2
4476 t4 = t1 * t3
4477 result = t4 * z
4478
4479 If we instead ignored the cached y * z and first multiplied by
4480 the __builtin_pow opportunity z * z, we would get the inferior:
4481
4482 t1 = y * z
4483 t2 = t1 * x
4484 t3 = t2 * t2
4485 t4 = z * z
4486 t5 = t3 * t4
4487 result = t5 * y */
4488
4489 vec_len = repeat_factor_vec.length ();
4490
4491 /* Repeatedly look for opportunities to create a builtin_powi call. */
4492 while (true)
4493 {
4494 HOST_WIDE_INT power;
4495
4496 /* First look for the largest cached product of factors from
4497 preceding iterations. If found, create a builtin_powi for
4498 it if the minimum occurrence count for its factors is at
4499 least 2, or just use this cached product as our next
4500 multiplicand if the minimum occurrence count is 1. */
4501 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4502 {
4503 if (rf1->repr && rf1->count > 0)
4504 break;
4505 }
4506
4507 if (j < vec_len)
4508 {
4509 power = rf1->count;
4510
4511 if (power == 1)
4512 {
4513 iter_result = rf1->repr;
4514
4515 if (dump_file && (dump_flags & TDF_DETAILS))
4516 {
4517 unsigned elt;
4518 repeat_factor_t rf;
4519 fputs ("Multiplying by cached product ", dump_file);
4520 for (elt = j; elt < vec_len; elt++)
4521 {
4522 rf = &repeat_factor_vec[elt];
4523 print_generic_expr (dump_file, rf->factor, 0);
4524 if (elt < vec_len - 1)
4525 fputs (" * ", dump_file);
4526 }
4527 fputs ("\n", dump_file);
4528 }
4529 }
4530 else
4531 {
4532 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4533 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4534 build_int_cst (integer_type_node,
4535 power));
4536 gimple_call_set_lhs (pow_stmt, iter_result);
4537 gimple_set_location (pow_stmt, gimple_location (stmt));
4538		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
4539 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4540
4541 if (dump_file && (dump_flags & TDF_DETAILS))
4542 {
4543 unsigned elt;
4544 repeat_factor_t rf;
4545 fputs ("Building __builtin_pow call for cached product (",
4546 dump_file);
4547 for (elt = j; elt < vec_len; elt++)
4548 {
4549 rf = &repeat_factor_vec[elt];
4550 print_generic_expr (dump_file, rf->factor, 0);
4551 if (elt < vec_len - 1)
4552 fputs (" * ", dump_file);
4553 }
4554 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4555 power);
4556 }
4557 }
4558 }
4559 else
4560 {
4561 /* Otherwise, find the first factor in the repeated factor
4562 vector whose occurrence count is at least 2. If no such
4563 factor exists, there are no builtin_powi opportunities
4564 remaining. */
4565 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4566 {
4567 if (rf1->count >= 2)
4568 break;
4569 }
4570
4571 if (j >= vec_len)
4572 break;
4573
4574 power = rf1->count;
4575
4576 if (dump_file && (dump_flags & TDF_DETAILS))
4577 {
4578 unsigned elt;
4579 repeat_factor_t rf;
4580 fputs ("Building __builtin_pow call for (", dump_file);
4581 for (elt = j; elt < vec_len; elt++)
4582 {
4583 rf = &repeat_factor_vec[elt];
4584 print_generic_expr (dump_file, rf->factor, 0);
4585 if (elt < vec_len - 1)
4586 fputs (" * ", dump_file);
4587 }
4588 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4589 }
4590
4591 reassociate_stats.pows_created++;
4592
4593 /* Visit each element of the vector in reverse order (so that
4594 high-occurrence elements are visited first, and within the
4595 same occurrence count, lower-ranked elements are visited
4596 first). Form a linear product of all elements in this order
4597	 whose occurrence count is at least that of element J.
4598 Record the SSA name representing the product of each element
4599 with all subsequent elements in the vector. */
4600 if (j == vec_len - 1)
4601 rf1->repr = rf1->factor;
4602 else
4603 {
4604 for (ii = vec_len - 2; ii >= (int)j; ii--)
4605 {
4606 tree op1, op2;
4607
4608 rf1 = &repeat_factor_vec[ii];
4609 rf2 = &repeat_factor_vec[ii + 1];
4610
4611 /* Init the last factor's representative to be itself. */
4612 if (!rf2->repr)
4613 rf2->repr = rf2->factor;
4614
4615 op1 = rf1->factor;
4616 op2 = rf2->repr;
4617
4618 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4619 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4620 op1, op2);
4621 gimple_set_location (mul_stmt, gimple_location (stmt));
4622		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
4623 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4624 rf1->repr = target_ssa;
4625
4626 /* Don't reprocess the multiply we just introduced. */
4627 gimple_set_visited (mul_stmt, true);
4628 }
4629 }
4630
4631 /* Form a call to __builtin_powi for the maximum product
4632 just formed, raised to the power obtained earlier. */
4633 rf1 = &repeat_factor_vec[j];
4634 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4635 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4636 build_int_cst (integer_type_node,
4637 power));
4638 gimple_call_set_lhs (pow_stmt, iter_result);
4639 gimple_set_location (pow_stmt, gimple_location (stmt));
4640	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
4641 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4642 }
4643
4644 /* If we previously formed at least one other builtin_powi call,
4645 form the product of this one and those others. */
4646 if (result)
4647 {
4648 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4649 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4650 result, iter_result);
4651 gimple_set_location (mul_stmt, gimple_location (stmt));
4652	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
4653 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4654 gimple_set_visited (mul_stmt, true);
4655 result = new_result;
4656 }
4657 else
4658 result = iter_result;
4659
4660 /* Decrement the occurrence count of each element in the product
4661 by the count found above, and remove this many copies of each
4662 factor from OPS. */
4663 for (i = j; i < vec_len; i++)
4664 {
4665 unsigned k = power;
4666 unsigned n;
4667
4668 rf1 = &repeat_factor_vec[i];
4669 rf1->count -= power;
4670
4671 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4672 {
4673 if (oe->op == rf1->factor)
4674 {
4675 if (oe->count <= k)
4676 {
4677 ops->ordered_remove (n);
4678 k -= oe->count;
4679
4680 if (k == 0)
4681 break;
4682 }
4683 else
4684 {
4685 oe->count -= k;
4686 break;
4687 }
4688 }
4689 }
4690 }
4691 }
4692
4693 /* At this point all elements in the repeated factor vector have a
4694 remaining occurrence count of 0 or 1, and those with a count of 1
4695 don't have cached representatives. Re-sort the ops vector and
4696 clean up. */
4697 ops->qsort (sort_by_operand_rank);
4698 repeat_factor_vec.release ();
4699
4700 /* Return the final product computed herein. Note that there may
4701 still be some elements with single occurrence count left in OPS;
4702 those will be handled by the normal reassociation logic. */
4703 return result;
4704}
4705
4706/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4707
4708static void
4709transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4710{
4711 tree rhs1;
4712
4713 if (dump_file && (dump_flags & TDF_DETAILS))
4714 {
4715 fprintf (dump_file, "Transforming ");
4716 print_gimple_stmt (dump_file, stmt, 0, 0);
4717 }
4718
4719 rhs1 = gimple_assign_rhs1 (stmt);
4720 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4721 update_stmt (stmt);
4722 remove_visited_stmt_chain (rhs1);
4723
4724 if (dump_file && (dump_flags & TDF_DETAILS))
4725 {
4726 fprintf (dump_file, " into ");
4727 print_gimple_stmt (dump_file, stmt, 0, 0);
4728 }
4729}
4730
4731/* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4732
4733static void
4734transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4735 tree rhs1, tree rhs2)
4736{
4737 if (dump_file && (dump_flags & TDF_DETAILS))
4738 {
4739 fprintf (dump_file, "Transforming ");
4740 print_gimple_stmt (dump_file, stmt, 0, 0);
4741 }
4742
4743 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4744 update_stmt (gsi_stmt (*gsi));
4745 remove_visited_stmt_chain (rhs1);
4746
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 {
4749 fprintf (dump_file, " into ");
4750 print_gimple_stmt (dump_file, stmt, 0, 0);
4751 }
4752}
4753
4754/* Reassociate expressions in basic block BB and in the blocks it
4755   post-dominates (its children in the post-dominator tree).  */
4756
4757static void
4758reassociate_bb (basic_block bb)
4759{
4760 gimple_stmt_iterator gsi;
4761 basic_block son;
4762 gimple stmt = last_stmt (bb);
4763
4764 if (stmt && !gimple_visited_p (stmt))
4765 maybe_optimize_range_tests (stmt);
4766
4767 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4768 {
4769 stmt = gsi_stmt (gsi);
4770
4771 if (is_gimple_assign (stmt)
4772 && !stmt_could_throw_p (stmt))
4773 {
4774 tree lhs, rhs1, rhs2;
4775 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4776
4777 /* If this is not a gimple binary expression, there is
4778 nothing for us to do with it. */
4779 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4780 continue;
4781
4782 /* If this was part of an already processed statement,
4783 we don't need to touch it again. */
4784 if (gimple_visited_p (stmt))
4785 {
4786 /* This statement might have become dead because of previous
4787 reassociations. */
4788 if (has_zero_uses (gimple_get_lhs (stmt)))
4789 {
4790 reassoc_remove_stmt (&gsi);
4791 release_defs (stmt);
4792	      /* The removal above may have deleted the last stmt,
4793		 which leaves the iterator at the end of the sequence.
4794		 In that case, reset it to the last stmt of the block;
4795		 that too may be the end of the sequence if we removed
4796		 the block's last statement, in which case we need to
4797		 bail out.  */
4798 if (gsi_end_p (gsi))
4799 {
4800 gsi = gsi_last_bb (bb);
4801 if (gsi_end_p (gsi))
4802 break;
4803 }
4804 }
4805 continue;
4806 }
4807
4808 lhs = gimple_assign_lhs (stmt);
4809 rhs1 = gimple_assign_rhs1 (stmt);
4810 rhs2 = gimple_assign_rhs2 (stmt);
4811
4812 /* For non-bit or min/max operations we can't associate
4813 all types. Verify that here. */
4814 if (rhs_code != BIT_IOR_EXPR
4815 && rhs_code != BIT_AND_EXPR
4816 && rhs_code != BIT_XOR_EXPR
4817 && rhs_code != MIN_EXPR
4818 && rhs_code != MAX_EXPR
4819 && (!can_reassociate_p (lhs)
4820 || !can_reassociate_p (rhs1)
4821 || !can_reassociate_p (rhs2)))
4822 continue;
4823
4824 if (associative_tree_code (rhs_code))
4825 {
4826 auto_vec<operand_entry_t> ops;
4827 tree powi_result = NULL_TREE;
4828
4829 /* There may be no immediate uses left by the time we
4830 get here because we may have eliminated them all. */
4831 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4832 continue;
4833
4834 gimple_set_visited (stmt, true);
4835 linearize_expr_tree (&ops, stmt, true, true);
4836 ops.qsort (sort_by_operand_rank);
4837 optimize_ops_list (rhs_code, &ops);
4838 if (undistribute_ops_list (rhs_code, &ops,
4839 loop_containing_stmt (stmt)))
4840 {
4841 ops.qsort (sort_by_operand_rank);
4842 optimize_ops_list (rhs_code, &ops);
4843 }
4844
4845 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4846 optimize_range_tests (rhs_code, &ops);
4847
4848 if (first_pass_instance
4849 && rhs_code == MULT_EXPR
4850 && flag_unsafe_math_optimizations)
4851 powi_result = attempt_builtin_powi (stmt, &ops);
4852
4853 /* If the operand vector is now empty, all operands were
4854 consumed by the __builtin_powi optimization. */
4855 if (ops.length () == 0)
4856 transform_stmt_to_copy (&gsi, stmt, powi_result);
4857 else if (ops.length () == 1)
4858 {
4859 tree last_op = ops.last ()->op;
4860
4861 if (powi_result)
4862 transform_stmt_to_multiply (&gsi, stmt, last_op,
4863 powi_result);
4864 else
4865 transform_stmt_to_copy (&gsi, stmt, last_op);
4866 }
4867 else
4868 {
4869 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4870 int ops_num = ops.length ();
4871 int width = get_reassociation_width (ops_num, rhs_code, mode);
4872 tree new_lhs = lhs;
4873
4874 if (dump_file && (dump_flags & TDF_DETAILS))
4875 fprintf (dump_file,
4876 "Width = %d was chosen for reassociation\n", width);
4877
4878 if (width > 1
4879 && ops.length () > 3)
4880 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4881 width, ops);
4882 else
4883 {
4884 /* When there are three operands left, we want
4885 to make sure the ones that get the double
4886 binary op are chosen wisely. */
4887 int len = ops.length ();
4888 if (len >= 3)
4889 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4890
4891 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4892 powi_result != NULL);
4893 }
4894
4895 /* If we combined some repeated factors into a
4896 __builtin_powi call, multiply that result by the
4897 reassociated operands. */
4898 if (powi_result)
4899 {
4900 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4901 tree type = TREE_TYPE (lhs);
4902 tree target_ssa = make_temp_ssa_name (type, NULL,
4903 "reassocpow");
4904 gimple_set_lhs (lhs_stmt, target_ssa);
4905 update_stmt (lhs_stmt);
4906 if (lhs != new_lhs)
4907 target_ssa = new_lhs;
4908 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4909 powi_result, target_ssa);
4910 gimple_set_location (mul_stmt, gimple_location (stmt));
4911		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
4912 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4913 }
4914 }
4915 }
4916 }
4917 }
4918 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4919 son;
4920 son = next_dom_son (CDI_POST_DOMINATORS, son))
4921 reassociate_bb (son);
4922}
4923
4924/* Add jumps around shifts for range tests turned into bit tests.
4925 For each SSA_NAME VAR we have code like:
4926 VAR = ...; // final stmt of range comparison
4927 // bit test here...;
4928 OTHERVAR = ...; // final stmt of the bit test sequence
4929 RES = VAR | OTHERVAR;
4930 Turn the above into:
4931 VAR = ...;
4932 if (VAR != 0)
4933 goto <l3>;
4934 else
4935 goto <l2>;
4936 <l2>:
4937 // bit test here...;
4938 OTHERVAR = ...;
4939 <l3>:
4940 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4941
4942static void
4943branch_fixup (void)
4944{
4945 tree var;
4946 unsigned int i;
4947
4948 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4949 {
4950 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4951 gimple use_stmt;
4952 use_operand_p use;
4953 bool ok = single_imm_use (var, &use, &use_stmt);
4954 gcc_assert (ok
4955 && is_gimple_assign (use_stmt)
4956 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4957 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4958
4959 basic_block cond_bb = gimple_bb (def_stmt);
4960 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4961 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4962
4963 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4964 gimple g = gimple_build_cond (NE_EXPR, var,
4965 build_zero_cst (TREE_TYPE (var)),
4966 NULL_TREE, NULL_TREE);
4967 location_t loc = gimple_location (use_stmt);
4968 gimple_set_location (g, loc);
4969 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4970
4971 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4972 etrue->probability = REG_BR_PROB_BASE / 2;
4973 etrue->count = cond_bb->count / 2;
4974 edge efalse = find_edge (cond_bb, then_bb);
4975 efalse->flags = EDGE_FALSE_VALUE;
4976 efalse->probability -= etrue->probability;
4977 efalse->count -= etrue->count;
4978 then_bb->count -= etrue->count;
4979
4980 tree othervar = NULL_TREE;
4981 if (gimple_assign_rhs1 (use_stmt) == var)
4982 othervar = gimple_assign_rhs2 (use_stmt);
4983 else if (gimple_assign_rhs2 (use_stmt) == var)
4984 othervar = gimple_assign_rhs1 (use_stmt);
4985 else
4986 gcc_unreachable ();
4987 tree lhs = gimple_assign_lhs (use_stmt);
4988 gphi *phi = create_phi_node (lhs, merge_bb);
4989 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4990 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4991 gsi = gsi_for_stmt (use_stmt);
4992 gsi_remove (&gsi, true);
4993
4994 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4995 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4996 }
4997 reassoc_branch_fixups.release ();
4998}
4999
5000void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
5001void debug_ops_vector (vec<operand_entry_t> ops);
5002
5003/* Dump the operand entry vector OPS to FILE. */
5004
5005void
5006dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
5007{
5008 operand_entry_t oe;
5009 unsigned int i;
5010
5011 FOR_EACH_VEC_ELT (ops, i, oe)
5012 {
5013 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5014 print_generic_expr (file, oe->op, 0);
5015 }
5016}
5017
5018/* Dump the operand entry vector OPS to STDERR. */
5019
5020DEBUG_FUNCTION void
5021debug_ops_vector (vec<operand_entry_t> ops)
5022{
5023 dump_ops_vector (stderr, ops);
5024}
5025
5026static void
5027do_reassoc (void)
5028{
5029 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5030 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5031}
5032
5033/* Initialize the reassociation pass. */
5034
5035static void
5036init_reassoc (void)
5037{
5038 int i;
5039 long rank = 2;
5040 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5041
5042 /* Find the loops, so that we can prevent moving calculations in
5043 them. */
5044 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5045
5046 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5047
5048 operand_entry_pool = create_alloc_pool ("operand entry pool",
5049 sizeof (struct operand_entry), 30);
5050 next_operand_entry_id = 0;
5051
5052 /* Reverse RPO (Reverse Post Order) will give us something where
5053 deeper loops come later. */
5054 pre_and_rev_post_order_compute (NULL, bbs, false);
5055 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5056 operand_rank = new hash_map<tree, long>;
5057
5058 /* Give each default definition a distinct rank. This includes
5059 parameters and the static chain. Walk backwards over all
5060 SSA names so that we get proper rank ordering according
5061 to tree_swap_operands_p. */
5062 for (i = num_ssa_names - 1; i > 0; --i)
5063 {
5064 tree name = ssa_name (i);
5065 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5066 insert_operand_rank (name, ++rank);
5067 }
5068
5069 /* Set up rank for each BB */
5070 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5071 bb_rank[bbs[i]] = ++rank << 16;
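/* (Presumably the shift by 16 leaves the low bits of each block's base
   rank free, so that ranks handed out to individual operands within a
   block never reach the base rank of any later block.)  */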
5072
5073 free (bbs);
5074 calculate_dominance_info (CDI_POST_DOMINATORS);
5075 plus_negates = vNULL;
5076}
5077
5078/* Cleanup after the reassociation pass, and print stats if
5079 requested. */
5080
5081static void
5082fini_reassoc (void)
5083{
5084 statistics_counter_event (cfun, "Linearized",
5085 reassociate_stats.linearized);
5086 statistics_counter_event (cfun, "Constants eliminated",
5087 reassociate_stats.constants_eliminated);
5088 statistics_counter_event (cfun, "Ops eliminated",
5089 reassociate_stats.ops_eliminated);
5090 statistics_counter_event (cfun, "Statements rewritten",
5091 reassociate_stats.rewritten);
5092 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5093 reassociate_stats.pows_encountered);
5094 statistics_counter_event (cfun, "Built-in powi calls created",
5095 reassociate_stats.pows_created);
5096
5097 delete operand_rank;
5098 free_alloc_pool (operand_entry_pool);
5099 free (bb_rank);
5100 plus_negates.release ();
5101 free_dominance_info (CDI_POST_DOMINATORS);
5102 loop_optimizer_finalize ();
5103}
5104
5105/* Gate and execute functions for Reassociation. */
5106
5107static unsigned int
5108execute_reassoc (void)
5109{
5110 init_reassoc ();
5111
5112 do_reassoc ();
5113 repropagate_negates ();
5114 branch_fixup ();
5115
5116 fini_reassoc ();
5117 return 0;
5118}
5119
5120namespace {
5121
5122const pass_data pass_data_reassoc =
5123{
5124 GIMPLE_PASS, /* type */
5125 "reassoc", /* name */
5126 OPTGROUP_NONE, /* optinfo_flags */
5127 TV_TREE_REASSOC, /* tv_id */
5128 ( PROP_cfg | PROP_ssa ), /* properties_required */
5129 0, /* properties_provided */
5130 0, /* properties_destroyed */
5131 0, /* todo_flags_start */
5132 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5133};
5134
5135class pass_reassoc : public gimple_opt_pass
5136{
5137public:
5138 pass_reassoc (gcc::context *ctxt)
5139 : gimple_opt_pass (pass_data_reassoc, ctxt)
5140 {}
5141
5142 /* opt_pass methods: */
5143 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5144 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5145 virtual unsigned int execute (function *) { return execute_reassoc (); }
5146
5147}; // class pass_reassoc
5148
5149} // anon namespace
5150
5151gimple_opt_pass *
5152make_pass_reassoc (gcc::context *ctxt)
5153{
5154 return new pass_reassoc (ctxt);
5155}