1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used; /* Number of registers used. */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
369 if (!just_once_each_iteration_p (loop, exit->src))
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
379 dump_iv (FILE *file, struct iv *iv)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
435 fprintf (file, " outside\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
604 if (sbb == loop->latch)
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
626 if (stmt_bb != cand_bb)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
635 if (bsi_stmt (bsi) == stmt)
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
652 return stmt_after_ip_normal_pos (loop, stmt);
655 return stmt_after_ip_original_pos (cand, stmt);
662 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
665 abnormal_ssa_name_p (tree exp)
670 if (TREE_CODE (exp) != SSA_NAME)
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
676 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
677 abnormal phi node. Callback for for_each_index. */
680 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
681 void *data ATTRIBUTE_UNUSED)
683 if (TREE_CODE (base) == ARRAY_REF)
685 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
687 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
691 return !abnormal_ssa_name_p (*index);
694 /* Returns true if EXPR contains a ssa name that occurs in an
695 abnormal phi node. */
698 contains_abnormal_ssa_name_p (tree expr)
701 enum tree_code_class class;
706 code = TREE_CODE (expr);
707 class = TREE_CODE_CLASS (code);
709 if (code == SSA_NAME)
710 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
712 if (code == INTEGER_CST
713 || is_gimple_min_invariant (expr))
716 if (code == ADDR_EXPR)
717 return !for_each_index (&TREE_OPERAND (expr, 0),
718 idx_contains_abnormal_ssa_name_p,
725 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
742 /* Element of the table in that we cache the numbers of iterations obtained
743 from exits of the loop. */
747 /* The edge for that the number of iterations is cached. */
750 /* Number of iterations corresponding to this exit, or NULL if it cannot be
755 /* Hash function for nfe_cache_elt E. */
758 nfe_hash (const void *e)
760 const struct nfe_cache_elt *elt = e;
762 return htab_hash_pointer (elt->exit);
765 /* Equality function for nfe_cache_elt E1 and edge E2. */
768 nfe_eq (const void *e1, const void *e2)
770 const struct nfe_cache_elt *elt1 = e1;
772 return elt1->exit == e2;
775 /* Returns tree describing number of iterations determined from
776 EXIT of DATA->current_loop, or NULL if something goes wrong. */
779 niter_for_exit (struct ivopts_data *data, edge exit)
781 struct nfe_cache_elt *nfe_desc;
782 struct tree_niter_desc desc;
785 slot = htab_find_slot_with_hash (data->niters, exit,
786 htab_hash_pointer (exit),
791 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
792 nfe_desc->exit = exit;
794 /* Try to determine number of iterations. We must know it
795 unconditionally (i.e., without possibility of # of iterations
796 being zero). Also, we cannot safely work with ssa names that
797 appear in phi nodes on abnormal edges, so that we do not create
798 overlapping life ranges for them (PR 27283). */
799 if (number_of_iterations_exit (data->current_loop,
801 && zero_p (desc.may_be_zero)
802 && !contains_abnormal_ssa_name_p (desc.niter))
803 nfe_desc->niter = desc.niter;
805 nfe_desc->niter = NULL_TREE;
810 return nfe_desc->niter;
813 /* Returns tree describing number of iterations determined from
814 single dominating exit of DATA->current_loop, or NULL if something
818 niter_for_single_dom_exit (struct ivopts_data *data)
820 edge exit = single_dom_exit (data->current_loop);
825 return niter_for_exit (data, exit);
828 /* Initializes data structures used by the iv optimization pass, stored
829 in DATA. LOOPS is the loop tree. */
832 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
836 data->version_info_size = 2 * num_ssa_names;
837 data->version_info = xcalloc (data->version_info_size,
838 sizeof (struct version_info));
839 data->relevant = BITMAP_ALLOC (NULL);
840 data->important_candidates = BITMAP_ALLOC (NULL);
841 data->max_inv_id = 0;
842 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
844 for (i = 1; i < loops->num; i++)
845 if (loops->parray[i])
846 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
848 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
849 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
850 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
853 /* Returns a memory object to that EXPR points. In case we are able to
854 determine that it does not point to any such object, NULL is returned. */
857 determine_base_object (tree expr)
859 enum tree_code code = TREE_CODE (expr);
860 tree base, obj, op0, op1;
862 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
871 obj = TREE_OPERAND (expr, 0);
872 base = get_base_address (obj);
877 if (TREE_CODE (base) == INDIRECT_REF)
878 return determine_base_object (TREE_OPERAND (base, 0));
880 return fold_convert (ptr_type_node,
881 build_fold_addr_expr (base));
885 op0 = determine_base_object (TREE_OPERAND (expr, 0));
886 op1 = determine_base_object (TREE_OPERAND (expr, 1));
892 return (code == PLUS_EXPR
894 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
896 return fold_build2 (code, ptr_type_node, op0, op1);
900 return determine_base_object (TREE_OPERAND (expr, 0));
903 return fold_convert (ptr_type_node, expr);
907 /* Allocates an induction variable with given initial value BASE and step STEP
911 alloc_iv (tree base, tree step)
913 struct iv *iv = xcalloc (1, sizeof (struct iv));
915 if (step && integer_zerop (step))
919 iv->base_object = determine_base_object (base);
922 iv->have_use_for = false;
924 iv->ssa_name = NULL_TREE;
929 /* Sets STEP and BASE for induction variable IV. */
932 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
934 struct version_info *info = name_info (data, iv);
936 gcc_assert (!info->iv);
938 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
939 info->iv = alloc_iv (base, step);
940 info->iv->ssa_name = iv;
943 /* Finds induction variable declaration for VAR. */
946 get_iv (struct ivopts_data *data, tree var)
950 if (!name_info (data, var)->iv)
952 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
955 || !flow_bb_inside_loop_p (data->current_loop, bb))
956 set_iv (data, var, var, NULL_TREE);
959 return name_info (data, var)->iv;
962 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
963 not define a simple affine biv with nonzero step. */
966 determine_biv_step (tree phi)
968 struct loop *loop = bb_for_stmt (phi)->loop_father;
969 tree name = PHI_RESULT (phi);
972 if (!is_gimple_reg (name))
975 if (!simple_iv (loop, phi, name, &iv, true))
978 return (zero_p (iv.step) ? NULL_TREE : iv.step);
981 /* Finds basic ivs. */
984 find_bivs (struct ivopts_data *data)
986 tree phi, step, type, base;
988 struct loop *loop = data->current_loop;
990 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
992 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
995 step = determine_biv_step (phi);
999 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1000 base = expand_simple_operations (base);
1001 if (contains_abnormal_ssa_name_p (base)
1002 || contains_abnormal_ssa_name_p (step))
1005 type = TREE_TYPE (PHI_RESULT (phi));
1006 base = fold_convert (type, base);
1008 step = fold_convert (type, step);
1010 set_iv (data, PHI_RESULT (phi), base, step);
1017 /* Marks basic ivs. */
1020 mark_bivs (struct ivopts_data *data)
1023 struct iv *iv, *incr_iv;
1024 struct loop *loop = data->current_loop;
1025 basic_block incr_bb;
1027 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1029 iv = get_iv (data, PHI_RESULT (phi));
1033 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1034 incr_iv = get_iv (data, var);
1038 /* If the increment is in the subloop, ignore it. */
1039 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1040 if (incr_bb->loop_father != data->current_loop
1041 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1045 incr_iv->biv_p = true;
1049 /* Checks whether STMT defines a linear induction variable and stores its
1050 parameters to IV. */
1053 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1056 struct loop *loop = data->current_loop;
1058 iv->base = NULL_TREE;
1059 iv->step = NULL_TREE;
1061 if (TREE_CODE (stmt) != MODIFY_EXPR)
1064 lhs = TREE_OPERAND (stmt, 0);
1065 if (TREE_CODE (lhs) != SSA_NAME)
1068 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1070 iv->base = expand_simple_operations (iv->base);
1072 if (contains_abnormal_ssa_name_p (iv->base)
1073 || contains_abnormal_ssa_name_p (iv->step))
1079 /* Finds general ivs in statement STMT. */
1082 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1086 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1089 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1092 /* Finds general ivs in basic block BB. */
1095 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1097 block_stmt_iterator bsi;
1099 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1100 find_givs_in_stmt (data, bsi_stmt (bsi));
1103 /* Finds general ivs. */
1106 find_givs (struct ivopts_data *data)
1108 struct loop *loop = data->current_loop;
1109 basic_block *body = get_loop_body_in_dom_order (loop);
1112 for (i = 0; i < loop->num_nodes; i++)
1113 find_givs_in_bb (data, body[i]);
1117 /* For each ssa name defined in LOOP determines whether it is an induction
1118 variable and if so, its initial value and step. */
1121 find_induction_variables (struct ivopts_data *data)
1126 if (!find_bivs (data))
1132 if (dump_file && (dump_flags & TDF_DETAILS))
1134 tree niter = niter_for_single_dom_exit (data);
1138 fprintf (dump_file, " number of iterations ");
1139 print_generic_expr (dump_file, niter, TDF_SLIM);
1140 fprintf (dump_file, "\n\n");
1143 fprintf (dump_file, "Induction variables:\n\n");
1145 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147 if (ver_info (data, i)->iv)
1148 dump_iv (dump_file, ver_info (data, i)->iv);
1155 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1157 static struct iv_use *
1158 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1159 tree stmt, enum use_type use_type)
1161 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1163 use->id = n_iv_uses (data);
1164 use->type = use_type;
1168 use->related_cands = BITMAP_ALLOC (NULL);
1170 /* To avoid showing ssa name in the dumps, if it was not reset by the
1172 iv->ssa_name = NULL_TREE;
1174 if (dump_file && (dump_flags & TDF_DETAILS))
1175 dump_use (dump_file, use);
1177 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182 /* Checks whether OP is a loop-level invariant and if so, records it.
1183 NONLINEAR_USE is true if the invariant is used in a way we do not
1184 handle specially. */
1187 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1190 struct version_info *info;
1192 if (TREE_CODE (op) != SSA_NAME
1193 || !is_gimple_reg (op))
1196 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1198 && flow_bb_inside_loop_p (data->current_loop, bb))
1201 info = name_info (data, op);
1203 info->has_nonlin_use |= nonlinear_use;
1205 info->inv_id = ++data->max_inv_id;
1206 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1209 /* Checks whether the use OP is interesting and if so, records it
1212 static struct iv_use *
1213 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1221 if (TREE_CODE (op) != SSA_NAME)
1224 iv = get_iv (data, op);
1228 if (iv->have_use_for)
1230 use = iv_use (data, iv->use_id);
1232 gcc_assert (use->type == USE_NONLINEAR_EXPR
1233 || use->type == USE_OUTER);
1235 if (type == USE_NONLINEAR_EXPR)
1236 use->type = USE_NONLINEAR_EXPR;
1240 if (zero_p (iv->step))
1242 record_invariant (data, op, true);
1245 iv->have_use_for = true;
1247 civ = xmalloc (sizeof (struct iv));
1250 stmt = SSA_NAME_DEF_STMT (op);
1251 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1252 || TREE_CODE (stmt) == MODIFY_EXPR);
1254 use = record_use (data, NULL, civ, stmt, type);
1255 iv->use_id = use->id;
1260 /* Checks whether the use OP is interesting and if so, records it. */
1262 static struct iv_use *
1263 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1268 /* Records a definition of induction variable OP that is used outside of the
1271 static struct iv_use *
1272 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1277 /* Checks whether the condition *COND_P in STMT is interesting
1278 and if so, records it. */
1281 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1285 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1287 tree zero = integer_zero_node;
1289 const_iv.step = NULL_TREE;
1291 if (TREE_CODE (*cond_p) != SSA_NAME
1292 && !COMPARISON_CLASS_P (*cond_p))
1295 if (TREE_CODE (*cond_p) == SSA_NAME)
1302 op0_p = &TREE_OPERAND (*cond_p, 0);
1303 op1_p = &TREE_OPERAND (*cond_p, 1);
1306 if (TREE_CODE (*op0_p) == SSA_NAME)
1307 iv0 = get_iv (data, *op0_p);
1311 if (TREE_CODE (*op1_p) == SSA_NAME)
1312 iv1 = get_iv (data, *op1_p);
1316 if (/* When comparing with non-invariant value, we may not do any senseful
1317 induction variable elimination. */
1319 /* Eliminating condition based on two ivs would be nontrivial.
1320 ??? TODO -- it is not really important to handle this case. */
1321 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323 find_interesting_uses_op (data, *op0_p);
1324 find_interesting_uses_op (data, *op1_p);
1328 if (zero_p (iv0->step) && zero_p (iv1->step))
1330 /* If both are invariants, this is a work for unswitching. */
1334 civ = xmalloc (sizeof (struct iv));
1335 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1336 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1339 /* Returns true if expression EXPR is obviously invariant in LOOP,
1340 i.e. if all its operands are defined outside of the LOOP. */
1343 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 if (is_gimple_min_invariant (expr))
1351 if (TREE_CODE (expr) == SSA_NAME)
1353 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1355 && flow_bb_inside_loop_p (loop, def_bb))
1364 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1365 for (i = 0; i < len; i++)
1366 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372 /* Cumulates the steps of indices into DATA and replaces their values with the
1373 initial ones. Returns false when the value of the index cannot be determined.
1374 Callback for for_each_index. */
1376 struct ifs_ivopts_data
1378 struct ivopts_data *ivopts_data;
1384 idx_find_step (tree base, tree *idx, void *data)
1386 struct ifs_ivopts_data *dta = data;
1388 tree step, iv_step, lbound, off;
1389 struct loop *loop = dta->ivopts_data->current_loop;
1391 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1392 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1395 /* If base is a component ref, require that the offset of the reference
1397 if (TREE_CODE (base) == COMPONENT_REF)
1399 off = component_ref_field_offset (base);
1400 return expr_invariant_in_loop_p (loop, off);
1403 /* If base is array, first check whether we will be able to move the
1404 reference out of the loop (in order to take its address in strength
1405 reduction). In order for this to work we need both lower bound
1406 and step to be loop invariants. */
1407 if (TREE_CODE (base) == ARRAY_REF)
1409 step = array_ref_element_size (base);
1410 lbound = array_ref_low_bound (base);
1412 if (!expr_invariant_in_loop_p (loop, step)
1413 || !expr_invariant_in_loop_p (loop, lbound))
1417 if (TREE_CODE (*idx) != SSA_NAME)
1420 iv = get_iv (dta->ivopts_data, *idx);
1429 if (TREE_CODE (base) == ARRAY_REF)
1431 step = array_ref_element_size (base);
1433 /* We only handle addresses whose step is an integer constant. */
1434 if (TREE_CODE (step) != INTEGER_CST)
1438 /* The step for pointer arithmetics already is 1 byte. */
1439 step = build_int_cst (sizetype, 1);
1441 /* FIXME: convert_step should not be used outside chrec_convert: fix
1442 this by calling chrec_convert. */
1443 iv_step = convert_step (dta->ivopts_data->current_loop,
1444 sizetype, iv->base, iv->step, dta->stmt);
1448 /* The index might wrap. */
1452 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1455 *dta->step_p = step;
1457 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1462 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1463 object is passed to it in DATA. */
1466 idx_record_use (tree base, tree *idx,
1469 find_interesting_uses_op (data, *idx);
1470 if (TREE_CODE (base) == ARRAY_REF)
1472 find_interesting_uses_op (data, array_ref_element_size (base));
1473 find_interesting_uses_op (data, array_ref_low_bound (base));
1478 /* Returns true if memory reference REF may be unaligned. */
1481 may_be_unaligned_p (tree ref)
1485 HOST_WIDE_INT bitsize;
1486 HOST_WIDE_INT bitpos;
1488 enum machine_mode mode;
1489 int unsignedp, volatilep;
1490 unsigned base_align;
1492 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1493 thus they are not misaligned. */
1494 if (TREE_CODE (ref) == TARGET_MEM_REF)
1497 /* The test below is basically copy of what expr.c:normal_inner_ref
1498 does to check whether the object must be loaded by parts when
1499 STRICT_ALIGNMENT is true. */
1500 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1501 &unsignedp, &volatilep, true);
1502 base_type = TREE_TYPE (base);
1503 base_align = TYPE_ALIGN (base_type);
1506 && (base_align < GET_MODE_ALIGNMENT (mode)
1507 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1508 || bitpos % BITS_PER_UNIT != 0))
1514 /* Finds addresses in *OP_P inside STMT. */
1517 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1519 tree base = *op_p, step = NULL;
1521 struct ifs_ivopts_data ifs_ivopts_data;
1523 /* Do not play with volatile memory references. A bit too conservative,
1524 perhaps, but safe. */
1525 if (stmt_ann (stmt)->has_volatile_ops)
1528 /* Ignore bitfields for now. Not really something terribly complicated
1530 if (TREE_CODE (base) == BIT_FIELD_REF
1531 || (TREE_CODE (base) == COMPONENT_REF
1532 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
1535 if (STRICT_ALIGNMENT
1536 && may_be_unaligned_p (base))
1539 base = unshare_expr (base);
1541 if (TREE_CODE (base) == TARGET_MEM_REF)
1543 tree type = build_pointer_type (TREE_TYPE (base));
1547 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1549 civ = get_iv (data, TMR_BASE (base));
1553 TMR_BASE (base) = civ->base;
1556 if (TMR_INDEX (base)
1557 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1559 civ = get_iv (data, TMR_INDEX (base));
1563 TMR_INDEX (base) = civ->base;
1568 if (TMR_STEP (base))
1569 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1572 step = fold_build2 (PLUS_EXPR, type, step, astep);
1580 base = tree_mem_ref_addr (type, base);
1584 ifs_ivopts_data.ivopts_data = data;
1585 ifs_ivopts_data.stmt = stmt;
1586 ifs_ivopts_data.step_p = &step;
1587 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1591 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1592 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1594 base = build_fold_addr_expr (base);
1597 civ = alloc_iv (base, step);
1598 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1602 for_each_index (op_p, idx_record_use, data);
1605 /* Finds and records invariants used in STMT. */
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1611 use_operand_p use_p;
1614 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1616 op = USE_FROM_PTR (use_p);
1617 record_invariant (data, op, false);
1621 /* Finds interesting uses of induction variables in the statement STMT. */
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1629 use_operand_p use_p;
1631 find_invariants_stmt (data, stmt);
1633 if (TREE_CODE (stmt) == COND_EXPR)
1635 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1639 if (TREE_CODE (stmt) == MODIFY_EXPR)
1641 lhs = TREE_OPERAND (stmt, 0);
1642 rhs = TREE_OPERAND (stmt, 1);
1644 if (TREE_CODE (lhs) == SSA_NAME)
1646 /* If the statement defines an induction variable, the uses are not
1647 interesting by themselves. */
1649 iv = get_iv (data, lhs);
1651 if (iv && !zero_p (iv->step))
1655 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1657 case tcc_comparison:
1658 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1662 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663 if (REFERENCE_CLASS_P (lhs))
1664 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1670 if (REFERENCE_CLASS_P (lhs)
1671 && is_gimple_val (rhs))
1673 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674 find_interesting_uses_op (data, rhs);
1678 /* TODO -- we should also handle address uses of type
1680 memory = call (whatever);
1687 if (TREE_CODE (stmt) == PHI_NODE
1688 && bb_for_stmt (stmt) == data->current_loop->header)
1690 lhs = PHI_RESULT (stmt);
1691 iv = get_iv (data, lhs);
1693 if (iv && !zero_p (iv->step))
1697 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1699 op = USE_FROM_PTR (use_p);
1701 if (TREE_CODE (op) != SSA_NAME)
1704 iv = get_iv (data, op);
1708 find_interesting_uses_op (data, op);
1712 /* Finds interesting uses of induction variables outside of loops
1713 on loop exit edge EXIT. */
1716 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1720 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1722 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723 find_interesting_uses_outer (data, def);
1727 /* Finds uses of the induction variables that are interesting. */
1730 find_interesting_uses (struct ivopts_data *data)
1733 block_stmt_iterator bsi;
1735 basic_block *body = get_loop_body (data->current_loop);
1737 struct version_info *info;
1740 if (dump_file && (dump_flags & TDF_DETAILS))
1741 fprintf (dump_file, "Uses:\n\n");
1743 for (i = 0; i < data->current_loop->num_nodes; i++)
1748 FOR_EACH_EDGE (e, ei, bb->succs)
1749 if (e->dest != EXIT_BLOCK_PTR
1750 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751 find_interesting_uses_outside (data, e);
1753 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754 find_interesting_uses_stmt (data, phi);
1755 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1759 if (dump_file && (dump_flags & TDF_DETAILS))
1763 fprintf (dump_file, "\n");
1765 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1767 info = ver_info (data, i);
1770 fprintf (dump_file, " ");
1771 print_generic_expr (dump_file, info->name, TDF_SLIM);
1772 fprintf (dump_file, " is invariant (%d)%s\n",
1773 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1777 fprintf (dump_file, "\n");
1783 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1784 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1785 we are at the top-level of the processed address. */
1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789 unsigned HOST_WIDE_INT *offset)
1791 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792 enum tree_code code;
1793 tree type, orig_type = TREE_TYPE (expr);
1794 unsigned HOST_WIDE_INT off0, off1, st;
1795 tree orig_expr = expr;
1799 type = TREE_TYPE (expr);
1800 code = TREE_CODE (expr);
1806 if (!cst_and_fits_in_hwi (expr)
1810 *offset = int_cst_value (expr);
1811 return build_int_cst_type (orig_type, 0);
1815 op0 = TREE_OPERAND (expr, 0);
1816 op1 = TREE_OPERAND (expr, 1);
1818 op0 = strip_offset_1 (op0, false, false, &off0);
1819 op1 = strip_offset_1 (op1, false, false, &off1);
1821 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822 if (op0 == TREE_OPERAND (expr, 0)
1823 && op1 == TREE_OPERAND (expr, 1))
1828 else if (zero_p (op0))
1830 if (code == PLUS_EXPR)
1833 expr = fold_build1 (NEGATE_EXPR, type, op1);
1836 expr = fold_build2 (code, type, op0, op1);
1838 return fold_convert (orig_type, expr);
1844 step = array_ref_element_size (expr);
1845 if (!cst_and_fits_in_hwi (step))
1848 st = int_cst_value (step);
1849 op1 = TREE_OPERAND (expr, 1);
1850 op1 = strip_offset_1 (op1, false, false, &off1);
1851 *offset = off1 * st;
1856 /* Strip the component reference completely. */
1857 op0 = TREE_OPERAND (expr, 0);
1858 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1868 tmp = component_ref_field_offset (expr);
1870 && cst_and_fits_in_hwi (tmp))
1872 /* Strip the component reference completely. */
1873 op0 = TREE_OPERAND (expr, 0);
1874 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875 *offset = off0 + int_cst_value (tmp);
1881 op0 = TREE_OPERAND (expr, 0);
1882 op0 = strip_offset_1 (op0, true, true, &off0);
1885 if (op0 == TREE_OPERAND (expr, 0))
1888 expr = build_fold_addr_expr (op0);
1889 return fold_convert (orig_type, expr);
1892 inside_addr = false;
1899 /* Default handling of expressions for that we want to recurse into
1900 the first operand. */
1901 op0 = TREE_OPERAND (expr, 0);
1902 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1905 if (op0 == TREE_OPERAND (expr, 0)
1906 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1909 expr = copy_node (expr);
1910 TREE_OPERAND (expr, 0) = op0;
1912 TREE_OPERAND (expr, 1) = op1;
1914 /* Inside address, we might strip the top level component references,
1915 thus changing type of the expression. Handling of ADDR_EXPR
1917 expr = fold_convert (orig_type, expr);
1922 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1927 return strip_offset_1 (expr, false, false, offset);
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931 For integer types, we return unsigned variant of the type, which
1932 avoids problems with overflows. For pointer types, we return void *. */
1935 generic_type_for (tree type)
1937 if (POINTER_TYPE_P (type))
1938 return ptr_type_node;
1940 if (TYPE_UNSIGNED (type))
1943 return unsigned_type_for (type);
1946 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1947 the bitmap to that we should store it. */
1949 static struct ivopts_data *fd_ivopts_data;
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1953 bitmap *depends_on = data;
1954 struct version_info *info;
1956 if (TREE_CODE (*expr_p) != SSA_NAME)
1958 info = name_info (fd_ivopts_data, *expr_p);
1960 if (!info->inv_id || info->has_nonlin_use)
1964 *depends_on = BITMAP_ALLOC (NULL);
1965 bitmap_set_bit (*depends_on, info->inv_id);
1970 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1971 position to POS. If USE is not NULL, the candidate is set as related to
1972 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1973 replacement of the final value of the iv by a direct computation. */
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977 tree base, tree step, bool important, enum iv_position pos,
1978 struct iv_use *use, tree incremented_at)
1981 struct iv_cand *cand = NULL;
1982 tree type, orig_type;
1986 orig_type = TREE_TYPE (base);
1987 type = generic_type_for (orig_type);
1988 if (type != orig_type)
1990 base = fold_convert (type, base);
1992 step = fold_convert (type, step);
1996 for (i = 0; i < n_iv_cands (data); i++)
1998 cand = iv_cand (data, i);
2000 if (cand->pos != pos)
2003 if (cand->incremented_at != incremented_at)
2017 if (!operand_equal_p (base, cand->iv->base, 0))
2020 if (zero_p (cand->iv->step))
2027 if (step && operand_equal_p (step, cand->iv->step, 0))
2032 if (i == n_iv_cands (data))
2034 cand = xcalloc (1, sizeof (struct iv_cand));
2040 cand->iv = alloc_iv (base, step);
2043 if (pos != IP_ORIGINAL && cand->iv)
2045 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046 cand->var_after = cand->var_before;
2048 cand->important = important;
2049 cand->incremented_at = incremented_at;
2050 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2053 && TREE_CODE (step) != INTEGER_CST)
2055 fd_ivopts_data = data;
2056 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2059 if (dump_file && (dump_flags & TDF_DETAILS))
2060 dump_cand (dump_file, cand);
2063 if (important && !cand->important)
2065 cand->important = true;
2066 if (dump_file && (dump_flags & TDF_DETAILS))
2067 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2072 bitmap_set_bit (use->related_cands, i);
2073 if (dump_file && (dump_flags & TDF_DETAILS))
2074 fprintf (dump_file, "Candidate %d is related to use %d\n",
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2084 The purpose is to avoid splitting latch edge with a biv increment, thus
2085 creating a jump, possibly confusing other optimization passes and leaving
2086 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2087 is not available (so we do not have a better alternative), or if the latch
2088 edge is already nonempty. */
2091 allow_ip_end_pos_p (struct loop *loop)
2093 if (!ip_normal_pos (loop))
2096 if (!empty_block_p (ip_end_pos (loop)))
2102 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2103 position to POS. If USE is not NULL, the candidate is set as related to
2104 it. The candidate computation is scheduled on all available positions. */
2107 add_candidate (struct ivopts_data *data,
2108 tree base, tree step, bool important, struct iv_use *use)
2110 if (ip_normal_pos (data->current_loop))
2111 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112 if (ip_end_pos (data->current_loop)
2113 && allow_ip_end_pos_p (data->current_loop))
2114 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118 type with SIZE bits. */
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2124 tree type = lang_hooks.types.type_for_size (size, true);
2125 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2129 /* Adds standard iv candidates. */
2132 add_standard_iv_candidates (struct ivopts_data *data)
2134 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2136 /* The same for a double-integer type if it is still fast enough. */
2137 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2142 /* Adds candidates bases on the old induction variable IV. */
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2148 struct iv_cand *cand;
2150 add_candidate (data, iv->base, iv->step, true, NULL);
2152 /* The same, but with initial value zero. */
2153 add_candidate (data,
2154 build_int_cst (TREE_TYPE (iv->base), 0),
2155 iv->step, true, NULL);
2157 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158 if (TREE_CODE (phi) == PHI_NODE)
2160 /* Additionally record the possibility of leaving the original iv
2162 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163 cand = add_candidate_1 (data,
2164 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165 SSA_NAME_DEF_STMT (def));
2166 cand->var_before = iv->ssa_name;
2167 cand->var_after = def;
2171 /* Adds candidates based on the old induction variables. */
2174 add_old_ivs_candidates (struct ivopts_data *data)
2180 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2182 iv = ver_info (data, i)->iv;
2183 if (iv && iv->biv_p && !zero_p (iv->step))
2184 add_old_iv_candidates (data, iv);
2188 /* Adds candidates based on the value of the induction variable IV and USE. */
2191 add_iv_value_candidates (struct ivopts_data *data,
2192 struct iv *iv, struct iv_use *use)
2194 unsigned HOST_WIDE_INT offset;
2197 add_candidate (data, iv->base, iv->step, false, use);
2199 /* The same, but with initial value zero. Make such variable important,
2200 since it is generic enough so that possibly many uses may be based
2202 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203 iv->step, true, use);
2205 /* Third, try removing the constant offset. */
2206 base = strip_offset (iv->base, &offset);
2208 add_candidate (data, base, iv->step, false, use);
2211 /* Possibly adds pseudocandidate for replacing the final value of USE by
2212 a direct computation. */
2215 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2217 /* We must know where we exit the loop and how many times does it roll. */
2218 if (! niter_for_single_dom_exit (data))
2221 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2224 /* Adds candidates based on the uses. */
2227 add_derived_ivs_candidates (struct ivopts_data *data)
2231 for (i = 0; i < n_iv_uses (data); i++)
2233 struct iv_use *use = iv_use (data, i);
2240 case USE_NONLINEAR_EXPR:
2243 /* Just add the ivs based on the value of the iv used here. */
2244 add_iv_value_candidates (data, use->iv, use);
2248 add_iv_value_candidates (data, use->iv, use);
2250 /* Additionally, add the pseudocandidate for the possibility to
2251 replace the final value by a direct computation. */
2252 add_iv_outer_candidates (data, use);
2261 /* Record important candidates and add them to related_cands bitmaps
2265 record_important_candidates (struct ivopts_data *data)
2270 for (i = 0; i < n_iv_cands (data); i++)
2272 struct iv_cand *cand = iv_cand (data, i);
2274 if (cand->important)
2275 bitmap_set_bit (data->important_candidates, i);
2278 data->consider_all_candidates = (n_iv_cands (data)
2279 <= CONSIDER_ALL_CANDIDATES_BOUND);
2281 if (data->consider_all_candidates)
2283 /* We will not need "related_cands" bitmaps in this case,
2284 so release them to decrease peak memory consumption. */
2285 for (i = 0; i < n_iv_uses (data); i++)
2287 use = iv_use (data, i);
2288 BITMAP_FREE (use->related_cands);
2293 /* Add important candidates to the related_cands bitmaps. */
2294 for (i = 0; i < n_iv_uses (data); i++)
2295 bitmap_ior_into (iv_use (data, i)->related_cands,
2296 data->important_candidates);
2300 /* Finds the candidates for the induction variables. */
2303 find_iv_candidates (struct ivopts_data *data)
2305 /* Add commonly used ivs. */
2306 add_standard_iv_candidates (data);
2308 /* Add old induction variables. */
2309 add_old_ivs_candidates (data);
2311 /* Add induction variables derived from uses. */
2312 add_derived_ivs_candidates (data);
2314 /* Record the important candidates. */
2315 record_important_candidates (data);
2318 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2319 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2320 we allocate a simple list to every use. */
2323 alloc_use_cost_map (struct ivopts_data *data)
2325 unsigned i, size, s, j;
2327 for (i = 0; i < n_iv_uses (data); i++)
2329 struct iv_use *use = iv_use (data, i);
2332 if (data->consider_all_candidates)
2333 size = n_iv_cands (data);
2337 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2342 /* Round up to the power of two, so that moduling by it is fast. */
2343 for (size = 1; size < s; size <<= 1)
2347 use->n_map_members = size;
2348 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2352 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2353 on invariants DEPENDS_ON and that the value used in expressing it
2357 set_use_iv_cost (struct ivopts_data *data,
2358 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2359 bitmap depends_on, tree value)
2365 BITMAP_FREE (depends_on);
2369 if (data->consider_all_candidates)
2371 use->cost_map[cand->id].cand = cand;
2372 use->cost_map[cand->id].cost = cost;
2373 use->cost_map[cand->id].depends_on = depends_on;
2374 use->cost_map[cand->id].value = value;
2378 /* n_map_members is a power of two, so this computes modulo. */
2379 s = cand->id & (use->n_map_members - 1);
2380 for (i = s; i < use->n_map_members; i++)
2381 if (!use->cost_map[i].cand)
2383 for (i = 0; i < s; i++)
2384 if (!use->cost_map[i].cand)
2390 use->cost_map[i].cand = cand;
2391 use->cost_map[i].cost = cost;
2392 use->cost_map[i].depends_on = depends_on;
2393 use->cost_map[i].value = value;
2396 /* Gets cost of (USE, CANDIDATE) pair. */
2398 static struct cost_pair *
2399 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2400 struct iv_cand *cand)
2403 struct cost_pair *ret;
2408 if (data->consider_all_candidates)
2410 ret = use->cost_map + cand->id;
2417 /* n_map_members is a power of two, so this computes modulo. */
2418 s = cand->id & (use->n_map_members - 1);
2419 for (i = s; i < use->n_map_members; i++)
2420 if (use->cost_map[i].cand == cand)
2421 return use->cost_map + i;
2423 for (i = 0; i < s; i++)
2424 if (use->cost_map[i].cand == cand)
2425 return use->cost_map + i;
2430 /* Returns estimate on cost of computing SEQ. */
2438 for (; seq; seq = NEXT_INSN (seq))
2440 set = single_set (seq);
2442 cost += rtx_cost (set, SET);
2450 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2452 produce_memory_decl_rtl (tree obj, int *regno)
2457 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2459 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2460 x = gen_rtx_SYMBOL_REF (Pmode, name);
2463 x = gen_raw_REG (Pmode, (*regno)++);
2465 return gen_rtx_MEM (DECL_MODE (obj), x);
2468 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2469 walk_tree. DATA contains the actual fake register number. */
2472 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2474 tree obj = NULL_TREE;
2478 switch (TREE_CODE (*expr_p))
2481 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2482 handled_component_p (*expr_p);
2483 expr_p = &TREE_OPERAND (*expr_p, 0))
2487 x = produce_memory_decl_rtl (obj, regno);
2492 obj = SSA_NAME_VAR (*expr_p);
2493 if (!DECL_RTL_SET_P (obj))
2494 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503 if (DECL_RTL_SET_P (obj))
2506 if (DECL_MODE (obj) == BLKmode)
2507 x = produce_memory_decl_rtl (obj, regno);
2509 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2519 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2520 SET_DECL_RTL (obj, x);
2526 /* Determines cost of the computation of EXPR. */
2529 computation_cost (tree expr)
2532 tree type = TREE_TYPE (expr);
2534 /* Avoid using hard regs in ways which may be unsupported. */
2535 int regno = LAST_VIRTUAL_REGISTER + 1;
2537 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2539 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2543 cost = seq_cost (seq);
2545 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2550 /* Returns variable containing the value of candidate CAND at statement AT. */
2553 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2555 if (stmt_after_increment (loop, cand, stmt))
2556 return cand->var_after;
2558 return cand->var_before;
2561 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2562 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2565 tree_int_cst_sign_bit (tree t)
2567 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2568 unsigned HOST_WIDE_INT w;
2570 if (bitno < HOST_BITS_PER_WIDE_INT)
2571 w = TREE_INT_CST_LOW (t);
2574 w = TREE_INT_CST_HIGH (t);
2575 bitno -= HOST_BITS_PER_WIDE_INT;
2578 return (w >> bitno) & 1;
2581 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2582 return cst. Otherwise return NULL_TREE. */
2585 constant_multiple_of (tree type, tree top, tree bot)
2587 tree res, mby, p0, p1;
2588 enum tree_code code;
2594 if (operand_equal_p (top, bot, 0))
2595 return build_int_cst (type, 1);
2597 code = TREE_CODE (top);
2601 mby = TREE_OPERAND (top, 1);
2602 if (TREE_CODE (mby) != INTEGER_CST)
2605 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2609 return fold_binary_to_constant (MULT_EXPR, type, res,
2610 fold_convert (type, mby));
2614 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2617 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2621 return fold_binary_to_constant (code, type, p0, p1);
2624 if (TREE_CODE (bot) != INTEGER_CST)
2627 bot = fold_convert (type, bot);
2628 top = fold_convert (type, top);
2630 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2631 the result afterwards. */
2632 if (tree_int_cst_sign_bit (bot))
2635 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2640 /* Ditto for TOP. */
2641 if (tree_int_cst_sign_bit (top))
2644 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2647 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2650 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2652 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2660 /* Sets COMB to CST. */
2663 aff_combination_const (struct affine_tree_combination *comb, tree type,
2664 unsigned HOST_WIDE_INT cst)
2666 unsigned prec = TYPE_PRECISION (type);
2669 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2672 comb->rest = NULL_TREE;
2673 comb->offset = cst & comb->mask;
2676 /* Sets COMB to single element ELT. */
2679 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2681 unsigned prec = TYPE_PRECISION (type);
2684 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2687 comb->elts[0] = elt;
2689 comb->rest = NULL_TREE;
2693 /* Scales COMB by SCALE. */
2696 aff_combination_scale (struct affine_tree_combination *comb,
2697 unsigned HOST_WIDE_INT scale)
2706 aff_combination_const (comb, comb->type, 0);
2710 comb->offset = (scale * comb->offset) & comb->mask;
2711 for (i = 0, j = 0; i < comb->n; i++)
2713 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2714 comb->elts[j] = comb->elts[i];
2715 if (comb->coefs[j] != 0)
2722 if (comb->n < MAX_AFF_ELTS)
2724 comb->coefs[comb->n] = scale;
2725 comb->elts[comb->n] = comb->rest;
2726 comb->rest = NULL_TREE;
2730 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2731 build_int_cst_type (comb->type, scale));
2735 /* Adds ELT * SCALE to COMB. */
2738 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2739 unsigned HOST_WIDE_INT scale)
2746 for (i = 0; i < comb->n; i++)
2747 if (operand_equal_p (comb->elts[i], elt, 0))
2749 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2754 comb->coefs[i] = comb->coefs[comb->n];
2755 comb->elts[i] = comb->elts[comb->n];
2759 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2760 comb->coefs[comb->n] = 1;
2761 comb->elts[comb->n] = comb->rest;
2762 comb->rest = NULL_TREE;
2767 if (comb->n < MAX_AFF_ELTS)
2769 comb->coefs[comb->n] = scale;
2770 comb->elts[comb->n] = elt;
2776 elt = fold_convert (comb->type, elt);
2778 elt = fold_build2 (MULT_EXPR, comb->type,
2779 fold_convert (comb->type, elt),
2780 build_int_cst_type (comb->type, scale));
2783 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2788 /* Adds COMB2 to COMB1. */
2791 aff_combination_add (struct affine_tree_combination *comb1,
2792 struct affine_tree_combination *comb2)
2796 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2797 for (i = 0; i < comb2->n; i++)
2798 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2800 aff_combination_add_elt (comb1, comb2->rest, 1);
2803 /* Splits EXPR into an affine combination of parts. */
2806 tree_to_aff_combination (tree expr, tree type,
2807 struct affine_tree_combination *comb)
2809 struct affine_tree_combination tmp;
2810 enum tree_code code;
2811 tree cst, core, toffset;
2812 HOST_WIDE_INT bitpos, bitsize;
2813 enum machine_mode mode;
2814 int unsignedp, volatilep;
2818 code = TREE_CODE (expr);
2822 aff_combination_const (comb, type, int_cst_value (expr));
2827 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2828 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2829 if (code == MINUS_EXPR)
2830 aff_combination_scale (&tmp, -1);
2831 aff_combination_add (comb, &tmp);
2835 cst = TREE_OPERAND (expr, 1);
2836 if (TREE_CODE (cst) != INTEGER_CST)
2838 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839 aff_combination_scale (comb, int_cst_value (cst));
2843 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844 aff_combination_scale (comb, -1);
2848 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2849 &toffset, &mode, &unsignedp, &volatilep,
2851 if (bitpos % BITS_PER_UNIT != 0)
2853 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2854 core = build_fold_addr_expr (core);
2855 if (TREE_CODE (core) == ADDR_EXPR)
2856 aff_combination_add_elt (comb, core, 1);
2859 tree_to_aff_combination (core, type, &tmp);
2860 aff_combination_add (comb, &tmp);
2864 tree_to_aff_combination (toffset, type, &tmp);
2865 aff_combination_add (comb, &tmp);
2873 aff_combination_elt (comb, type, expr);
2876 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2879 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2880 unsigned HOST_WIDE_INT mask)
2882 enum tree_code code;
2885 elt = fold_convert (type, elt);
2892 return fold_build2 (PLUS_EXPR, type, expr, elt);
2898 return fold_build1 (NEGATE_EXPR, type, elt);
2900 return fold_build2 (MINUS_EXPR, type, expr, elt);
2904 return fold_build2 (MULT_EXPR, type, elt,
2905 build_int_cst_type (type, scale));
2907 if ((scale | (mask >> 1)) == mask)
2909 /* Scale is negative. */
2911 scale = (-scale) & mask;
2916 elt = fold_build2 (MULT_EXPR, type, elt,
2917 build_int_cst_type (type, scale));
2918 return fold_build2 (code, type, expr, elt);
2921 /* Copies the tree elements of COMB to ensure that they are not shared. */
2924 unshare_aff_combination (struct affine_tree_combination *comb)
2928 for (i = 0; i < comb->n; i++)
2929 comb->elts[i] = unshare_expr (comb->elts[i]);
2931 comb->rest = unshare_expr (comb->rest);
2934 /* Makes tree from the affine combination COMB. */
2937 aff_combination_to_tree (struct affine_tree_combination *comb)
2939 tree type = comb->type;
2940 tree expr = comb->rest;
2942 unsigned HOST_WIDE_INT off, sgn;
2944 /* Handle the special case produced by get_computation_aff when
2945 the type does not fit in HOST_WIDE_INT. */
2946 if (comb->n == 0 && comb->offset == 0)
2947 return fold_convert (type, expr);
2949 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2951 for (i = 0; i < comb->n; i++)
2952 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2955 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2957 /* Offset is negative. */
2958 off = (-comb->offset) & comb->mask;
2966 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970 /* Determines the expression by that USE is expressed from induction variable
2971 CAND at statement AT in LOOP. The expression is stored in a decomposed
2972 form into AFF. Returns false if USE cannot be expressed using CAND. */
2975 get_computation_aff (struct loop *loop,
2976 struct iv_use *use, struct iv_cand *cand, tree at,
2977 struct affine_tree_combination *aff)
2979 tree ubase = use->iv->base;
2980 tree ustep = use->iv->step;
2981 tree cbase = cand->iv->base;
2982 tree cstep = cand->iv->step;
2983 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2987 unsigned HOST_WIDE_INT ustepi, cstepi;
2988 HOST_WIDE_INT ratioi;
2989 struct affine_tree_combination cbase_aff, expr_aff;
2990 tree cstep_orig = cstep, ustep_orig = ustep;
2992 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2994 /* We do not have a precision to express the values of use. */
2998 expr = var_at_stmt (loop, cand, at);
3000 if (TREE_TYPE (expr) != ctype)
3002 /* This may happen with the original ivs. */
3003 expr = fold_convert (ctype, expr);
3006 if (TYPE_UNSIGNED (utype))
3010 uutype = unsigned_type_for (utype);
3011 ubase = fold_convert (uutype, ubase);
3012 ustep = fold_convert (uutype, ustep);
3015 if (uutype != ctype)
3017 expr = fold_convert (uutype, expr);
3018 cbase = fold_convert (uutype, cbase);
3019 cstep = fold_convert (uutype, cstep);
3021 /* If the conversion is not noop, we must take it into account when
3022 considering the value of the step. */
3023 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3027 if (cst_and_fits_in_hwi (cstep_orig)
3028 && cst_and_fits_in_hwi (ustep_orig))
3030 ustepi = int_cst_value (ustep_orig);
3031 cstepi = int_cst_value (cstep_orig);
3033 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3035 /* TODO maybe consider case when ustep divides cstep and the ratio is
3036 a power of 2 (so that the division is fast to execute)? We would
3037 need to be much more careful with overflows etc. then. */
3041 ratio = build_int_cst_type (uutype, ratioi);
3045 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3049 /* Ratioi is only used to detect special cases when the multiplicative
3050 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3051 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3052 to integer_onep/integer_all_onesp, since the former ignores
3054 if (cst_and_fits_in_hwi (ratio))
3055 ratioi = int_cst_value (ratio);
3056 else if (integer_onep (ratio))
3058 else if (integer_all_onesp (ratio))
3064 /* We may need to shift the value if we are after the increment. */
3065 if (stmt_after_increment (loop, cand, at))
3066 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3068 /* use = ubase - ratio * cbase + ratio * var.
3070 In general case ubase + ratio * (var - cbase) could be better (one less
3071 multiplication), but often it is possible to eliminate redundant parts
3072 of computations from (ubase - ratio * cbase) term, and if it does not
3073 happen, fold is able to apply the distributive law to obtain this form
3076 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3078 /* Let's compute in trees and just return the result in AFF. This case
3079 should not be very common, and fold itself is not that bad either,
3080 so making the aff. functions more complicated to handle this case
3081 is not that urgent. */
3084 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3085 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3087 else if (ratioi == -1)
3089 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3090 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3094 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3095 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3096 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3097 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3108 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3109 possible to compute ratioi. */
3110 gcc_assert (ratioi);
3112 tree_to_aff_combination (ubase, uutype, aff);
3113 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3114 tree_to_aff_combination (expr, uutype, &expr_aff);
3115 aff_combination_scale (&cbase_aff, -ratioi);
3116 aff_combination_scale (&expr_aff, ratioi);
3117 aff_combination_add (aff, &cbase_aff);
3118 aff_combination_add (aff, &expr_aff);
3123 /* Determines the expression by that USE is expressed from induction variable
3124 CAND at statement AT in LOOP. The computation is unshared. */
3127 get_computation_at (struct loop *loop,
3128 struct iv_use *use, struct iv_cand *cand, tree at)
3130 struct affine_tree_combination aff;
3131 tree type = TREE_TYPE (use->iv->base);
3133 if (!get_computation_aff (loop, use, cand, at, &aff))
3135 unshare_aff_combination (&aff);
3136 return fold_convert (type, aff_combination_to_tree (&aff));
3139 /* Determines the expression by that USE is expressed from induction variable
3140 CAND in LOOP. The computation is unshared. */
3143 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3145 return get_computation_at (loop, use, cand, use->stmt);
3148 /* Returns cost of addition in MODE. */
3151 add_cost (enum machine_mode mode)
3153 static unsigned costs[NUM_MACHINE_MODES];
3161 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3162 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3163 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3168 cost = seq_cost (seq);
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "Addition in %s costs %d\n",
3176 GET_MODE_NAME (mode), cost);
3180 /* Entry in a hashtable of already known costs for multiplication. */
3183 HOST_WIDE_INT cst; /* The constant to multiply by. */
3184 enum machine_mode mode; /* In mode. */
3185 unsigned cost; /* The cost. */
3188 /* Counts hash value for the ENTRY. */
3191 mbc_entry_hash (const void *entry)
3193 const struct mbc_entry *e = entry;
3195 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3198 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3201 mbc_entry_eq (const void *entry1, const void *entry2)
3203 const struct mbc_entry *e1 = entry1;
3204 const struct mbc_entry *e2 = entry2;
3206 return (e1->mode == e2->mode
3207 && e1->cst == e2->cst);
3210 /* Returns cost of multiplication by constant CST in MODE. */
3213 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3215 static htab_t costs;
3216 struct mbc_entry **cached, act;
3221 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3225 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3227 return (*cached)->cost;
3229 *cached = xmalloc (sizeof (struct mbc_entry));
3230 (*cached)->mode = mode;
3231 (*cached)->cst = cst;
3234 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3235 gen_int_mode (cst, mode), NULL_RTX, 0);
3239 cost = seq_cost (seq);
3241 if (dump_file && (dump_flags & TDF_DETAILS))
3242 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3243 (int) cst, GET_MODE_NAME (mode), cost);
3245 (*cached)->cost = cost;
3250 /* Returns true if multiplying by RATIO is allowed in address. */
3253 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3255 #define MAX_RATIO 128
3256 static sbitmap valid_mult;
3260 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3264 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3265 sbitmap_zero (valid_mult);
3266 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3267 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3269 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3270 if (memory_address_p (Pmode, addr))
3271 SET_BIT (valid_mult, i + MAX_RATIO);
3274 if (dump_file && (dump_flags & TDF_DETAILS))
3276 fprintf (dump_file, " allowed multipliers:");
3277 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3278 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3279 fprintf (dump_file, " %d", (int) i);
3280 fprintf (dump_file, "\n");
3281 fprintf (dump_file, "\n");
3285 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3288 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3291 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3292 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3293 variable is omitted. The created memory accesses MODE.
3295 TODO -- there must be some better way. This all is quite crude. */
3298 get_address_cost (bool symbol_present, bool var_present,
3299 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3301 static bool initialized = false;
3302 static HOST_WIDE_INT rat, off;
3303 static HOST_WIDE_INT min_offset, max_offset;
3304 static unsigned costs[2][2][2][2];
3305 unsigned cost, acost;
3306 rtx seq, addr, base;
3307 bool offset_p, ratio_p;
3309 HOST_WIDE_INT s_offset;
3310 unsigned HOST_WIDE_INT mask;
3318 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3320 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3321 for (i = 1; i <= 1 << 20; i <<= 1)
3323 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3324 if (!memory_address_p (Pmode, addr))
3327 max_offset = i >> 1;
3330 for (i = 1; i <= 1 << 20; i <<= 1)
3332 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3333 if (!memory_address_p (Pmode, addr))
3336 min_offset = -(i >> 1);
3338 if (dump_file && (dump_flags & TDF_DETAILS))
3340 fprintf (dump_file, "get_address_cost:\n");
3341 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3342 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3346 for (i = 2; i <= MAX_RATIO; i++)
3347 if (multiplier_allowed_in_address_p (i))
3354 bits = GET_MODE_BITSIZE (Pmode);
3355 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3357 if ((offset >> (bits - 1) & 1))
3362 offset_p = (s_offset != 0
3363 && min_offset <= s_offset && s_offset <= max_offset);
3364 ratio_p = (ratio != 1
3365 && multiplier_allowed_in_address_p (ratio));
3367 if (ratio != 1 && !ratio_p)
3368 cost += multiply_by_cost (ratio, Pmode);
3370 if (s_offset && !offset_p && !symbol_present)
3372 cost += add_cost (Pmode);
3376 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3379 int old_cse_not_expected;
3382 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3383 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3385 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3388 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3392 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3394 base = gen_rtx_fmt_e (CONST, Pmode,
3395 gen_rtx_fmt_ee (PLUS, Pmode,
3397 gen_int_mode (off, Pmode)));
3400 base = gen_int_mode (off, Pmode);
3405 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3408 /* To avoid splitting addressing modes, pretend that no cse will
3410 old_cse_not_expected = cse_not_expected;
3411 cse_not_expected = true;
3412 addr = memory_address (Pmode, addr);
3413 cse_not_expected = old_cse_not_expected;
3417 acost = seq_cost (seq);
3418 acost += address_cost (addr, Pmode);
3422 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3425 return cost + acost;
3428 /* Estimates cost of forcing expression EXPR into a variable. */
3431 force_expr_to_var_cost (tree expr)
3433 static bool costs_initialized = false;
3434 static unsigned integer_cost;
3435 static unsigned symbol_cost;
3436 static unsigned address_cost;
3438 unsigned cost0, cost1, cost;
3439 enum machine_mode mode;
3441 if (!costs_initialized)
3443 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3444 rtx x = gen_rtx_MEM (DECL_MODE (var),
3445 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3447 tree type = build_pointer_type (integer_type_node);
3449 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3452 SET_DECL_RTL (var, x);
3453 TREE_STATIC (var) = 1;
3454 addr = build1 (ADDR_EXPR, type, var);
3455 symbol_cost = computation_cost (addr) + 1;
3458 = computation_cost (build2 (PLUS_EXPR, type,
3460 build_int_cst_type (type, 2000))) + 1;
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "force_expr_to_var_cost:\n");
3464 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3465 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3466 fprintf (dump_file, " address %d\n", (int) address_cost);
3467 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3468 fprintf (dump_file, "\n");
3471 costs_initialized = true;
3476 if (SSA_VAR_P (expr))
3479 if (TREE_INVARIANT (expr))
3481 if (TREE_CODE (expr) == INTEGER_CST)
3482 return integer_cost;
3484 if (TREE_CODE (expr) == ADDR_EXPR)
3486 tree obj = TREE_OPERAND (expr, 0);
3488 if (TREE_CODE (obj) == VAR_DECL
3489 || TREE_CODE (obj) == PARM_DECL
3490 || TREE_CODE (obj) == RESULT_DECL)
3494 return address_cost;
3497 switch (TREE_CODE (expr))
3502 op0 = TREE_OPERAND (expr, 0);
3503 op1 = TREE_OPERAND (expr, 1);
3507 if (is_gimple_val (op0))
3510 cost0 = force_expr_to_var_cost (op0);
3512 if (is_gimple_val (op1))
3515 cost1 = force_expr_to_var_cost (op1);
3520 /* Just an arbitrary value, FIXME. */
3521 return target_spill_cost;
3524 mode = TYPE_MODE (TREE_TYPE (expr));
3525 switch (TREE_CODE (expr))
3529 cost = add_cost (mode);
3533 if (cst_and_fits_in_hwi (op0))
3534 cost = multiply_by_cost (int_cst_value (op0), mode);
3535 else if (cst_and_fits_in_hwi (op1))
3536 cost = multiply_by_cost (int_cst_value (op1), mode);
3538 return target_spill_cost;
3548 /* Bound the cost by target_spill_cost. The parts of complicated
3549 computations often are either loop invariant or at least can
3550 be shared between several iv uses, so letting this grow without
3551 limits would not give reasonable results. */
3552 return cost < target_spill_cost ? cost : target_spill_cost;
3555 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3556 invariants the computation depends on. */
3559 force_var_cost (struct ivopts_data *data,
3560 tree expr, bitmap *depends_on)
3564 fd_ivopts_data = data;
3565 walk_tree (&expr, find_depends, depends_on, NULL);
3568 return force_expr_to_var_cost (expr);
3571 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3572 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3573 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3574 invariants the computation depends on. */
3577 split_address_cost (struct ivopts_data *data,
3578 tree addr, bool *symbol_present, bool *var_present,
3579 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3582 HOST_WIDE_INT bitsize;
3583 HOST_WIDE_INT bitpos;
3585 enum machine_mode mode;
3586 int unsignedp, volatilep;
3588 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3589 &unsignedp, &volatilep, false);
3592 || bitpos % BITS_PER_UNIT != 0
3593 || TREE_CODE (core) != VAR_DECL)
3595 *symbol_present = false;
3596 *var_present = true;
3597 fd_ivopts_data = data;
3598 walk_tree (&addr, find_depends, depends_on, NULL);
3599 return target_spill_cost;
3602 *offset += bitpos / BITS_PER_UNIT;
3603 if (TREE_STATIC (core)
3604 || DECL_EXTERNAL (core))
3606 *symbol_present = true;
3607 *var_present = false;
3611 *symbol_present = false;
3612 *var_present = true;
3616 /* Estimates cost of expressing difference of addresses E1 - E2 as
3617 var + symbol + offset. The value of offset is added to OFFSET,
3618 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3619 part is missing. DEPENDS_ON is a set of the invariants the computation
3623 ptr_difference_cost (struct ivopts_data *data,
3624 tree e1, tree e2, bool *symbol_present, bool *var_present,
3625 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3627 HOST_WIDE_INT diff = 0;
3630 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3632 if (ptr_difference_const (e1, e2, &diff))
3635 *symbol_present = false;
3636 *var_present = false;
3640 if (e2 == integer_zero_node)
3641 return split_address_cost (data, TREE_OPERAND (e1, 0),
3642 symbol_present, var_present, offset, depends_on);
3644 *symbol_present = false;
3645 *var_present = true;
3647 cost = force_var_cost (data, e1, depends_on);
3648 cost += force_var_cost (data, e2, depends_on);
3649 cost += add_cost (Pmode);
3654 /* Estimates cost of expressing difference E1 - E2 as
3655 var + symbol + offset. The value of offset is added to OFFSET,
3656 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3657 part is missing. DEPENDS_ON is a set of the invariants the computation
3661 difference_cost (struct ivopts_data *data,
3662 tree e1, tree e2, bool *symbol_present, bool *var_present,
3663 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3666 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3667 unsigned HOST_WIDE_INT off1, off2;
3669 e1 = strip_offset (e1, &off1);
3670 e2 = strip_offset (e2, &off2);
3671 *offset += off1 - off2;
3676 if (TREE_CODE (e1) == ADDR_EXPR)
3677 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3679 *symbol_present = false;
3681 if (operand_equal_p (e1, e2, 0))
3683 *var_present = false;
3686 *var_present = true;
3688 return force_var_cost (data, e1, depends_on);
3692 cost = force_var_cost (data, e2, depends_on);
3693 cost += multiply_by_cost (-1, mode);
3698 cost = force_var_cost (data, e1, depends_on);
3699 cost += force_var_cost (data, e2, depends_on);
3700 cost += add_cost (mode);
3705 /* Determines the cost of the computation by that USE is expressed
3706 from induction variable CAND. If ADDRESS_P is true, we just need
3707 to create an address from it, otherwise we want to get it into
3708 register. A set of invariants we depend on is stored in
3709 DEPENDS_ON. AT is the statement at that the value is computed. */
3712 get_computation_cost_at (struct ivopts_data *data,
3713 struct iv_use *use, struct iv_cand *cand,
3714 bool address_p, bitmap *depends_on, tree at)
3716 tree ubase = use->iv->base, ustep = use->iv->step;
3718 tree utype = TREE_TYPE (ubase), ctype;
3719 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3720 HOST_WIDE_INT ratio, aratio;
3721 bool var_present, symbol_present;
3722 unsigned cost = 0, n_sums;
3726 /* Only consider real candidates. */
3730 cbase = cand->iv->base;
3731 cstep = cand->iv->step;
3732 ctype = TREE_TYPE (cbase);
3734 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3736 /* We do not have a precision to express the values of use. */
3742 /* Do not try to express address of an object with computation based
3743 on address of a different object. This may cause problems in rtl
3744 level alias analysis (that does not expect this to be happening,
3745 as this is illegal in C), and would be unlikely to be useful
3747 if (use->iv->base_object
3748 && cand->iv->base_object
3749 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3753 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3755 /* TODO -- add direct handling of this case. */
3759 /* CSTEPI is removed from the offset in case statement is after the
3760 increment. If the step is not constant, we use zero instead.
3761 This is a bit imprecise (there is the extra addition), but
3762 redundancy elimination is likely to transform the code so that
3763 it uses value of the variable before increment anyway,
3764 so it is not that much unrealistic. */
3765 if (cst_and_fits_in_hwi (cstep))
3766 cstepi = int_cst_value (cstep);
3770 if (cst_and_fits_in_hwi (ustep)
3771 && cst_and_fits_in_hwi (cstep))
3773 ustepi = int_cst_value (ustep);
3775 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3782 rat = constant_multiple_of (utype, ustep, cstep);
3787 if (cst_and_fits_in_hwi (rat))
3788 ratio = int_cst_value (rat);
3789 else if (integer_onep (rat))
3791 else if (integer_all_onesp (rat))
3797 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3798 or ratio == 1, it is better to handle this like
3800 ubase - ratio * cbase + ratio * var
3802 (also holds in the case ratio == -1, TODO. */
3804 if (cst_and_fits_in_hwi (cbase))
3806 offset = - ratio * int_cst_value (cbase);
3807 cost += difference_cost (data,
3808 ubase, integer_zero_node,
3809 &symbol_present, &var_present, &offset,
3812 else if (ratio == 1)
3814 cost += difference_cost (data,
3816 &symbol_present, &var_present, &offset,
3821 cost += force_var_cost (data, cbase, depends_on);
3822 cost += add_cost (TYPE_MODE (ctype));
3823 cost += difference_cost (data,
3824 ubase, integer_zero_node,
3825 &symbol_present, &var_present, &offset,
3829 /* If we are after the increment, the value of the candidate is higher by
3831 if (stmt_after_increment (data->current_loop, cand, at))
3832 offset -= ratio * cstepi;
3834 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3835 (symbol/var/const parts may be omitted). If we are looking for an address,
3836 find the cost of addressing this. */
3838 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3840 /* Otherwise estimate the costs for computing the expression. */
3841 aratio = ratio > 0 ? ratio : -ratio;
3842 if (!symbol_present && !var_present && !offset)
3845 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3851 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3855 /* Symbol + offset should be compile-time computable. */
3856 && (symbol_present || offset))
3859 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3863 /* Just get the expression, expand it and measure the cost. */
3864 tree comp = get_computation_at (data->current_loop, use, cand, at);
3870 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3872 return computation_cost (comp);
3876 /* Determines the cost of the computation by that USE is expressed
3877 from induction variable CAND. If ADDRESS_P is true, we just need
3878 to create an address from it, otherwise we want to get it into
3879 register. A set of invariants we depend on is stored in
3883 get_computation_cost (struct ivopts_data *data,
3884 struct iv_use *use, struct iv_cand *cand,
3885 bool address_p, bitmap *depends_on)
3887 return get_computation_cost_at (data,
3888 use, cand, address_p, depends_on, use->stmt);
3891 /* Determines cost of basing replacement of USE on CAND in a generic
3895 determine_use_iv_cost_generic (struct ivopts_data *data,
3896 struct iv_use *use, struct iv_cand *cand)
3901 /* The simple case first -- if we need to express value of the preserved
3902 original biv, the cost is 0. This also prevents us from counting the
3903 cost of increment twice -- once at this use and once in the cost of
3905 if (cand->pos == IP_ORIGINAL
3906 && cand->incremented_at == use->stmt)
3908 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3912 cost = get_computation_cost (data, use, cand, false, &depends_on);
3913 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3915 return cost != INFTY;
3918 /* Determines cost of basing replacement of USE on CAND in an address. */
3921 determine_use_iv_cost_address (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3925 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3927 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3929 return cost != INFTY;
3932 /* Computes value of induction variable IV in iteration NITER. */
3935 iv_value (struct iv *iv, tree niter)
3938 tree type = TREE_TYPE (iv->base);
3940 niter = fold_convert (type, niter);
3941 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3943 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3946 /* Computes value of candidate CAND at position AT in iteration NITER. */
3949 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3951 tree val = iv_value (cand->iv, niter);
3952 tree type = TREE_TYPE (cand->iv->base);
3954 if (stmt_after_increment (loop, cand, at))
3955 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3960 /* Returns period of induction variable iv. */
3963 iv_period (struct iv *iv)
3965 tree step = iv->step, period, type;
3968 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3970 /* Period of the iv is gcd (step, type range). Since type range is power
3971 of two, it suffices to determine the maximum power of two that divides
3973 pow2div = num_ending_zeros (step);
3974 type = unsigned_type_for (TREE_TYPE (step));
3976 period = build_low_bits_mask (type,
3977 (TYPE_PRECISION (type)
3978 - tree_low_cst (pow2div, 1)));
3983 /* Returns the comparison operator used when eliminating the iv USE. */
3985 static enum tree_code
3986 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3988 struct loop *loop = data->current_loop;
3992 ex_bb = bb_for_stmt (use->stmt);
3993 exit = EDGE_SUCC (ex_bb, 0);
3994 if (flow_bb_inside_loop_p (loop, exit->dest))
3995 exit = EDGE_SUCC (ex_bb, 1);
3997 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4000 /* Check whether it is possible to express the condition in USE by comparison
4001 of candidate CAND. If so, store the value compared with to BOUND. */
4004 may_eliminate_iv (struct ivopts_data *data,
4005 struct iv_use *use, struct iv_cand *cand, tree *bound)
4010 tree wider_type, period, per_type;
4011 struct loop *loop = data->current_loop;
4013 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4016 /* For now works only for exits that dominate the loop latch. TODO -- extend
4017 for other conditions inside loop body. */
4018 ex_bb = bb_for_stmt (use->stmt);
4019 if (use->stmt != last_stmt (ex_bb)
4020 || TREE_CODE (use->stmt) != COND_EXPR)
4022 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4025 exit = EDGE_SUCC (ex_bb, 0);
4026 if (flow_bb_inside_loop_p (loop, exit->dest))
4027 exit = EDGE_SUCC (ex_bb, 1);
4028 if (flow_bb_inside_loop_p (loop, exit->dest))
4031 nit = niter_for_exit (data, exit);
4035 nit_type = TREE_TYPE (nit);
4037 /* Determine whether we may use the variable to test whether niter iterations
4038 elapsed. This is the case iff the period of the induction variable is
4039 greater than the number of iterations. */
4040 period = iv_period (cand->iv);
4043 per_type = TREE_TYPE (period);
4045 wider_type = TREE_TYPE (period);
4046 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4047 wider_type = per_type;
4049 wider_type = nit_type;
4051 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4052 fold_convert (wider_type, period),
4053 fold_convert (wider_type, nit))))
4056 *bound = cand_value_at (loop, cand, use->stmt, nit);
4060 /* Determines cost of basing replacement of USE on CAND in a condition. */
4063 determine_use_iv_cost_condition (struct ivopts_data *data,
4064 struct iv_use *use, struct iv_cand *cand)
4066 tree bound = NULL_TREE, op, cond;
4067 bitmap depends_on = NULL;
4070 /* Only consider real candidates. */
4073 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4077 if (may_eliminate_iv (data, use, cand, &bound))
4079 cost = force_var_cost (data, bound, &depends_on);
4081 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4082 return cost != INFTY;
4085 /* The induction variable elimination failed; just express the original
4086 giv. If it is compared with an invariant, note that we cannot get
4088 cost = get_computation_cost (data, use, cand, false, &depends_on);
4091 if (TREE_CODE (cond) != SSA_NAME)
4093 op = TREE_OPERAND (cond, 0);
4094 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4095 op = TREE_OPERAND (cond, 1);
4096 if (TREE_CODE (op) == SSA_NAME)
4098 op = get_iv (data, op)->base;
4099 fd_ivopts_data = data;
4100 walk_tree (&op, find_depends, &depends_on, NULL);
4104 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4105 return cost != INFTY;
4108 /* Checks whether it is possible to replace the final value of USE by
4109 a direct computation. If so, the formula is stored to *VALUE. */
4112 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4115 struct loop *loop = data->current_loop;
4119 exit = single_dom_exit (loop);
4123 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4124 bb_for_stmt (use->stmt)));
4126 nit = niter_for_single_dom_exit (data);
4130 *value = iv_value (use->iv, nit);
4135 /* Determines cost of replacing final value of USE using CAND. */
4138 determine_use_iv_cost_outer (struct ivopts_data *data,
4139 struct iv_use *use, struct iv_cand *cand)
4144 tree value = NULL_TREE;
4145 struct loop *loop = data->current_loop;
4147 /* The simple case first -- if we need to express value of the preserved
4148 original biv, the cost is 0. This also prevents us from counting the
4149 cost of increment twice -- once at this use and once in the cost of
4151 if (cand->pos == IP_ORIGINAL
4152 && cand->incremented_at == use->stmt)
4154 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4160 if (!may_replace_final_value (data, use, &value))
4162 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4167 cost = force_var_cost (data, value, &depends_on);
4169 cost /= AVG_LOOP_NITER (loop);
4171 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4172 return cost != INFTY;
4175 exit = single_dom_exit (loop);
4178 /* If there is just a single exit, we may use value of the candidate
4179 after we take it to determine the value of use. */
4180 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4181 last_stmt (exit->src));
4183 cost /= AVG_LOOP_NITER (loop);
4187 /* Otherwise we just need to compute the iv. */
4188 cost = get_computation_cost (data, use, cand, false, &depends_on);
4191 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4193 return cost != INFTY;
4196 /* Determines cost of basing replacement of USE on CAND. Returns false
4197 if USE cannot be based on CAND. */
4200 determine_use_iv_cost (struct ivopts_data *data,
4201 struct iv_use *use, struct iv_cand *cand)
4205 case USE_NONLINEAR_EXPR:
4206 return determine_use_iv_cost_generic (data, use, cand);
4209 return determine_use_iv_cost_outer (data, use, cand);
4212 return determine_use_iv_cost_address (data, use, cand);
4215 return determine_use_iv_cost_condition (data, use, cand);
4222 /* Determines costs of basing the use of the iv on an iv candidate. */
4225 determine_use_iv_costs (struct ivopts_data *data)
4229 struct iv_cand *cand;
4230 bitmap to_clear = BITMAP_ALLOC (NULL);
4232 alloc_use_cost_map (data);
4234 for (i = 0; i < n_iv_uses (data); i++)
4236 use = iv_use (data, i);
4238 if (data->consider_all_candidates)
4240 for (j = 0; j < n_iv_cands (data); j++)
4242 cand = iv_cand (data, j);
4243 determine_use_iv_cost (data, use, cand);
4250 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4252 cand = iv_cand (data, j);
4253 if (!determine_use_iv_cost (data, use, cand))
4254 bitmap_set_bit (to_clear, j);
4257 /* Remove the candidates for that the cost is infinite from
4258 the list of related candidates. */
4259 bitmap_and_compl_into (use->related_cands, to_clear);
4260 bitmap_clear (to_clear);
4264 BITMAP_FREE (to_clear);
4266 if (dump_file && (dump_flags & TDF_DETAILS))
4268 fprintf (dump_file, "Use-candidate costs:\n");
4270 for (i = 0; i < n_iv_uses (data); i++)
4272 use = iv_use (data, i);
4274 fprintf (dump_file, "Use %d:\n", i);
4275 fprintf (dump_file, " cand\tcost\tdepends on\n");
4276 for (j = 0; j < use->n_map_members; j++)
4278 if (!use->cost_map[j].cand
4279 || use->cost_map[j].cost == INFTY)
4282 fprintf (dump_file, " %d\t%d\t",
4283 use->cost_map[j].cand->id,
4284 use->cost_map[j].cost);
4285 if (use->cost_map[j].depends_on)
4286 bitmap_print (dump_file,
4287 use->cost_map[j].depends_on, "","");
4288 fprintf (dump_file, "\n");
4291 fprintf (dump_file, "\n");
4293 fprintf (dump_file, "\n");
4297 /* Determines cost of the candidate CAND. */
4300 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4302 unsigned cost_base, cost_step;
4311 /* There are two costs associated with the candidate -- its increment
4312 and its initialization. The second is almost negligible for any loop
4313 that rolls enough, so we take it just very little into account. */
4315 base = cand->iv->base;
4316 cost_base = force_var_cost (data, base, NULL);
4317 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4319 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4321 /* Prefer the original iv unless we may gain something by replacing it;
4322 this is not really relevant for artificial ivs created by other
4324 if (cand->pos == IP_ORIGINAL
4325 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4328 /* Prefer not to insert statements into latch unless there are some
4329 already (so that we do not create unnecessary jumps). */
4330 if (cand->pos == IP_END
4331 && empty_block_p (ip_end_pos (data->current_loop)))
4335 /* Determines costs of computation of the candidates. */
4338 determine_iv_costs (struct ivopts_data *data)
4342 if (dump_file && (dump_flags & TDF_DETAILS))
4344 fprintf (dump_file, "Candidate costs:\n");
4345 fprintf (dump_file, " cand\tcost\n");
4348 for (i = 0; i < n_iv_cands (data); i++)
4350 struct iv_cand *cand = iv_cand (data, i);
4352 determine_iv_cost (data, cand);
4354 if (dump_file && (dump_flags & TDF_DETAILS))
4355 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4358 if (dump_file && (dump_flags & TDF_DETAILS))
4359 fprintf (dump_file, "\n");
4362 /* Calculates cost for having SIZE induction variables. */
4365 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4367 return global_cost_for_size (size,
4368 loop_data (data->current_loop)->regs_used,
4372 /* For each size of the induction variable set determine the penalty. */
4375 determine_set_costs (struct ivopts_data *data)
4379 struct loop *loop = data->current_loop;
4382 /* We use the following model (definitely improvable, especially the
4383 cost function -- TODO):
4385 We estimate the number of registers available (using MD data), name it A.
4387 We estimate the number of registers used by the loop, name it U. This
4388 number is obtained as the number of loop phi nodes (not counting virtual
4389 registers and bivs) + the number of variables from outside of the loop.
4391 We set a reserve R (free regs that are used for temporary computations,
4392 etc.). For now the reserve is a constant 3.
4394 Let I be the number of induction variables.
4396 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4397 make a lot of ivs without a reason).
4398 -- if A - R < U + I <= A, the cost is I * PRES_COST
4399 -- if U + I > A, the cost is I * PRES_COST and
4400 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4402 if (dump_file && (dump_flags & TDF_DETAILS))
4404 fprintf (dump_file, "Global costs:\n");
4405 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4406 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4407 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4408 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4412 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4414 op = PHI_RESULT (phi);
4416 if (!is_gimple_reg (op))
4419 if (get_iv (data, op))
4425 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4427 struct version_info *info = ver_info (data, j);
4429 if (info->inv_id && info->has_nonlin_use)
4433 loop_data (loop)->regs_used = n;
4434 if (dump_file && (dump_flags & TDF_DETAILS))
4435 fprintf (dump_file, " regs_used %d\n", n);
4437 if (dump_file && (dump_flags & TDF_DETAILS))
4439 fprintf (dump_file, " cost for size:\n");
4440 fprintf (dump_file, " ivs\tcost\n");
4441 for (j = 0; j <= 2 * target_avail_regs; j++)
4442 fprintf (dump_file, " %d\t%d\n", j,
4443 ivopts_global_cost_for_size (data, j));
4444 fprintf (dump_file, "\n");
4448 /* Returns true if A is a cheaper cost pair than B. */
4451 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4459 if (a->cost < b->cost)
4462 if (a->cost > b->cost)
4465 /* In case the costs are the same, prefer the cheaper candidate. */
4466 if (a->cand->cost < b->cand->cost)
4472 /* Computes the cost field of IVS structure. */
4475 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4479 cost += ivs->cand_use_cost;
4480 cost += ivs->cand_cost;
4481 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4486 /* Remove invariants in set INVS to set IVS. */
4489 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4497 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4499 ivs->n_invariant_uses[iid]--;
4500 if (ivs->n_invariant_uses[iid] == 0)
4505 /* Set USE not to be expressed by any candidate in IVS. */
4508 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4511 unsigned uid = use->id, cid;
4512 struct cost_pair *cp;
4514 cp = ivs->cand_for_use[uid];
4520 ivs->cand_for_use[uid] = NULL;
4521 ivs->n_cand_uses[cid]--;
4523 if (ivs->n_cand_uses[cid] == 0)
4525 bitmap_clear_bit (ivs->cands, cid);
4526 /* Do not count the pseudocandidates. */
4530 ivs->cand_cost -= cp->cand->cost;
4532 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4535 ivs->cand_use_cost -= cp->cost;
4537 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4538 iv_ca_recount_cost (data, ivs);
4541 /* Add invariants in set INVS to set IVS. */
4544 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4552 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4554 ivs->n_invariant_uses[iid]++;
4555 if (ivs->n_invariant_uses[iid] == 1)
4560 /* Set cost pair for USE in set IVS to CP. */
4563 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4564 struct iv_use *use, struct cost_pair *cp)
4566 unsigned uid = use->id, cid;
4568 if (ivs->cand_for_use[uid] == cp)
4571 if (ivs->cand_for_use[uid])
4572 iv_ca_set_no_cp (data, ivs, use);
4579 ivs->cand_for_use[uid] = cp;
4580 ivs->n_cand_uses[cid]++;
4581 if (ivs->n_cand_uses[cid] == 1)
4583 bitmap_set_bit (ivs->cands, cid);
4584 /* Do not count the pseudocandidates. */
4588 ivs->cand_cost += cp->cand->cost;
4590 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4593 ivs->cand_use_cost += cp->cost;
4594 iv_ca_set_add_invariants (ivs, cp->depends_on);
4595 iv_ca_recount_cost (data, ivs);
4599 /* Extend set IVS by expressing USE by some of the candidates in it
4603 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4606 struct cost_pair *best_cp = NULL, *cp;
4610 gcc_assert (ivs->upto >= use->id);
4612 if (ivs->upto == use->id)
4618 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4620 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4622 if (cheaper_cost_pair (cp, best_cp))
4626 iv_ca_set_cp (data, ivs, use, best_cp);
4629 /* Get cost for assignment IVS. */
4632 iv_ca_cost (struct iv_ca *ivs)
4634 return (ivs->bad_uses ? INFTY : ivs->cost);
4637 /* Returns true if all dependences of CP are among invariants in IVS. */
4640 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4645 if (!cp->depends_on)
4648 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4650 if (ivs->n_invariant_uses[i] == 0)
4657 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4658 it before NEXT_CHANGE. */
4660 static struct iv_ca_delta *
4661 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4662 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4664 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4667 change->old_cp = old_cp;
4668 change->new_cp = new_cp;
4669 change->next_change = next_change;
4674 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4677 static struct iv_ca_delta *
4678 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4680 struct iv_ca_delta *last;
4688 for (last = l1; last->next_change; last = last->next_change)
4690 last->next_change = l2;
4695 /* Returns candidate by that USE is expressed in IVS. */
4697 static struct cost_pair *
4698 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4700 return ivs->cand_for_use[use->id];
4703 /* Reverse the list of changes DELTA, forming the inverse to it. */
4705 static struct iv_ca_delta *
4706 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4708 struct iv_ca_delta *act, *next, *prev = NULL;
4709 struct cost_pair *tmp;
4711 for (act = delta; act; act = next)
4713 next = act->next_change;
4714 act->next_change = prev;
4718 act->old_cp = act->new_cp;
4725 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4726 reverted instead. */
4729 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4730 struct iv_ca_delta *delta, bool forward)
4732 struct cost_pair *from, *to;
4733 struct iv_ca_delta *act;
4736 delta = iv_ca_delta_reverse (delta);
4738 for (act = delta; act; act = act->next_change)
4742 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4743 iv_ca_set_cp (data, ivs, act->use, to);
4747 iv_ca_delta_reverse (delta);
4750 /* Returns true if CAND is used in IVS. */
4753 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4755 return ivs->n_cand_uses[cand->id] > 0;
4758 /* Returns number of induction variable candidates in the set IVS. */
4761 iv_ca_n_cands (struct iv_ca *ivs)
4763 return ivs->n_cands;
4766 /* Free the list of changes DELTA. */
4769 iv_ca_delta_free (struct iv_ca_delta **delta)
4771 struct iv_ca_delta *act, *next;
4773 for (act = *delta; act; act = next)
4775 next = act->next_change;
4782 /* Allocates new iv candidates assignment. */
4784 static struct iv_ca *
4785 iv_ca_new (struct ivopts_data *data)
4787 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4791 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4792 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4793 nw->cands = BITMAP_ALLOC (NULL);
4796 nw->cand_use_cost = 0;
4798 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4804 /* Free memory occupied by the set IVS. */
4807 iv_ca_free (struct iv_ca **ivs)
4809 free ((*ivs)->cand_for_use);
4810 free ((*ivs)->n_cand_uses);
4811 BITMAP_FREE ((*ivs)->cands);
4812 free ((*ivs)->n_invariant_uses);
4817 /* Dumps IVS to FILE. */
4820 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4822 const char *pref = " invariants ";
4825 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4826 bitmap_print (file, ivs->cands, " candidates ","\n");
4828 for (i = 1; i <= data->max_inv_id; i++)
4829 if (ivs->n_invariant_uses[i])
4831 fprintf (file, "%s%d", pref, i);
4834 fprintf (file, "\n");
4837 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4838 new set, and store differences in DELTA. Number of induction variables
4839 in the new set is stored to N_IVS. */
4842 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4843 struct iv_cand *cand, struct iv_ca_delta **delta,
4848 struct cost_pair *old_cp, *new_cp;
4851 for (i = 0; i < ivs->upto; i++)
4853 use = iv_use (data, i);
4854 old_cp = iv_ca_cand_for_use (ivs, use);
4857 && old_cp->cand == cand)
4860 new_cp = get_use_iv_cost (data, use, cand);
4864 if (!iv_ca_has_deps (ivs, new_cp))
4867 if (!cheaper_cost_pair (new_cp, old_cp))
4870 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4873 iv_ca_delta_commit (data, ivs, *delta, true);
4874 cost = iv_ca_cost (ivs);
4876 *n_ivs = iv_ca_n_cands (ivs);
4877 iv_ca_delta_commit (data, ivs, *delta, false);
4882 /* Try narrowing set IVS by removing CAND. Return the cost of
4883 the new set and store the differences in DELTA. */
4886 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4887 struct iv_cand *cand, struct iv_ca_delta **delta)
4891 struct cost_pair *old_cp, *new_cp, *cp;
4893 struct iv_cand *cnd;
4897 for (i = 0; i < n_iv_uses (data); i++)
4899 use = iv_use (data, i);
4901 old_cp = iv_ca_cand_for_use (ivs, use);
4902 if (old_cp->cand != cand)
4907 if (data->consider_all_candidates)
4909 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4914 cnd = iv_cand (data, ci);
4916 cp = get_use_iv_cost (data, use, cnd);
4919 if (!iv_ca_has_deps (ivs, cp))
4922 if (!cheaper_cost_pair (cp, new_cp))
4930 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4935 cnd = iv_cand (data, ci);
4937 cp = get_use_iv_cost (data, use, cnd);
4940 if (!iv_ca_has_deps (ivs, cp))
4943 if (!cheaper_cost_pair (cp, new_cp))
4952 iv_ca_delta_free (delta);
4956 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4959 iv_ca_delta_commit (data, ivs, *delta, true);
4960 cost = iv_ca_cost (ivs);
4961 iv_ca_delta_commit (data, ivs, *delta, false);
4966 /* Try optimizing the set of candidates IVS by removing candidates different
4967 from to EXCEPT_CAND from it. Return cost of the new set, and store
4968 differences in DELTA. */
4971 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4972 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4975 struct iv_ca_delta *act_delta, *best_delta;
4976 unsigned i, best_cost, acost;
4977 struct iv_cand *cand;
4980 best_cost = iv_ca_cost (ivs);
4982 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4984 cand = iv_cand (data, i);
4986 if (cand == except_cand)
4989 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4991 if (acost < best_cost)
4994 iv_ca_delta_free (&best_delta);
4995 best_delta = act_delta;
4998 iv_ca_delta_free (&act_delta);
5007 /* Recurse to possibly remove other unnecessary ivs. */
5008 iv_ca_delta_commit (data, ivs, best_delta, true);
5009 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5010 iv_ca_delta_commit (data, ivs, best_delta, false);
5011 *delta = iv_ca_delta_join (best_delta, *delta);
5015 /* Tries to extend the sets IVS in the best possible way in order
5016 to express the USE. */
5019 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5022 unsigned best_cost, act_cost;
5025 struct iv_cand *cand;
5026 struct iv_ca_delta *best_delta = NULL, *act_delta;
5027 struct cost_pair *cp;
5029 iv_ca_add_use (data, ivs, use);
5030 best_cost = iv_ca_cost (ivs);
5032 cp = iv_ca_cand_for_use (ivs, use);
5035 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5036 iv_ca_set_no_cp (data, ivs, use);
5039 /* First try important candidates. Only if it fails, try the specific ones.
5040 Rationale -- in loops with many variables the best choice often is to use
5041 just one generic biv. If we added here many ivs specific to the uses,
5042 the optimization algorithm later would be likely to get stuck in a local
5043 minimum, thus causing us to create too many ivs. The approach from
5044 few ivs to more seems more likely to be successful -- starting from few
5045 ivs, replacing an expensive use by a specific iv should always be a
5047 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5049 cand = iv_cand (data, i);
5051 if (iv_ca_cand_used_p (ivs, cand))
5054 cp = get_use_iv_cost (data, use, cand);
5058 iv_ca_set_cp (data, ivs, use, cp);
5059 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5060 iv_ca_set_no_cp (data, ivs, use);
5061 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5063 if (act_cost < best_cost)
5065 best_cost = act_cost;
5067 iv_ca_delta_free (&best_delta);
5068 best_delta = act_delta;
5071 iv_ca_delta_free (&act_delta);
5074 if (best_cost == INFTY)
5076 for (i = 0; i < use->n_map_members; i++)
5078 cp = use->cost_map + i;
5083 /* Already tried this. */
5084 if (cand->important)
5087 if (iv_ca_cand_used_p (ivs, cand))
5091 iv_ca_set_cp (data, ivs, use, cp);
5092 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5093 iv_ca_set_no_cp (data, ivs, use);
5094 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5097 if (act_cost < best_cost)
5099 best_cost = act_cost;
5102 iv_ca_delta_free (&best_delta);
5103 best_delta = act_delta;
5106 iv_ca_delta_free (&act_delta);
5110 iv_ca_delta_commit (data, ivs, best_delta, true);
5111 iv_ca_delta_free (&best_delta);
5113 return (best_cost != INFTY);
5116 /* Finds an initial assignment of candidates to uses. */
5118 static struct iv_ca *
5119 get_initial_solution (struct ivopts_data *data)
5121 struct iv_ca *ivs = iv_ca_new (data);
5124 for (i = 0; i < n_iv_uses (data); i++)
5125 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5134 /* Tries to improve set of induction variables IVS. */
5137 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5139 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5140 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5141 struct iv_cand *cand;
5143 /* Try extending the set of induction variables by one. */
5144 for (i = 0; i < n_iv_cands (data); i++)
5146 cand = iv_cand (data, i);
5148 if (iv_ca_cand_used_p (ivs, cand))
5151 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5155 /* If we successfully added the candidate and the set is small enough,
5156 try optimizing it by removing other candidates. */
5157 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5159 iv_ca_delta_commit (data, ivs, act_delta, true);
5160 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5161 iv_ca_delta_commit (data, ivs, act_delta, false);
5162 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5165 if (acost < best_cost)
5168 iv_ca_delta_free (&best_delta);
5169 best_delta = act_delta;
5172 iv_ca_delta_free (&act_delta);
5177 /* Try removing the candidates from the set instead. */
5178 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5180 /* Nothing more we can do. */
5185 iv_ca_delta_commit (data, ivs, best_delta, true);
5186 gcc_assert (best_cost == iv_ca_cost (ivs));
5187 iv_ca_delta_free (&best_delta);
5191 /* Attempts to find the optimal set of induction variables. We do simple
5192 greedy heuristic -- we try to replace at most one candidate in the selected
5193 solution and remove the unused ivs while this improves the cost. */
5195 static struct iv_ca *
5196 find_optimal_iv_set (struct ivopts_data *data)
5202 /* Get the initial solution. */
5203 set = get_initial_solution (data);
5206 if (dump_file && (dump_flags & TDF_DETAILS))
5207 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5213 fprintf (dump_file, "Initial set of candidates:\n");
5214 iv_ca_dump (data, dump_file, set);
5217 while (try_improve_iv_set (data, set))
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5221 fprintf (dump_file, "Improved to:\n");
5222 iv_ca_dump (data, dump_file, set);
5226 if (dump_file && (dump_flags & TDF_DETAILS))
5227 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5229 for (i = 0; i < n_iv_uses (data); i++)
5231 use = iv_use (data, i);
5232 use->selected = iv_ca_cand_for_use (set, use)->cand;
5238 /* Creates a new induction variable corresponding to CAND. */
5241 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5243 block_stmt_iterator incr_pos;
5253 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5257 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5262 /* Mark that the iv is preserved. */
5263 name_info (data, cand->var_before)->preserve_biv = true;
5264 name_info (data, cand->var_after)->preserve_biv = true;
5266 /* Rewrite the increment so that it uses var_before directly. */
5267 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5272 gimple_add_tmp_var (cand->var_before);
5273 add_referenced_tmp_var (cand->var_before);
5275 base = unshare_expr (cand->iv->base);
5277 create_iv (base, unshare_expr (cand->iv->step),
5278 cand->var_before, data->current_loop,
5279 &incr_pos, after, &cand->var_before, &cand->var_after);
5282 /* Creates new induction variables described in SET. */
5285 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5288 struct iv_cand *cand;
5291 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5293 cand = iv_cand (data, i);
5294 create_new_iv (data, cand);
5298 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5299 is true, remove also the ssa name defined by the statement. */
5302 remove_statement (tree stmt, bool including_defined_name)
5304 if (TREE_CODE (stmt) == PHI_NODE)
5306 if (!including_defined_name)
5308 /* Prevent the ssa name defined by the statement from being removed. */
5309 SET_PHI_RESULT (stmt, NULL);
5311 remove_phi_node (stmt, NULL_TREE);
5315 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5321 /* Rewrites USE (definition of iv used in a nonlinear expression)
5322 using candidate CAND. */
5325 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5326 struct iv_use *use, struct iv_cand *cand)
5329 tree op, stmts, tgt, ass;
5330 block_stmt_iterator bsi, pbsi;
5332 /* An important special case -- if we are asked to express value of
5333 the original iv by itself, just exit; there is no need to
5334 introduce a new computation (that might also need casting the
5335 variable to unsigned and back). */
5336 if (cand->pos == IP_ORIGINAL
5337 && cand->incremented_at == use->stmt)
5339 tree step, ctype, utype;
5340 enum tree_code incr_code = PLUS_EXPR;
5342 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5343 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5345 step = cand->iv->step;
5346 ctype = TREE_TYPE (step);
5347 utype = TREE_TYPE (cand->var_after);
5348 if (TREE_CODE (step) == NEGATE_EXPR)
5350 incr_code = MINUS_EXPR;
5351 step = TREE_OPERAND (step, 0);
5354 /* Check whether we may leave the computation unchanged.
5355 This is the case only if it does not rely on other
5356 computations in the loop -- otherwise, the computation
5357 we rely upon may be removed in remove_unused_ivs,
5358 thus leading to ICE. */
5359 op = TREE_OPERAND (use->stmt, 1);
5360 if (TREE_CODE (op) == PLUS_EXPR
5361 || TREE_CODE (op) == MINUS_EXPR)
5363 if (TREE_OPERAND (op, 0) == cand->var_before)
5364 op = TREE_OPERAND (op, 1);
5365 else if (TREE_CODE (op) == PLUS_EXPR
5366 && TREE_OPERAND (op, 1) == cand->var_before)
5367 op = TREE_OPERAND (op, 0);
5375 && (TREE_CODE (op) == INTEGER_CST
5376 || operand_equal_p (op, step, 0)))
5379 /* Otherwise, add the necessary computations to express
5381 op = fold_convert (ctype, cand->var_before);
5382 comp = fold_convert (utype,
5383 build2 (incr_code, ctype, op,
5384 unshare_expr (step)));
5387 comp = get_computation (data->current_loop, use, cand);
5389 switch (TREE_CODE (use->stmt))
5392 tgt = PHI_RESULT (use->stmt);
5394 /* If we should keep the biv, do not replace it. */
5395 if (name_info (data, tgt)->preserve_biv)
5398 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5399 while (!bsi_end_p (pbsi)
5400 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5408 tgt = TREE_OPERAND (use->stmt, 0);
5409 bsi = bsi_for_stmt (use->stmt);
5416 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5418 if (TREE_CODE (use->stmt) == PHI_NODE)
5421 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5422 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5423 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5424 remove_statement (use->stmt, false);
5425 SSA_NAME_DEF_STMT (tgt) = ass;
5430 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5431 TREE_OPERAND (use->stmt, 1) = op;
5435 /* Replaces ssa name in index IDX by its basic variable. Callback for
5439 idx_remove_ssa_names (tree base, tree *idx,
5440 void *data ATTRIBUTE_UNUSED)
5444 if (TREE_CODE (*idx) == SSA_NAME)
5445 *idx = SSA_NAME_VAR (*idx);
5447 if (TREE_CODE (base) == ARRAY_REF)
5449 op = &TREE_OPERAND (base, 2);
5451 && TREE_CODE (*op) == SSA_NAME)
5452 *op = SSA_NAME_VAR (*op);
5453 op = &TREE_OPERAND (base, 3);
5455 && TREE_CODE (*op) == SSA_NAME)
5456 *op = SSA_NAME_VAR (*op);
5462 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5465 unshare_and_remove_ssa_names (tree ref)
5467 ref = unshare_expr (ref);
5468 for_each_index (&ref, idx_remove_ssa_names, NULL);
5473 /* Extract the alias analysis info for the memory reference REF. There are
5474 several ways how this information may be stored and what precisely is
5475 its semantics depending on the type of the reference, but there always is
5476 somewhere hidden one _DECL node that is used to determine the set of
5477 virtual operands for the reference. The code below deciphers this jungle
5478 and extracts this single useful piece of information. */
5481 get_ref_tag (tree ref, tree orig)
5483 tree var = get_base_address (ref);
5485 unsigned HOST_WIDE_INT offset, size;
5487 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5488 if (okay_component_ref_for_subvars (sv, &offset, &size))
5491 if (handled_component_p (sv))
5492 return unshare_expr (sv);
5497 if (TREE_CODE (var) == INDIRECT_REF)
5499 /* In case the base is a dereference of a pointer, first check its name
5500 mem tag, and if it does not have one, use type mem tag. */
5501 var = TREE_OPERAND (var, 0);
5502 if (TREE_CODE (var) != SSA_NAME)
5505 if (SSA_NAME_PTR_INFO (var))
5507 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5512 var = SSA_NAME_VAR (var);
5513 tag = var_ann (var)->type_mem_tag;
5514 gcc_assert (tag != NULL_TREE);
5522 tag = var_ann (var)->type_mem_tag;
5530 /* Copies the reference information from OLD_REF to NEW_REF. */
5533 copy_ref_info (tree new_ref, tree old_ref)
5535 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5536 copy_mem_ref_info (new_ref, old_ref);
5539 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5540 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5544 /* Rewrites USE (address that is an iv) using candidate CAND. */
5547 rewrite_use_address (struct ivopts_data *data,
5548 struct iv_use *use, struct iv_cand *cand)
5550 struct affine_tree_combination aff;
5551 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5554 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5555 unshare_aff_combination (&aff);
5557 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5558 copy_ref_info (ref, *use->op_p);
5562 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5566 rewrite_use_compare (struct ivopts_data *data,
5567 struct iv_use *use, struct iv_cand *cand)
5570 tree *op_p, cond, op, stmts, bound;
5571 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5572 enum tree_code compare;
5573 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5578 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5579 tree var_type = TREE_TYPE (var);
5581 compare = iv_elimination_compare (data, use);
5582 bound = fold_convert (var_type, bound);
5583 op = force_gimple_operand (unshare_expr (bound), &stmts,
5587 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5589 *use->op_p = build2 (compare, boolean_type_node, var, op);
5590 update_stmt (use->stmt);
5594 /* The induction variable elimination failed; just express the original
5596 comp = get_computation (data->current_loop, use, cand);
5599 op_p = &TREE_OPERAND (cond, 0);
5600 if (TREE_CODE (*op_p) != SSA_NAME
5601 || zero_p (get_iv (data, *op_p)->step))
5602 op_p = &TREE_OPERAND (cond, 1);
5604 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5606 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5611 /* Ensure that operand *OP_P may be used at the end of EXIT without
5612 violating loop closed ssa form. */
5615 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5618 struct loop *def_loop;
5621 use = USE_FROM_PTR (op_p);
5622 if (TREE_CODE (use) != SSA_NAME)
5625 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5629 def_loop = def_bb->loop_father;
5630 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5633 /* Try finding a phi node that copies the value out of the loop. */
5634 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5635 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5640 /* Create such a phi node. */
5641 tree new_name = duplicate_ssa_name (use, NULL);
5643 phi = create_phi_node (new_name, exit->dest);
5644 SSA_NAME_DEF_STMT (new_name) = phi;
5645 add_phi_arg (phi, use, exit);
5648 SET_USE (op_p, PHI_RESULT (phi));
5651 /* Ensure that operands of STMT may be used at the end of EXIT without
5652 violating loop closed ssa form. */
5655 protect_loop_closed_ssa_form (edge exit, tree stmt)
5658 use_operand_p use_p;
5660 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5661 protect_loop_closed_ssa_form_use (exit, use_p);
5664 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5665 so that they are emitted on the correct place, and so that the loop closed
5666 ssa form is preserved. */
5669 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5671 tree_stmt_iterator tsi;
5672 block_stmt_iterator bsi;
5673 tree phi, stmt, def, next;
5675 if (!single_pred_p (exit->dest))
5676 split_loop_exit_edge (exit);
5678 /* Ensure there is label in exit->dest, so that we can
5680 tree_block_label (exit->dest);
5681 bsi = bsi_after_labels (exit->dest);
5683 if (TREE_CODE (stmts) == STATEMENT_LIST)
5685 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5687 tree stmt = tsi_stmt (tsi);
5688 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5689 protect_loop_closed_ssa_form (exit, stmt);
5694 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5695 protect_loop_closed_ssa_form (exit, stmts);
5701 for (phi = phi_nodes (exit->dest); phi; phi = next)
5703 next = PHI_CHAIN (phi);
5705 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5707 def = PHI_RESULT (phi);
5708 remove_statement (phi, false);
5709 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5711 SSA_NAME_DEF_STMT (def) = stmt;
5712 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5717 /* Rewrites the final value of USE (that is only needed outside of the loop)
5718 using candidate CAND. */
5721 rewrite_use_outer (struct ivopts_data *data,
5722 struct iv_use *use, struct iv_cand *cand)
5725 tree value, op, stmts, tgt;
5728 switch (TREE_CODE (use->stmt))
5731 tgt = PHI_RESULT (use->stmt);
5734 tgt = TREE_OPERAND (use->stmt, 0);
5740 exit = single_dom_exit (data->current_loop);
5746 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5747 value = unshare_expr (cp->value);
5750 value = get_computation_at (data->current_loop,
5751 use, cand, last_stmt (exit->src));
5753 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5755 /* If we will preserve the iv anyway and we would need to perform
5756 some computation to replace the final value, do nothing. */
5757 if (stmts && name_info (data, tgt)->preserve_biv)
5760 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5762 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5764 if (USE_FROM_PTR (use_p) == tgt)
5765 SET_USE (use_p, op);
5769 compute_phi_arg_on_exit (exit, stmts, op);
5771 /* Enable removal of the statement. We cannot remove it directly,
5772 since we may still need the aliasing information attached to the
5773 ssa name defined by it. */
5774 name_info (data, tgt)->iv->have_use_for = false;
5778 /* If the variable is going to be preserved anyway, there is nothing to
5780 if (name_info (data, tgt)->preserve_biv)
5783 /* Otherwise we just need to compute the iv. */
5784 rewrite_use_nonlinear_expr (data, use, cand);
5787 /* Rewrites USE using candidate CAND. */
5790 rewrite_use (struct ivopts_data *data,
5791 struct iv_use *use, struct iv_cand *cand)
5795 case USE_NONLINEAR_EXPR:
5796 rewrite_use_nonlinear_expr (data, use, cand);
5800 rewrite_use_outer (data, use, cand);
5804 rewrite_use_address (data, use, cand);
5808 rewrite_use_compare (data, use, cand);
5814 update_stmt (use->stmt);
5817 /* Rewrite the uses using the selected induction variables. */
5820 rewrite_uses (struct ivopts_data *data)
5823 struct iv_cand *cand;
5826 for (i = 0; i < n_iv_uses (data); i++)
5828 use = iv_use (data, i);
5829 cand = use->selected;
5832 rewrite_use (data, use, cand);
5836 /* Removes the ivs that are not used after rewriting. */
5839 remove_unused_ivs (struct ivopts_data *data)
5844 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5846 struct version_info *info;
5848 info = ver_info (data, j);
5850 && !zero_p (info->iv->step)
5852 && !info->iv->have_use_for
5853 && !info->preserve_biv)
5854 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5858 /* Frees data allocated by the optimization of a single loop. */
5861 free_loop_data (struct ivopts_data *data)
5867 htab_empty (data->niters);
5869 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5871 struct version_info *info;
5873 info = ver_info (data, i);
5877 info->has_nonlin_use = false;
5878 info->preserve_biv = false;
5881 bitmap_clear (data->relevant);
5882 bitmap_clear (data->important_candidates);
5884 for (i = 0; i < n_iv_uses (data); i++)
5886 struct iv_use *use = iv_use (data, i);
5889 BITMAP_FREE (use->related_cands);
5890 for (j = 0; j < use->n_map_members; j++)
5891 if (use->cost_map[j].depends_on)
5892 BITMAP_FREE (use->cost_map[j].depends_on);
5893 free (use->cost_map);
5896 VEC_truncate (iv_use_p, data->iv_uses, 0);
5898 for (i = 0; i < n_iv_cands (data); i++)
5900 struct iv_cand *cand = iv_cand (data, i);
5904 if (cand->depends_on)
5905 BITMAP_FREE (cand->depends_on);
5908 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5910 if (data->version_info_size < num_ssa_names)
5912 data->version_info_size = 2 * num_ssa_names;
5913 free (data->version_info);
5914 data->version_info = xcalloc (data->version_info_size,
5915 sizeof (struct version_info));
5918 data->max_inv_id = 0;
5920 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5921 SET_DECL_RTL (obj, NULL_RTX);
5923 VEC_truncate (tree, decl_rtl_to_reset, 0);
5926 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5930 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5934 for (i = 1; i < loops->num; i++)
5935 if (loops->parray[i])
5937 free (loops->parray[i]->aux);
5938 loops->parray[i]->aux = NULL;
5941 free_loop_data (data);
5942 free (data->version_info);
5943 BITMAP_FREE (data->relevant);
5944 BITMAP_FREE (data->important_candidates);
5945 htab_delete (data->niters);
5947 VEC_free (tree, heap, decl_rtl_to_reset);
5948 VEC_free (iv_use_p, heap, data->iv_uses);
5949 VEC_free (iv_cand_p, heap, data->iv_candidates);
5952 /* Optimizes the LOOP. Returns true if anything changed. */
5955 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5957 bool changed = false;
5958 struct iv_ca *iv_ca;
5961 data->current_loop = loop;
5963 if (dump_file && (dump_flags & TDF_DETAILS))
5965 fprintf (dump_file, "Processing loop %d\n", loop->num);
5967 exit = single_dom_exit (loop);
5970 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5971 exit->src->index, exit->dest->index);
5972 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5973 fprintf (dump_file, "\n");
5976 fprintf (dump_file, "\n");
5979 /* For each ssa name determines whether it behaves as an induction variable
5981 if (!find_induction_variables (data))
5984 /* Finds interesting uses (item 1). */
5985 find_interesting_uses (data);
5986 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5989 /* Finds candidates for the induction variables (item 2). */
5990 find_iv_candidates (data);
5992 /* Calculates the costs (item 3, part 1). */
5993 determine_use_iv_costs (data);
5994 determine_iv_costs (data);
5995 determine_set_costs (data);
5997 /* Find the optimal set of induction variables (item 3, part 2). */
5998 iv_ca = find_optimal_iv_set (data);
6003 /* Create the new induction variables (item 4, part 1). */
6004 create_new_ivs (data, iv_ca);
6005 iv_ca_free (&iv_ca);
6007 /* Rewrite the uses (item 4, part 2). */
6008 rewrite_uses (data);
6010 /* Remove the ivs that are unused after rewriting. */
6011 remove_unused_ivs (data);
6013 /* We have changed the structure of induction variables; it might happen
6014 that definitions in the scev database refer to some of them that were
6019 free_loop_data (data);
6024 /* Main entry point. Optimizes induction variables in LOOPS. */
6027 tree_ssa_iv_optimize (struct loops *loops)
6030 struct ivopts_data data;
6032 tree_ssa_iv_optimize_init (loops, &data);
6034 /* Optimize the loops starting with the innermost ones. */
6035 loop = loops->tree_root;
6039 /* Scan the loops, inner ones first. */
6040 while (loop != loops->tree_root)
6042 if (dump_file && (dump_flags & TDF_DETAILS))
6043 flow_loop_dump (loop, dump_file, NULL, 1);
6045 tree_ssa_iv_optimize_loop (&data, loop);
6057 tree_ssa_iv_optimize_finalize (loops, &data);