1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* Cost of a computation. */
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
187 /* The induction variable candidate. */
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use *iv_use_p;
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
219 /* The currently optimized loop. */
220 struct loop *current_loop;
222 /* Numbers of iterations for all exits of the current loop. */
223 struct pointer_map_t *niters;
225 /* Number of registers used in it. */
228 /* The size of version_info array allocated. */
229 unsigned version_info_size;
231 /* The array of information for the ssa names. */
232 struct version_info *version_info;
234 /* The bitmap of indices in version_info whose value was changed. */
237 /* The uses of induction variables. */
238 VEC(iv_use_p,heap) *iv_uses;
240 /* The candidates. */
241 VEC(iv_cand_p,heap) *iv_candidates;
243 /* A bitmap of important candidates. */
244 bitmap important_candidates;
246 /* The maximum invariant id. */
249 /* Whether to consider just related and important candidates when replacing a
251 bool consider_all_candidates;
253 /* Are we optimizing for speed? */
257 /* An assignment of iv candidates to uses. */
261 /* The number of uses covered by the assignment. */
264 /* Number of uses that cannot be expressed by the candidates in the set. */
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
273 /* The candidates used. */
276 /* The number of candidates in the set. */
279 /* Total number of registers needed. */
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost;
285 /* Total cost of candidates. */
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
291 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
312 /* Bound on number of candidates below that all candidates are considered. */
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
323 /* If there are at most this number of ivs in the set, try removing unnecessary
324 ivs from the set always. */
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
329 /* The list of trees for that the decl_rtl field must be reset is stored
332 static VEC(tree,heap) *decl_rtl_to_reset;
334 /* Number of uses recorded in DATA. */
336 static inline unsigned
337 n_iv_uses (struct ivopts_data *data)
339 return VEC_length (iv_use_p, data->iv_uses);
342 /* Ith use recorded in DATA. */
344 static inline struct iv_use *
345 iv_use (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_use_p, data->iv_uses, i);
350 /* Number of candidates recorded in DATA. */
352 static inline unsigned
353 n_iv_cands (struct ivopts_data *data)
355 return VEC_length (iv_cand_p, data->iv_candidates);
358 /* Ith candidate recorded in DATA. */
360 static inline struct iv_cand *
361 iv_cand (struct ivopts_data *data, unsigned i)
363 return VEC_index (iv_cand_p, data->iv_candidates, i);
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
369 single_dom_exit (struct loop *loop)
371 edge exit = single_exit (loop);
376 if (!just_once_each_iteration_p (loop, exit->src))
382 /* Dumps information about the induction variable IV to FILE. */
384 extern void dump_iv (FILE *, struct iv *);
386 dump_iv (FILE *file, struct iv *iv)
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
424 fprintf (file, " is a biv\n");
427 /* Dumps information about the USE to FILE. */
429 extern void dump_use (FILE *, struct iv_use *);
431 dump_use (FILE *file, struct iv_use *use)
433 fprintf (file, "use %d\n", use->id);
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
442 fprintf (file, " address\n");
446 fprintf (file, " compare\n");
453 fprintf (file, " in statement ");
454 print_gimple_stmt (file, use->stmt, 0, 0);
455 fprintf (file, "\n");
457 fprintf (file, " at position ");
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
462 dump_iv (file, use->iv);
464 if (use->related_cands)
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
471 /* Dumps information about the uses to FILE. */
473 extern void dump_uses (FILE *, struct ivopts_data *);
475 dump_uses (FILE *file, struct ivopts_data *data)
480 for (i = 0; i < n_iv_uses (data); i++)
482 use = iv_use (data, i);
484 dump_use (file, use);
485 fprintf (file, "\n");
489 /* Dumps information about induction variable candidate CAND to FILE. */
491 extern void dump_cand (FILE *, struct iv_cand *);
493 dump_cand (FILE *file, struct iv_cand *cand)
495 struct iv *iv = cand->iv;
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
500 if (cand->depends_on)
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
508 fprintf (file, " final value replacement\n");
515 fprintf (file, " incremented before exit test\n");
519 fprintf (file, " incremented at end\n");
523 fprintf (file, " original biv\n");
530 /* Returns the info for ssa version VER. */
532 static inline struct version_info *
533 ver_info (struct ivopts_data *data, unsigned ver)
535 return data->version_info + ver;
538 /* Returns the info for ssa name NAME. */
540 static inline struct version_info *
541 name_info (struct ivopts_data *data, tree name)
543 return ver_info (data, SSA_NAME_VERSION (name));
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
550 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
556 if (sbb == loop->latch)
562 return stmt == last_stmt (bb);
565 /* Returns true if STMT if after the place where the original induction
566 variable CAND is incremented. */
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
578 if (stmt_bb != cand_bb)
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
585 if (gsi_stmt (bsi) == cand->incremented_at)
587 if (gsi_stmt (bsi) == stmt)
592 /* Returns true if STMT if after the place where the induction variable
593 CAND is incremented in LOOP. */
596 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
604 return stmt_after_ip_normal_pos (loop, stmt);
607 return stmt_after_ip_original_pos (cand, stmt);
614 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
617 abnormal_ssa_name_p (tree exp)
622 if (TREE_CODE (exp) != SSA_NAME)
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
628 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
632 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
635 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
643 return !abnormal_ssa_name_p (*index);
646 /* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
650 contains_abnormal_ssa_name_p (tree expr)
653 enum tree_code_class codeclass;
658 code = TREE_CODE (expr);
659 codeclass = TREE_CODE_CLASS (code);
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
694 /* Returns tree describing number of iterations determined from
695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
698 niter_for_exit (struct ivopts_data *data, edge exit)
700 struct tree_niter_desc desc;
706 data->niters = pointer_map_create ();
710 slot = pointer_map_contains (data->niters, exit);
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
721 && integer_zerop (desc.may_be_zero)
722 && !contains_abnormal_ssa_name_p (desc.niter))
727 *pointer_map_insert (data->niters, exit) = niter;
730 niter = (tree) *slot;
735 /* Returns tree describing number of iterations determined from
736 single dominating exit of DATA->current_loop, or NULL if something
740 niter_for_single_dom_exit (struct ivopts_data *data)
742 edge exit = single_dom_exit (data->current_loop);
747 return niter_for_exit (data, exit);
750 /* Initializes data structures used by the iv optimization pass, stored
754 tree_ssa_iv_optimize_init (struct ivopts_data *data)
756 data->version_info_size = 2 * num_ssa_names;
757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
760 data->max_inv_id = 0;
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr))
780 return determine_base_object (TREE_OPERAND (expr, 0));
782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
797 if (TREE_CODE (base) == INDIRECT_REF)
798 return determine_base_object (TREE_OPERAND (base, 0));
800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
812 return fold_convert (ptr_type_node, expr);
816 /* Allocates an induction variable with given initial value BASE and step STEP
820 alloc_iv (tree base, tree step)
822 struct iv *iv = XCNEW (struct iv);
823 gcc_assert (step != NULL_TREE);
826 iv->base_object = determine_base_object (base);
829 iv->have_use_for = false;
831 iv->ssa_name = NULL_TREE;
836 /* Sets STEP and BASE for induction variable IV. */
839 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
841 struct version_info *info = name_info (data, iv);
843 gcc_assert (!info->iv);
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
850 /* Finds induction variable declaration for VAR. */
853 get_iv (struct ivopts_data *data, tree var)
856 tree type = TREE_TYPE (var);
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
862 if (!name_info (data, var)->iv)
864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
868 set_iv (data, var, var, build_int_cst (type, 0));
871 return name_info (data, var)->iv;
874 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
878 determine_biv_step (gimple phi)
880 struct loop *loop = gimple_bb (phi)->loop_father;
881 tree name = PHI_RESULT (phi);
884 if (!is_gimple_reg (name))
887 if (!simple_iv (loop, loop, name, &iv, true))
890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
893 /* Finds basic ivs. */
896 find_bivs (struct ivopts_data *data)
899 tree step, type, base;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
906 phi = gsi_stmt (psi);
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
911 step = determine_biv_step (phi);
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
928 step = fold_convert (type, step);
931 set_iv (data, PHI_RESULT (phi), base, step);
938 /* Marks basic ivs. */
941 mark_bivs (struct ivopts_data *data)
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
948 gimple_stmt_iterator psi;
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
952 phi = gsi_stmt (psi);
954 iv = get_iv (data, PHI_RESULT (phi));
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
970 incr_iv->biv_p = true;
974 /* Checks whether STMT defines a linear induction variable and stores its
978 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
981 struct loop *loop = data->current_loop;
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
993 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
995 iv->base = expand_simple_operations (iv->base);
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
1004 /* Finds general ivs in statement STMT. */
1007 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1017 /* Finds general ivs in basic block BB. */
1020 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1022 gimple_stmt_iterator bsi;
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1028 /* Finds general ivs. */
1031 find_givs (struct ivopts_data *data)
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1046 find_induction_variables (struct ivopts_data *data)
1051 if (!find_bivs (data))
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1059 tree niter = niter_for_single_dom_exit (data);
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
1068 fprintf (dump_file, "Induction variables:\n\n");
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1082 static struct iv_use *
1083 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1086 struct iv_use *use = XCNEW (struct iv_use);
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1097 iv->ssa_name = NULL_TREE;
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1112 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1115 struct version_info *info;
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1126 info = name_info (data, op);
1128 info->has_nonlin_use |= nonlinear_use;
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1134 /* Checks whether the use OP is interesting and if so, records it. */
1136 static struct iv_use *
1137 find_interesting_uses_op (struct ivopts_data *data, tree op)
1144 if (TREE_CODE (op) != SSA_NAME)
1147 iv = get_iv (data, op);
1151 if (iv->have_use_for)
1153 use = iv_use (data, iv->use_id);
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1159 if (integer_zerop (iv->step))
1161 record_invariant (data, op, true);
1164 iv->have_use_for = true;
1166 civ = XNEW (struct iv);
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1188 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1199 if (gimple_code (stmt) == GIMPLE_COND)
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1218 /* Exactly one of the compared values must be an iv, and the other one must
1223 if (integer_zerop (iv0->step))
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1233 *control_var = op0;;
1244 /* Checks whether the condition in STMT is interesting and if so,
1248 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
1260 civ = XNEW (struct iv);
1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1270 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1275 gcc_assert (loop_depth (loop) > 0);
1277 if (is_gimple_min_invariant (expr))
1280 if (TREE_CODE (expr) == SSA_NAME)
1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1284 && flow_bb_inside_loop_p (loop, def_bb))
1293 len = TREE_OPERAND_LENGTH (expr);
1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1306 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1311 gcc_assert (loop_depth (loop) > 0);
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1324 /* Cumulates the steps of indices into DATA and replaces their values with the
1325 initial ones. Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1328 struct ifs_ivopts_data
1330 struct ivopts_data *ivopts_data;
1336 idx_find_step (tree base, tree *idx, void *data)
1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1340 tree step, iv_base, iv_step, lbound, off;
1341 struct loop *loop = dta->ivopts_data->current_loop;
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1347 /* If base is a component ref, require that the offset of the reference
1349 if (TREE_CODE (base) == COMPONENT_REF)
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
1359 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1361 /* Moreover, for a range, the size needs to be invariant as well. */
1362 if (TREE_CODE (base) == ARRAY_RANGE_REF
1363 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1366 step = array_ref_element_size (base);
1367 lbound = array_ref_low_bound (base);
1369 if (!expr_invariant_in_loop_p (loop, step)
1370 || !expr_invariant_in_loop_p (loop, lbound))
1374 if (TREE_CODE (*idx) != SSA_NAME)
1377 iv = get_iv (dta->ivopts_data, *idx);
1381 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1382 *&x[0], which is not folded and does not trigger the
1383 ARRAY_REF path below. */
1386 if (integer_zerop (iv->step))
1389 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1391 step = array_ref_element_size (base);
1393 /* We only handle addresses whose step is an integer constant. */
1394 if (TREE_CODE (step) != INTEGER_CST)
1398 /* The step for pointer arithmetics already is 1 byte. */
1399 step = build_int_cst (sizetype, 1);
1403 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1404 sizetype, &iv_base, &iv_step, dta->stmt,
1407 /* The index might wrap. */
1411 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1412 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base, tree *idx,
1424 struct ivopts_data *data = (struct ivopts_data *) vdata;
1425 find_interesting_uses_op (data, *idx);
1426 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1428 find_interesting_uses_op (data, array_ref_element_size (base));
1429 find_interesting_uses_op (data, array_ref_low_bound (base));
1434 /* If we can prove that TOP = cst * BOT for some constant cst,
1435 store cst to MUL and return true. Otherwise return false.
1436 The returned value is always sign-extended, regardless of the
1437 signedness of TOP and BOT. */
1440 constant_multiple_of (tree top, tree bot, double_int *mul)
1443 enum tree_code code;
1444 double_int res, p0, p1;
1445 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
     /* Trivial case: TOP and BOT are the same expression, so cst = 1.  */
1450 if (operand_equal_p (top, bot, 0))
1452 *mul = double_int_one;
1456 code = TREE_CODE (top);
     /* Multiplication by a constant: recurse into the non-constant
        operand and scale the result by MBY (presumably the MULT_EXPR
        case — the case label is not visible here).  */
1460 mby = TREE_OPERAND (top, 1);
1461 if (TREE_CODE (mby) != INTEGER_CST)
1464 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1467 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
     /* Sum/difference: both operands must themselves be constant
        multiples of BOT; combine the two factors.  */
1473 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1474 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1477 if (code == MINUS_EXPR)
1478 p1 = double_int_neg (p1);
1479 *mul = double_int_sext (double_int_add (p0, p1), precision);
     /* Fallback: both TOP and BOT are integer constants; the division
        must be exact (remainder RES zero) for TOP to be a multiple.  */
1483 if (TREE_CODE (bot) != INTEGER_CST)
1486 p0 = double_int_sext (tree_to_double_int (top), precision);
1487 p1 = double_int_sext (tree_to_double_int (bot), precision);
1488 if (double_int_zero_p (p1))
1490 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1492 return double_int_zero_p (res);
1499 /* Returns true if memory reference REF with step STEP may be unaligned. */
1502 may_be_unaligned_p (tree ref, tree step)
1506 HOST_WIDE_INT bitsize;
1507 HOST_WIDE_INT bitpos;
1509 enum machine_mode mode;
1510 int unsignedp, volatilep;
1511 unsigned base_align;
1513 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1514 thus they are not misaligned. */
1515 if (TREE_CODE (ref) == TARGET_MEM_REF)
1518 /* The test below is basically copy of what expr.c:normal_inner_ref
1519 does to check whether the object must be loaded by parts when
1520 STRICT_ALIGNMENT is true. */
1521 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1522 &unsignedp, &volatilep, true);
1523 base_type = TREE_TYPE (base);
1524 base_align = TYPE_ALIGN (base_type);
     /* For a reference of known (non-BLK) mode, it may be unaligned if
        the base alignment, the constant bit position, the variable
        offset, or the step can violate the mode's alignment.  */
1526 if (mode != BLKmode)
1528 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1530 if (base_align < mode_align
1531 || (bitpos % mode_align) != 0
1532 || (bitpos % BITS_PER_UNIT) != 0)
1536 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
     /* The step is in bytes; its largest power-of-two factor bounds the
        alignment that iterating the IV can guarantee.  */
1539 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1546 /* Return true if EXPR may be non-addressable. */
1549 may_be_nonaddressable_p (tree expr)
1551 switch (TREE_CODE (expr))
1553 case TARGET_MEM_REF:
1554 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1555 target, thus they are always addressable. */
     /* Component references: non-addressable if the accessed field is
        marked DECL_NONADDRESSABLE_P or the containing object is itself
        non-addressable (the case label is elided in this view).  */
1559 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1560 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1562 case VIEW_CONVERT_EXPR:
1563 /* This kind of view-conversions may wrap non-addressable objects
1564 and make them look addressable. After some processing the
1565 non-addressability may be uncovered again, causing ADDR_EXPRs
1566 of inappropriate objects to be built. */
1567 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1568 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1571 /* ... fall through ... */
1574 case ARRAY_RANGE_REF:
1575 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1587 /* Finds addresses in *OP_P inside STMT. */
1590 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1592 tree base = *op_p, step = build_int_cst (sizetype, 0);
1594 struct ifs_ivopts_data ifs_ivopts_data;
1596 /* Do not play with volatile memory references. A bit too conservative,
1597 perhaps, but safe. */
1598 if (gimple_has_volatile_ops (stmt))
1601 /* Ignore bitfields for now. Not really something terribly complicated
1603 if (TREE_CODE (base) == BIT_FIELD_REF)
1606 base = unshare_expr (base);
     /* TARGET_MEM_REF: substitute the IV bases for the SSA names in the
        TMR_BASE and TMR_INDEX operands, accumulating the combined step.  */
1608 if (TREE_CODE (base) == TARGET_MEM_REF)
1610 tree type = build_pointer_type (TREE_TYPE (base));
1614 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1616 civ = get_iv (data, TMR_BASE (base));
1620 TMR_BASE (base) = civ->base;
1623 if (TMR_INDEX (base)
1624 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1626 civ = get_iv (data, TMR_INDEX (base));
1630 TMR_INDEX (base) = civ->base;
1635 if (TMR_STEP (base))
1636 TMR_STEP (base) = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1638 step = fold_build2 (PLUS_EXPR, type, step, astep);
     /* Zero step means the address is loop invariant, not an IV use.  */
1642 if (integer_zerop (step))
1644 base = tree_mem_ref_addr (type, base);
     /* Ordinary memory reference: let idx_find_step substitute the IVs
        in each index and accumulate the total step.  */
1648 ifs_ivopts_data.ivopts_data = data;
1649 ifs_ivopts_data.stmt = stmt;
1650 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1651 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1652 || integer_zerop (ifs_ivopts_data.step))
1654 step = ifs_ivopts_data.step;
1656 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1657 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1659 /* Check that the base expression is addressable. This needs
1660 to be done after substituting bases of IVs into it. */
1661 if (may_be_nonaddressable_p (base))
1664 /* Moreover, on strict alignment platforms, check that it is
1665 sufficiently aligned. */
1666 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1669 base = build_fold_addr_expr (base);
1671 /* Substituting bases of IVs into the base expression might
1672 have caused folding opportunities. */
1673 if (TREE_CODE (base) == ADDR_EXPR)
1675 tree *ref = &TREE_OPERAND (base, 0);
1676 while (handled_component_p (*ref))
1677 ref = &TREE_OPERAND (*ref, 0);
1678 if (TREE_CODE (*ref) == INDIRECT_REF)
1680 tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
     /* Record the decomposed address as an address-type IV use.  */
1687 civ = alloc_iv (base, step);
1688 record_use (data, op_p, civ, stmt, USE_ADDRESS);
     /* Failure path (label elided): just record the plain index uses.  */
1692 for_each_index (op_p, idx_record_use, data);
1695 /* Finds and records invariants used in STMT. */
1698 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1701 use_operand_p use_p;
     /* Walk every SSA use operand of STMT (PHI or ordinary statement)
        and record it as a possible invariant.  */
1704 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1706 op = USE_FROM_PTR (use_p);
1707 record_invariant (data, op, false);
1711 /* Finds interesting uses of induction variables in the statement STMT. */
1714 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1717 tree op, *lhs, *rhs;
1719 use_operand_p use_p;
1720 enum tree_code code;
1722 find_invariants_stmt (data, stmt);
     /* Conditions are handled specially (compare-type uses).  */
1724 if (gimple_code (stmt) == GIMPLE_COND)
1726 find_interesting_uses_cond (data, stmt);
1730 if (is_gimple_assign (stmt))
1732 lhs = gimple_assign_lhs_ptr (stmt);
1733 rhs = gimple_assign_rhs1_ptr (stmt);
1735 if (TREE_CODE (*lhs) == SSA_NAME)
1737 /* If the statement defines an induction variable, the uses are not
1738 interesting by themselves. */
1740 iv = get_iv (data, *lhs);
1742 if (iv && !integer_zerop (iv->step))
1746 code = gimple_assign_rhs_code (stmt);
1747 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1748 && (REFERENCE_CLASS_P (*rhs)
1749 || is_gimple_val (*rhs)))
     /* Memory references on either side become address uses; plain
        values become nonlinear-expression uses.  */
1751 if (REFERENCE_CLASS_P (*rhs))
1752 find_interesting_uses_address (data, stmt, rhs);
1754 find_interesting_uses_op (data, *rhs);
1756 if (REFERENCE_CLASS_P (*lhs))
1757 find_interesting_uses_address (data, stmt, lhs);
1760 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1762 find_interesting_uses_cond (data, stmt);
1766 /* TODO -- we should also handle address uses of type
1768 memory = call (whatever);
     /* A PHI in the loop header that is itself a biv does not yield
        interesting uses of its arguments.  */
1775 if (gimple_code (stmt) == GIMPLE_PHI
1776 && gimple_bb (stmt) == data->current_loop->header)
1778 iv = get_iv (data, PHI_RESULT (stmt));
1780 if (iv && !integer_zerop (iv->step))
     /* Fallback: record each SSA operand that has an associated IV as a
        generic (nonlinear) use.  */
1784 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1786 op = USE_FROM_PTR (use_p);
1788 if (TREE_CODE (op) != SSA_NAME)
1791 iv = get_iv (data, op);
1795 find_interesting_uses_op (data, op);
1799 /* Finds interesting uses of induction variables outside of loops
1800 on loop exit edge EXIT. */
1803 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1806 gimple_stmt_iterator psi;
     /* Values live after the loop appear as PHI arguments on the exit
        edge in the destination block.  */
1809 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1811 phi = gsi_stmt (psi);
1812 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1813 if (is_gimple_reg (def))
1814 find_interesting_uses_op (data, def);
1818 /* Finds uses of the induction variables that are interesting. */
1821 find_interesting_uses (struct ivopts_data *data)
1824 gimple_stmt_iterator bsi;
1825 basic_block *body = get_loop_body (data->current_loop);
1827 struct version_info *info;
1830 if (dump_file && (dump_flags & TDF_DETAILS))
1831 fprintf (dump_file, "Uses:\n\n");
     /* Scan every block of the loop body; exit edges first (outside
        uses), then PHIs and statements inside the block.  */
1833 for (i = 0; i < data->current_loop->num_nodes; i++)
1838 FOR_EACH_EDGE (e, ei, bb->succs)
1839 if (e->dest != EXIT_BLOCK_PTR
1840 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1841 find_interesting_uses_outside (data, e);
1843 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1844 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1845 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1846 find_interesting_uses_stmt (data, gsi_stmt (bsi));
     /* Dump the discovered invariants for debugging.  */
1849 if (dump_file && (dump_flags & TDF_DETAILS))
1853 fprintf (dump_file, "\n");
1855 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1857 info = ver_info (data, i);
1860 fprintf (dump_file, " ");
1861 print_generic_expr (dump_file, info->name, TDF_SLIM);
1862 fprintf (dump_file, " is invariant (%d)%s\n",
1863 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1867 fprintf (dump_file, "\n");
1873 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1874 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1875 we are at the top-level of the processed address. */
1878 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1879 unsigned HOST_WIDE_INT *offset)
1881 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1882 enum tree_code code;
1883 tree type, orig_type = TREE_TYPE (expr);
1884 unsigned HOST_WIDE_INT off0, off1, st;
1885 tree orig_expr = expr;
1889 type = TREE_TYPE (expr);
1890 code = TREE_CODE (expr);
     /* Integer constant (case label elided): the whole value becomes the
        offset and the expression collapses to zero.  */
1896 if (!cst_and_fits_in_hwi (expr)
1897 || integer_zerop (expr))
1900 *offset = int_cst_value (expr);
1901 return build_int_cst (orig_type, 0);
1903 case POINTER_PLUS_EXPR:
     /* Sum/difference: strip offsets from both operands and combine.  */
1906 op0 = TREE_OPERAND (expr, 0);
1907 op1 = TREE_OPERAND (expr, 1);
1909 op0 = strip_offset_1 (op0, false, false, &off0);
1910 op1 = strip_offset_1 (op1, false, false, &off1);
1912 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1913 if (op0 == TREE_OPERAND (expr, 0)
1914 && op1 == TREE_OPERAND (expr, 1))
1917 if (integer_zerop (op1))
1919 else if (integer_zerop (op0))
1921 if (code == MINUS_EXPR)
1922 expr = fold_build1 (NEGATE_EXPR, type, op1);
1927 expr = fold_build2 (code, type, op0, op1);
1929 return fold_convert (orig_type, expr);
1932 case ARRAY_RANGE_REF:
     /* Array reference: the stripped index times the element size
        contributes to the offset (element size must be a constant).  */
1936 step = array_ref_element_size (expr);
1937 if (!cst_and_fits_in_hwi (step))
1940 st = int_cst_value (step);
1941 op1 = TREE_OPERAND (expr, 1);
1942 op1 = strip_offset_1 (op1, false, false, &off1);
1943 *offset = off1 * st;
1946 && integer_zerop (op1))
1948 /* Strip the component reference completely. */
1949 op0 = TREE_OPERAND (expr, 0);
1950 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
     /* COMPONENT_REF handling (case label elided): a constant field
        offset lets us strip the reference entirely.  */
1960 tmp = component_ref_field_offset (expr);
1962 && cst_and_fits_in_hwi (tmp))
1964 /* Strip the component reference completely. */
1965 op0 = TREE_OPERAND (expr, 0);
1966 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1967 *offset = off0 + int_cst_value (tmp);
     /* ADDR_EXPR (case label elided): recurse into the address operand
        with inside_addr and top_compref set.  */
1973 op0 = TREE_OPERAND (expr, 0);
1974 op0 = strip_offset_1 (op0, true, true, &off0);
1977 if (op0 == TREE_OPERAND (expr, 0))
1980 expr = build_fold_addr_expr (op0);
1981 return fold_convert (orig_type, expr);
1984 inside_addr = false;
1991 /* Default handling of expressions for that we want to recurse into
1992 the first operand. */
1993 op0 = TREE_OPERAND (expr, 0);
1994 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1997 if (op0 == TREE_OPERAND (expr, 0)
1998 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2001 expr = copy_node (expr);
2002 TREE_OPERAND (expr, 0) = op0;
2004 TREE_OPERAND (expr, 1) = op1;
2006 /* Inside address, we might strip the top level component references,
2007 thus changing type of the expression. Handling of ADDR_EXPR
2009 expr = fold_convert (orig_type, expr);
2014 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2017 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
     /* Top-level entry: not inside an address, not a top component ref.  */
2019 return strip_offset_1 (expr, false, false, offset);
2022 /* Returns variant of TYPE that can be used as base for different uses.
2023 We return unsigned type with the same precision, which avoids problems
2027 generic_type_for (tree type)
     /* Pointers are represented as their unsigned integer equivalent.  */
2029 if (POINTER_TYPE_P (type))
2030 return unsigned_type_for (type);
     /* Already-unsigned types are returned as-is (return elided here);
        signed types are converted to the unsigned variant.  */
2032 if (TYPE_UNSIGNED (type))
2035 return unsigned_type_for (type);
2038 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2039 the bitmap to that we should store it. */
2041 static struct ivopts_data *fd_ivopts_data;
2043 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2045 bitmap *depends_on = (bitmap *) data;
2046 struct version_info *info;
     /* Only SSA names can be recorded invariants.  */
2048 if (TREE_CODE (*expr_p) != SSA_NAME)
2050 info = name_info (fd_ivopts_data, *expr_p);
     /* Skip names that are not invariants or have nonlinear uses.  */
2052 if (!info->inv_id || info->has_nonlin_use)
     /* Lazily allocate the bitmap and record the invariant's id.  */
2056 *depends_on = BITMAP_ALLOC (NULL);
2057 bitmap_set_bit (*depends_on, info->inv_id);
2062 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2063 position to POS. If USE is not NULL, the candidate is set as related to
2064 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2065 replacement of the final value of the iv by a direct computation. */
2067 static struct iv_cand *
2068 add_candidate_1 (struct ivopts_data *data,
2069 tree base, tree step, bool important, enum iv_position pos,
2070 struct iv_use *use, gimple incremented_at)
2073 struct iv_cand *cand = NULL;
2074 tree type, orig_type;
     /* Canonicalize the candidate to the generic (unsigned) type.  */
2078 orig_type = TREE_TYPE (base);
2079 type = generic_type_for (orig_type);
2080 if (type != orig_type)
2082 base = fold_convert (type, base);
2083 step = fold_convert (type, step);
     /* Look for an existing candidate with the same position, increment
        statement, base and step, to avoid duplicates.  */
2087 for (i = 0; i < n_iv_cands (data); i++)
2089 cand = iv_cand (data, i);
2091 if (cand->pos != pos)
2094 if (cand->incremented_at != incremented_at)
2108 if (operand_equal_p (base, cand->iv->base, 0)
2109 && operand_equal_p (step, cand->iv->step, 0))
     /* Not found: create a fresh candidate.  */
2113 if (i == n_iv_cands (data))
2115 cand = XCNEW (struct iv_cand)
2121 cand->iv = alloc_iv (base, step);
2124 if (pos != IP_ORIGINAL && cand->iv)
2126 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2127 cand->var_after = cand->var_before;
2129 cand->important = important;
2130 cand->incremented_at = incremented_at;
2131 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
     /* Non-constant steps introduce dependencies on invariants.  */
2134 && TREE_CODE (step) != INTEGER_CST)
2136 fd_ivopts_data = data;
2137 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2140 if (dump_file && (dump_flags & TDF_DETAILS))
2141 dump_cand (dump_file, cand);
     /* Promote an existing candidate to important if requested.  */
2144 if (important && !cand->important)
2146 cand->important = true;
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2148 fprintf (dump_file, "Candidate %d is important\n", cand->id);
     /* Relate the candidate to USE if one was given.  */
2153 bitmap_set_bit (use->related_cands, i);
2154 if (dump_file && (dump_flags & TDF_DETAILS))
2155 fprintf (dump_file, "Candidate %d is related to use %d\n",
2162 /* Returns true if incrementing the induction variable at the end of the LOOP
2165 The purpose is to avoid splitting latch edge with a biv increment, thus
2166 creating a jump, possibly confusing other optimization passes and leaving
2167 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2168 is not available (so we do not have a better alternative), or if the latch
2169 edge is already nonempty. */
2172 allow_ip_end_pos_p (struct loop *loop)
     /* No normal position available: IP_END is the only option.  */
2174 if (!ip_normal_pos (loop))
     /* Latch block already contains code, so no new jump is created.  */
2177 if (!empty_block_p (ip_end_pos (loop)))
2183 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2184 position to POS. If USE is not NULL, the candidate is set as related to
2185 it. The candidate computation is scheduled on all available positions. */
2188 add_candidate (struct ivopts_data *data,
2189 tree base, tree step, bool important, struct iv_use *use)
     /* Try both increment positions: before the exit test (preferred)
        and at the end of the latch (only when allowed).  */
2191 if (ip_normal_pos (data->current_loop))
2192 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2193 if (ip_end_pos (data->current_loop)
2194 && allow_ip_end_pos_p (data->current_loop))
2195 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2198 /* Add a standard "0 + 1 * iteration" iv candidate for a
2199 type with SIZE bits. */
2202 add_standard_iv_candidates_for_size (struct ivopts_data *data,
     /* Use the unsigned integer type of the requested width.  */
2205 tree type = lang_hooks.types.type_for_size (size, true);
2206 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2210 /* Adds standard iv candidates. */
2213 add_standard_iv_candidates (struct ivopts_data *data)
2215 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2217 /* The same for a double-integer type if it is still fast enough. */
2218 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2219 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2223 /* Adds candidates bases on the old induction variable IV. */
2226 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2230 struct iv_cand *cand;
2232 add_candidate (data, iv->base, iv->step, true, NULL);
2234 /* The same, but with initial value zero. */
2235 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2236 add_candidate (data, size_int (0), iv->step, true, NULL);
2238 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2239 iv->step, true, NULL);
     /* If the biv comes from a loop-header PHI, also add an IP_ORIGINAL
        candidate so the original variable can be kept untouched.  */
2241 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2242 if (gimple_code (phi) == GIMPLE_PHI)
2244 /* Additionally record the possibility of leaving the original iv
2246 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2247 cand = add_candidate_1 (data,
2248 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2249 SSA_NAME_DEF_STMT (def));
2250 cand->var_before = iv->ssa_name;
2251 cand->var_after = def;
2255 /* Adds candidates based on the old induction variables. */
2258 add_old_ivs_candidates (struct ivopts_data *data)
     /* Walk all relevant SSA versions and add candidates for every
        genuine (nonzero-step) biv among them.  */
2264 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2266 iv = ver_info (data, i)->iv;
2267 if (iv && iv->biv_p && !integer_zerop (iv->step))
2268 add_old_iv_candidates (data, iv);
2272 /* Adds candidates based on the value of the induction variable IV and USE. */
2275 add_iv_value_candidates (struct ivopts_data *data,
2276 struct iv *iv, struct iv_use *use)
2278 unsigned HOST_WIDE_INT offset;
     /* First, the iv itself.  */
2282 add_candidate (data, iv->base, iv->step, false, use);
2284 /* The same, but with initial value zero. Make such variable important,
2285 since it is generic enough so that possibly many uses may be based
2287 basetype = TREE_TYPE (iv->base);
2288 if (POINTER_TYPE_P (basetype))
2289 basetype = sizetype;
2290 add_candidate (data, build_int_cst (basetype, 0),
2291 iv->step, true, use);
2293 /* Third, try removing the constant offset. Make sure to even
2294 add a candidate for &a[0] vs. (T *)&a. */
2295 base = strip_offset (iv->base, &offset);
2297 || base != iv->base)
2298 add_candidate (data, base, iv->step, false, use);
2301 /* Adds candidates based on the uses. */
2304 add_derived_ivs_candidates (struct ivopts_data *data)
2308 for (i = 0; i < n_iv_uses (data); i++)
2310 struct iv_use *use = iv_use (data, i);
     /* Dispatch on the use kind; other case labels are elided here.  */
2317 case USE_NONLINEAR_EXPR:
2320 /* Just add the ivs based on the value of the iv used here. */
2321 add_iv_value_candidates (data, use->iv, use);
2330 /* Record important candidates and add them to related_cands bitmaps
2334 record_important_candidates (struct ivopts_data *data)
     /* Collect the ids of all important candidates.  */
2339 for (i = 0; i < n_iv_cands (data); i++)
2341 struct iv_cand *cand = iv_cand (data, i);
2343 if (cand->important)
2344 bitmap_set_bit (data->important_candidates, i);
     /* With few candidates we consider all of them for every use.  */
2347 data->consider_all_candidates = (n_iv_cands (data)
2348 <= CONSIDER_ALL_CANDIDATES_BOUND);
2350 if (data->consider_all_candidates)
2352 /* We will not need "related_cands" bitmaps in this case,
2353 so release them to decrease peak memory consumption. */
2354 for (i = 0; i < n_iv_uses (data); i++)
2356 use = iv_use (data, i);
2357 BITMAP_FREE (use->related_cands);
2362 /* Add important candidates to the related_cands bitmaps. */
2363 for (i = 0; i < n_iv_uses (data); i++)
2364 bitmap_ior_into (iv_use (data, i)->related_cands,
2365 data->important_candidates);
2369 /* Finds the candidates for the induction variables. */
2372 find_iv_candidates (struct ivopts_data *data)
2374 /* Add commonly used ivs. */
2375 add_standard_iv_candidates (data);
2377 /* Add old induction variables. */
2378 add_old_ivs_candidates (data);
2380 /* Add induction variables derived from uses. */
2381 add_derived_ivs_candidates (data);
2383 /* Record the important candidates. */
2384 record_important_candidates (data);
2387 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2388 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2389 we allocate a simple list to every use. */
2392 alloc_use_cost_map (struct ivopts_data *data)
2394 unsigned i, size, s, j;
2396 for (i = 0; i < n_iv_uses (data); i++)
2398 struct iv_use *use = iv_use (data, i);
2401 if (data->consider_all_candidates)
2402 size = n_iv_cands (data);
     /* Otherwise size the map from the number of related candidates.  */
2406 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2411 /* Round up to the power of two, so that moduling by it is fast. */
2412 for (size = 1; size < s; size <<= 1)
2416 use->n_map_members = size;
2417 use->cost_map = XCNEWVEC (struct cost_pair, size);
2421 /* Returns description of computation cost of expression whose runtime
2422 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2425 new_cost (unsigned runtime, unsigned complexity)
2429 cost.cost = runtime;
2430 cost.complexity = complexity;
2435 /* Adds costs COST1 and COST2. */
2438 add_costs (comp_cost cost1, comp_cost cost2)
     /* Componentwise addition; COST1 is modified and returned by value.  */
2440 cost1.cost += cost2.cost;
2441 cost1.complexity += cost2.complexity;
2445 /* Subtracts costs COST1 and COST2. */
2448 sub_costs (comp_cost cost1, comp_cost cost2)
     /* Componentwise subtraction; COST1 is modified and returned by value.  */
2450 cost1.cost -= cost2.cost;
2451 cost1.complexity -= cost2.complexity;
2456 /* Returns a negative number if COST1 < COST2, a positive number if
2457 COST1 > COST2, and 0 if COST1 = COST2. */
2460 compare_costs (comp_cost cost1, comp_cost cost2)
     /* Complexity is the tie-breaker when runtime costs are equal.  */
2462 if (cost1.cost == cost2.cost)
2463 return cost1.complexity - cost2.complexity;
2465 return cost1.cost - cost2.cost;
2468 /* Returns true if COST is infinite. */
2471 infinite_cost_p (comp_cost cost)
2473 return cost.cost == INFTY;
2476 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2477 on invariants DEPENDS_ON and that the value used in expressing it
2481 set_use_iv_cost (struct ivopts_data *data,
2482 struct iv_use *use, struct iv_cand *cand,
2483 comp_cost cost, bitmap depends_on, tree value)
     /* Infinite cost pairs are not recorded; free the dependency bitmap.  */
2487 if (infinite_cost_p (cost))
2489 BITMAP_FREE (depends_on);
     /* Full 2-D map: index directly by candidate id.  */
2493 if (data->consider_all_candidates)
2495 use->cost_map[cand->id].cand = cand;
2496 use->cost_map[cand->id].cost = cost;
2497 use->cost_map[cand->id].depends_on = depends_on;
2498 use->cost_map[cand->id].value = value;
     /* Sparse map: open-addressed hash keyed on candidate id; scan from
        the home slot for the first free entry, wrapping around.  */
2502 /* n_map_members is a power of two, so this computes modulo. */
2503 s = cand->id & (use->n_map_members - 1);
2504 for (i = s; i < use->n_map_members; i++)
2505 if (!use->cost_map[i].cand)
2507 for (i = 0; i < s; i++)
2508 if (!use->cost_map[i].cand)
2514 use->cost_map[i].cand = cand;
2515 use->cost_map[i].cost = cost;
2516 use->cost_map[i].depends_on = depends_on;
2517 use->cost_map[i].value = value;
2520 /* Gets cost of (USE, CANDIDATE) pair. */
2522 static struct cost_pair *
2523 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2524 struct iv_cand *cand)
2527 struct cost_pair *ret;
     /* Full 2-D map: direct lookup by candidate id.  */
2532 if (data->consider_all_candidates)
2534 ret = use->cost_map + cand->id;
     /* Sparse map: probe from the home slot, wrapping, mirroring the
        insertion order used by set_use_iv_cost.  */
2541 /* n_map_members is a power of two, so this computes modulo. */
2542 s = cand->id & (use->n_map_members - 1);
2543 for (i = s; i < use->n_map_members; i++)
2544 if (use->cost_map[i].cand == cand)
2545 return use->cost_map + i;
2547 for (i = 0; i < s; i++)
2548 if (use->cost_map[i].cand == cand)
2549 return use->cost_map + i;
2554 /* Returns estimate on cost of computing SEQ. */
2557 seq_cost (rtx seq, bool speed)
     /* Sum the rtx_cost of each single-set insn in the sequence.  */
2562 for (; seq; seq = NEXT_INSN (seq))
2564 set = single_set (seq);
2566 cost += rtx_cost (set, SET,speed);
2574 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2576 produce_memory_decl_rtl (tree obj, int *regno)
     /* Static/external objects get a symbolic memory address.  */
2581 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2583 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2584 x = gen_rtx_SYMBOL_REF (Pmode, name);
2585 SET_SYMBOL_REF_DECL (x, obj);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x);
2587 targetm.encode_section_info (obj, x, true);
     /* Other objects get a fresh fake register as their address.  */
2591 x = gen_raw_REG (Pmode, (*regno)++);
2592 x = gen_rtx_MEM (DECL_MODE (obj), x);
2598 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2599 walk_tree. DATA contains the actual fake register number. */
2602 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2604 tree obj = NULL_TREE;
2606 int *regno = (int *) data;
2608 switch (TREE_CODE (*expr_p))
     /* ADDR_EXPR (case label elided): dig down to the base object and
        make sure it lives in memory.  */
2611 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2612 handled_component_p (*expr_p);
2613 expr_p = &TREE_OPERAND (*expr_p, 0))
2616 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2617 x = produce_memory_decl_rtl (obj, regno);
     /* SSA names map to a fresh fake register of the variable's mode.  */
2622 obj = SSA_NAME_VAR (*expr_p);
2623 if (!DECL_RTL_SET_P (obj))
2624 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
     /* Plain decls: memory for BLKmode, a fake register otherwise.  */
2633 if (DECL_RTL_SET_P (obj))
2636 if (DECL_MODE (obj) == BLKmode)
2637 x = produce_memory_decl_rtl (obj, regno);
2639 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
     /* Remember the decl so the fake RTL can be reset afterwards.  */
2649 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2650 SET_DECL_RTL (obj, x);
2656 /* Determines cost of the computation of EXPR. */
2659 computation_cost (tree expr, bool speed)
2662 tree type = TREE_TYPE (expr);
2664 /* Avoid using hard regs in ways which may be unsupported. */
2665 int regno = LAST_VIRTUAL_REGISTER + 1;
2666 enum function_frequency real_frequency = cfun->function_frequency;
     /* Temporarily pretend a normal-frequency, SPEED-profiled context so
        the expansion costs reflect the requested optimization goal.  */
2668 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2669 crtl->maybe_hot_insn_p = speed;
2670 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2672 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2675 default_rtl_profile ();
2676 cfun->function_frequency = real_frequency;
     /* Cost of the generated sequence, plus the address cost if the
        result ended up as a memory reference.  */
2678 cost = seq_cost (seq, speed);
2680 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2685 /* Returns variable containing the value of candidate CAND at statement AT. */
2688 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
     /* After the increment the updated variable holds the value.  */
2690 if (stmt_after_increment (loop, cand, stmt))
2691 return cand->var_after;
2693 return cand->var_before;
2696 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2697 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2700 tree_int_cst_sign_bit (const_tree t)
2702 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2703 unsigned HOST_WIDE_INT w;
     /* Pick the low or high word of the constant depending on where the
        sign bit of the type falls.  */
2705 if (bitno < HOST_BITS_PER_WIDE_INT)
2706 w = TREE_INT_CST_LOW (t);
2709 w = TREE_INT_CST_HIGH (t);
2710 bitno -= HOST_BITS_PER_WIDE_INT;
2713 return (w >> bitno) & 1;
2716 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2717 same precision that is at least as wide as the precision of TYPE, stores
2718 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2722 determine_common_wider_type (tree *a, tree *b)
2724 tree wider_type = NULL;
2726 tree atype = TREE_TYPE (*a);
     /* A must be a conversion from a type at least as wide as ATYPE.  */
2728 if (CONVERT_EXPR_P (*a))
2730 suba = TREE_OPERAND (*a, 0);
2731 wider_type = TREE_TYPE (suba);
2732 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
     /* B must be a conversion from a type of the same precision.  */
2738 if (CONVERT_EXPR_P (*b))
2740 subb = TREE_OPERAND (*b, 0);
2741 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2752 /* Determines the expression by that USE is expressed from induction variable
2753 CAND at statement AT in LOOP. The expression is stored in a decomposed
2754 form into AFF. Returns false if USE cannot be expressed using CAND. */
2757 get_computation_aff (struct loop *loop,
2758 struct iv_use *use, struct iv_cand *cand, gimple at,
2759 struct affine_tree_combination *aff)
2761 tree ubase = use->iv->base;
2762 tree ustep = use->iv->step;
2763 tree cbase = cand->iv->base;
2764 tree cstep = cand->iv->step, cstep_common;
2765 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2766 tree common_type, var;
2768 aff_tree cbase_aff, var_aff;
2771 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2773 /* We do not have a precision to express the values of use. */
2777 var = var_at_stmt (loop, cand, at);
2778 uutype = unsigned_type_for (utype);
2780 /* If the conversion is not noop, perform it. */
2781 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2783 cstep = fold_convert (uutype, cstep);
2784 cbase = fold_convert (uutype, cbase);
2785 var = fold_convert (uutype, var);
     /* USTEP must be a constant multiple RAT of CSTEP, else USE cannot
        be expressed from CAND.  */
2788 if (!constant_multiple_of (ustep, cstep, &rat))
2791 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2792 type, we achieve better folding by computing their difference in this
2793 wider type, and cast the result to UUTYPE. We do not need to worry about
2794 overflows, as all the arithmetics will in the end be performed in UUTYPE
2796 common_type = determine_common_wider_type (&ubase, &cbase);
2798 /* use = ubase - ratio * cbase + ratio * var. */
2799 tree_to_aff_combination (ubase, common_type, aff);
2800 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2801 tree_to_aff_combination (var, uutype, &var_aff);
2803 /* We need to shift the value if we are after the increment. */
2804 if (stmt_after_increment (loop, cand, at))
2808 if (common_type != uutype)
2809 cstep_common = fold_convert (common_type, cstep)
2811 cstep_common = cstep;
2813 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2814 aff_combination_add (&cbase_aff, &cstep_aff);
     /* Combine: AFF = ubase - rat * cbase + rat * var.  */
2817 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2818 aff_combination_add (aff, &cbase_aff);
2819 if (common_type != uutype)
2820 aff_combination_convert (aff, uutype);
2822 aff_combination_scale (&var_aff, rat);
2823 aff_combination_add (aff, &var_aff);
2828 /* Determines the expression by that USE is expressed from induction variable
2829 CAND at statement AT in LOOP. The computation is unshared. */
2832 get_computation_at (struct loop *loop,
2833 struct iv_use *use, struct iv_cand *cand, gimple at)
2836 tree type = TREE_TYPE (use->iv->base);
     /* Build the affine form, then fold it back into a tree of the
        use's original type.  */
2838 if (!get_computation_aff (loop, use, cand, at, &aff))
2840 unshare_aff_combination (&aff);
2841 return fold_convert (type, aff_combination_to_tree (&aff));
2844 /* Determines the expression by that USE is expressed from induction variable
2845 CAND in LOOP. The computation is unshared. */
2848 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
     /* Convenience wrapper: evaluate at the use's own statement.  */
2850 return get_computation_at (loop, use, cand, use->stmt);
2853 /* Returns cost of addition in MODE. */
2856 add_cost (enum machine_mode mode, bool speed)
     /* Cache per-mode results; computed lazily by expanding a sample
        register-register addition and costing the sequence.  */
2858 static unsigned costs[NUM_MACHINE_MODES];
2866 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2867 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2868 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2873 cost = seq_cost (seq, speed);
2879 if (dump_file && (dump_flags & TDF_DETAILS))
2880 fprintf (dump_file, "Addition in %s costs %d\n",
2881 GET_MODE_NAME (mode), cost);
2885 /* Entry in a hashtable of already known costs for multiplication. */
2888 HOST_WIDE_INT cst; /* The constant to multiply by. */
2889 enum machine_mode mode; /* In mode. */
2890 unsigned cost; /* The cost. */
2893 /* Counts hash value for the ENTRY. */
2896 mbc_entry_hash (const void *entry)
2898 const struct mbc_entry *e = (const struct mbc_entry *) entry;
     /* Mix the mode and the constant (modulo a prime) into one hash.  */
2900 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2903 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2906 mbc_entry_eq (const void *entry1, const void *entry2)
2908 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2909 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2911 return (e1->mode == e2->mode
2912 && e1->cst == e2->cst);
2915 /* Returns cost of multiplication by constant CST in MODE. */
2918 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
     /* Results are memoized in a (mode, cst)-keyed hash table that is
        created on first use.  */
2920 static htab_t costs;
2921 struct mbc_entry **cached, act;
2926 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2930 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2932 return (*cached)->cost;
     /* Cache miss: expand a sample multiplication and cost the sequence.  */
2934 *cached = XNEW (struct mbc_entry);
2935 (*cached)->mode = mode;
2936 (*cached)->cst = cst;
2939 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2940 gen_int_mode (cst, mode), NULL_RTX, 0);
2944 cost = seq_cost (seq, speed);
2946 if (dump_file && (dump_flags & TDF_DETAILS))
2947 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2948 (int) cst, GET_MODE_NAME (mode), cost);
2950 (*cached)->cost = cost;
2955 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2956 validity for a memory reference accessing memory of mode MODE. */
2959 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2961 #define MAX_RATIO 128
/* Per-mode bitmap of valid multipliers in [-MAX_RATIO, MAX_RATIO],
   filled lazily on first query for each mode.  */
2962 static sbitmap valid_mult[MAX_MACHINE_MODE];
2964 if (!valid_mult[mode])
2966 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2970 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2971 sbitmap_zero (valid_mult[mode]);
/* Build "reg1 * i" and ask the target whether it is a legitimate
   address for MODE, for every candidate ratio i.  */
2972 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2973 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2975 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2976 if (memory_address_p (mode, addr))
/* Bias by MAX_RATIO so negative ratios map to valid bit indices.  */
2977 SET_BIT (valid_mult[mode], i + MAX_RATIO)
2980 if (dump_file && (dump_flags & TDF_DETAILS))
2982 fprintf (dump_file, " allowed multipliers:");
2983 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2984 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2985 fprintf (dump_file, " %d", (int) i);
2986 fprintf (dump_file, "\n");
2987 fprintf (dump_file, "\n");
/* Ratios outside the probed window are conservatively rejected.  */
2991 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2994 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2997 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2998 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2999 variable is omitted. Compute the cost for a memory reference that accesses
3000 a memory location of mode MEM_MODE.
3002 TODO -- there must be some better way. This all is quite crude. */
3005 get_address_cost (bool symbol_present, bool var_present,
3006 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3007 enum machine_mode mem_mode,
/* Per-memory-mode caches: valid offset range, a representative ratio and
   offset, and a cost table indexed by [mode][sym][var][off][rat].  All of
   this is computed once per MEM_MODE on first use.  */
3010 static bool initialized[MAX_MACHINE_MODE];
3011 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3012 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3013 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3014 unsigned cost, acost, complexity;
3015 bool offset_p, ratio_p;
3016 HOST_WIDE_INT s_offset;
3017 unsigned HOST_WIDE_INT mask;
3020 if (!initialized[mem_mode])
3023 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3024 int old_cse_not_expected;
3025 unsigned sym_p, var_p, off_p, rat_p, add_c;
3026 rtx seq, addr, base;
3029 initialized[mem_mode] = true;
3031 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Probe increasing powers of two to find the largest positive constant
   offset the target accepts in an address of MEM_MODE...  */
3033 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3034 for (i = start; i <= 1 << 20; i <<= 1)
3036 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3037 if (!memory_address_p (mem_mode, addr))
3040 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3041 off[mem_mode] = max_offset[mem_mode];
/* ... and symmetrically for the most negative accepted offset.  */
3043 for (i = start; i <= 1 << 20; i <<= 1)
3045 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3046 if (!memory_address_p (mem_mode, addr))
3049 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3051 if (dump_file && (dump_flags & TDF_DETAILS))
3053 fprintf (dump_file, "get_address_cost:\n");
3054 fprintf (dump_file, " min offset %s %d\n",
3055 GET_MODE_NAME (mem_mode),
3056 (int) min_offset[mem_mode]);
3057 fprintf (dump_file, " max offset %s %d\n",
3058 GET_MODE_NAME (mem_mode),
3059 (int) max_offset[mem_mode]);
/* Pick some ratio >= 2 that is valid in an address, as the
   representative for the "ratio present" cost measurements.  */
3063 for (i = 2; i <= MAX_RATIO; i++)
3064 if (multiplier_allowed_in_address_p (i, mem_mode))
3070 /* Compute the cost of various addressing modes. */
3072 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3073 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* Enumerate all 16 combinations of {symbol, var, offset, ratio} parts;
   bit i of the loop counter selects whether each part is present.  */
3075 for (i = 0; i < 16; i++)
3078 var_p = (i >> 1) & 1;
3079 off_p = (i >> 2) & 1;
3080 rat_p = (i >> 3) & 1;
3084 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3085 gen_int_mode (rat[mem_mode], Pmode));
3088 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3092 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3093 /* ??? We can run into trouble with some backends by presenting
3094 it with symbols which haven't been properly passed through
3095 targetm.encode_section_info. By setting the local bit, we
3096 enhance the probability of things working. */
3097 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3100 base = gen_rtx_fmt_e (CONST, Pmode,
3101 gen_rtx_fmt_ee (PLUS, Pmode,
3103 gen_int_mode (off[mem_mode],
3107 base = gen_int_mode (off[mem_mode], Pmode);
3112 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3115 /* To avoid splitting addressing modes, pretend that no cse will
3117 old_cse_not_expected = cse_not_expected;
3118 cse_not_expected = true;
3119 addr = memory_address (mem_mode, addr);
3120 cse_not_expected = old_cse_not_expected;
/* Cost = cost of the legitimization sequence plus the target's cost
   for the resulting address.  */
3124 acost = seq_cost (seq, speed);
3125 acost += address_cost (addr, mem_mode, speed);
3129 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3132 /* On some targets, it is quite expensive to load symbol to a register,
3133 which makes addresses that contain symbols look much more expensive.
3134 However, the symbol will have to be loaded in any case before the
3135 loop (and quite likely we have it in register already), so it does not
3136 make much sense to penalize them too heavily. So make some final
3137 tweaks for the SYMBOL_PRESENT modes:
3139 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3140 var is cheaper, use this mode with small penalty.
3141 If VAR_PRESENT is true, try whether the mode with
3142 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3143 if this is the case, use it. */
3144 add_c = add_cost (Pmode, speed);
3145 for (i = 0; i < 8; i++)
3148 off_p = (i >> 1) & 1;
3149 rat_p = (i >> 2) & 1;
3151 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3155 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3156 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3159 if (dump_file && (dump_flags & TDF_DETAILS))
3161 fprintf (dump_file, "Address costs:\n");
3163 for (i = 0; i < 16; i++)
3166 var_p = (i >> 1) & 1;
3167 off_p = (i >> 2) & 1;
3168 rat_p = (i >> 3) & 1;
3170 fprintf (dump_file, " ");
3172 fprintf (dump_file, "sym + ");
3174 fprintf (dump_file, "var + ");
3176 fprintf (dump_file, "cst + ");
3178 fprintf (dump_file, "rat * ");
3180 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3181 fprintf (dump_file, "index costs %d\n", acost);
3183 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode into s_offset so that
   large unsigned offsets are treated as negative displacements.  */
3187 bits = GET_MODE_BITSIZE (Pmode);
3188 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3190 if ((offset >> (bits - 1) & 1))
/* The offset/ratio can be folded into the address only when they are
   nonzero/non-one and within what the target accepts.  */
3195 offset_p = (s_offset != 0
3196 && min_offset[mem_mode] <= s_offset
3197 && s_offset <= max_offset[mem_mode]);
3198 ratio_p = (ratio != 1
3199 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Parts that cannot go into the address must be computed separately:
   a multiplication for the ratio, an addition for the offset.  */
3201 if (ratio != 1 && !ratio_p)
3202 cost += multiply_by_cost (ratio, Pmode, speed);
3204 if (s_offset && !offset_p && !symbol_present)
3205 cost += add_cost (Pmode, speed);
3207 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
/* Complexity counts how many parts the address has; used to break cost
   ties in favor of simpler addresses.  */
3208 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3209 return new_cost (cost + acost, complexity);
3212 /* Estimates cost of forcing expression EXPR into a variable. */
3215 force_expr_to_var_cost (tree expr, bool speed)
/* Baseline costs of materializing an integer constant, a symbol address
   and a symbol+offset address, measured once (index 0 = size, 1 = speed).  */
3217 static bool costs_initialized = false;
3218 static unsigned integer_cost [2];
3219 static unsigned symbol_cost [2];
3220 static unsigned address_cost [2];
3222 comp_cost cost0, cost1, cost;
3223 enum machine_mode mode;
3225 if (!costs_initialized)
3227 tree type = build_pointer_type (integer_type_node);
/* Create a fake static variable and measure the cost of computing
   its address, with and without an offset.  */
3232 var = create_tmp_var_raw (integer_type_node, "test_var");
3233 TREE_STATIC (var) = 1;
3234 x = produce_memory_decl_rtl (var, NULL);
3235 SET_DECL_RTL (var, x);
3237 addr = build1 (ADDR_EXPR, type, var);
3240 for (i = 0; i < 2; i++)
3242 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
/* The "+ 1" slightly penalizes symbols/addresses over plain values.  */
3245 symbol_cost[i] = computation_cost (addr, i) + 1;
3248 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3250 build_int_cst (sizetype, 2000)), i) + 1;
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3253 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3254 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3255 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3256 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3257 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3258 fprintf (dump_file, "\n");
3262 costs_initialized = true;
/* SSA names and declarations are already variables -- no cost.  */
3267 if (SSA_VAR_P (expr))
3270 if (is_gimple_min_invariant (expr))
3272 if (TREE_CODE (expr) == INTEGER_CST)
3273 return new_cost (integer_cost [speed], 0);
3275 if (TREE_CODE (expr) == ADDR_EXPR)
3277 tree obj = TREE_OPERAND (expr, 0);
/* Address of a plain decl costs symbol_cost; anything more complex
   (array element, component) costs address_cost.  */
3279 if (TREE_CODE (obj) == VAR_DECL
3280 || TREE_CODE (obj) == PARM_DECL
3281 || TREE_CODE (obj) == RESULT_DECL)
3282 return new_cost (symbol_cost [speed], 0);
3285 return new_cost (address_cost [speed], 0);
3288 switch (TREE_CODE (expr))
3290 case POINTER_PLUS_EXPR:
3294 op0 = TREE_OPERAND (expr, 0);
3295 op1 = TREE_OPERAND (expr, 1);
/* Recurse into non-trivial operands to cost them as well.  */
3299 if (is_gimple_val (op0))
3302 cost0 = force_expr_to_var_cost (op0, speed);
3304 if (is_gimple_val (op1))
3307 cost1 = force_expr_to_var_cost (op1, speed);
3312 /* Just an arbitrary value, FIXME. */
3313 return new_cost (target_spill_cost[speed], 0);
3316 mode = TYPE_MODE (TREE_TYPE (expr));
3317 switch (TREE_CODE (expr))
3319 case POINTER_PLUS_EXPR:
3322 cost = new_cost (add_cost (mode, speed), 0);
/* Multiplication is only costed when one operand is a constant;
   a variable * variable product falls back to the spill cost.  */
3326 if (cst_and_fits_in_hwi (op0))
3327 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3328 else if (cst_and_fits_in_hwi (op1))
3329 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3331 return new_cost (target_spill_cost [speed], 0);
3338 cost = add_costs (cost, cost0);
3339 cost = add_costs (cost, cost1);
3341 /* Bound the cost by target_spill_cost. The parts of complicated
3342 computations often are either loop invariant or at least can
3343 be shared between several iv uses, so letting this grow without
3344 limits would not give reasonable results. */
3345 if (cost.cost > target_spill_cost [speed])
3346 cost.cost = target_spill_cost [speed];
3351 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3352 invariants the computation depends on. */
3355 force_var_cost (struct ivopts_data *data,
3356 tree expr, bitmap *depends_on)
/* Record the loop invariants EXPR uses before delegating the actual
   cost estimate to force_expr_to_var_cost.  */
3360 fd_ivopts_data = data;
3361 walk_tree (&expr, find_depends, depends_on, NULL);
3364 return force_expr_to_var_cost (expr, data->speed);
3367 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3368 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3369 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3370 invariants the computation depends on. */
3373 split_address_cost (struct ivopts_data *data,
3374 tree addr, bool *symbol_present, bool *var_present,
3375 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3378 HOST_WIDE_INT bitsize;
3379 HOST_WIDE_INT bitpos;
3381 enum machine_mode mode;
3382 int unsignedp, volatilep;
/* Peel the reference down to its base object and constant bit position.  */
3384 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3385 &unsignedp, &volatilep, false);
/* If the base is not a simple byte-aligned VAR_DECL, treat the whole
   address as an opaque variable: record its invariants and charge the
   generic spill cost.  */
3388 || bitpos % BITS_PER_UNIT != 0
3389 || TREE_CODE (core) != VAR_DECL)
3391 *symbol_present = false;
3392 *var_present = true;
3393 fd_ivopts_data = data;
3394 walk_tree (&addr, find_depends, depends_on, NULL);
3395 return new_cost (target_spill_cost[data->speed], 0);
3398 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls become symbols; automatic variables count as
   the "var" part instead.  */
3399 if (TREE_STATIC (core)
3400 || DECL_EXTERNAL (core))
3402 *symbol_present = true;
3403 *var_present = false;
3407 *symbol_present = false;
3408 *var_present = true;
3412 /* Estimates cost of expressing difference of addresses E1 - E2 as
3413 var + symbol + offset. The value of offset is added to OFFSET,
3414 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3415 part is missing. DEPENDS_ON is a set of the invariants the computation
3419 ptr_difference_cost (struct ivopts_data *data,
3420 tree e1, tree e2, bool *symbol_present, bool *var_present,
3421 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3423 HOST_WIDE_INT diff = 0;
3425 bool speed = optimize_loop_for_speed_p (data->current_loop);
3427 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Best case: the difference folds to a compile-time constant, which
   simply augments the offset -- no symbol, no variable, zero cost.  */
3429 if (ptr_difference_const (e1, e2, &diff))
3432 *symbol_present = false;
3433 *var_present = false;
/* E1 - 0 reduces to splitting the address E1 itself.  */
3437 if (integer_zerop (e2))
3438 return split_address_cost (data, TREE_OPERAND (e1, 0),
3439 symbol_present, var_present, offset, depends_on);
/* General case: force both addresses into variables and subtract.  */
3441 *symbol_present = false;
3442 *var_present = true;
3444 cost = force_var_cost (data, e1, depends_on);
3445 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3446 cost.cost += add_cost (Pmode, speed);
3451 /* Estimates cost of expressing difference E1 - E2 as
3452 var + symbol + offset. The value of offset is added to OFFSET,
3453 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3454 part is missing. DEPENDS_ON is a set of the invariants the computation
3458 difference_cost (struct ivopts_data *data,
3459 tree e1, tree e2, bool *symbol_present, bool *var_present,
3460 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3463 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3464 unsigned HOST_WIDE_INT off1, off2;
/* Strip constant offsets off both operands first; they contribute
   (off1 - off2) to the accumulated OFFSET.  */
3466 e1 = strip_offset (e1, &off1);
3467 e2 = strip_offset (e2, &off2);
3468 *offset += off1 - off2;
/* Address differences get the pointer-specific treatment.  */
3473 if (TREE_CODE (e1) == ADDR_EXPR)
3474 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3476 *symbol_present = false;
/* Identical operands cancel: the result is just the offset.  */
3478 if (operand_equal_p (e1, e2, 0))
3480 *var_present = false;
3483 *var_present = true;
3484 if (integer_zerop (e2))
3485 return force_var_cost (data, e1, depends_on);
/* 0 - e2 is costed as materializing e2 plus a multiply by -1.  */
3487 if (integer_zerop (e1))
3489 cost = force_var_cost (data, e2, depends_on);
3490 cost.cost += multiply_by_cost (-1, mode, data->speed);
/* General case: materialize both operands and add the subtraction.  */
3495 cost = force_var_cost (data, e1, depends_on);
3496 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3497 cost.cost += add_cost (mode, data->speed);
3502 /* Determines the cost of the computation by that USE is expressed
3503 from induction variable CAND. If ADDRESS_P is true, we just need
3504 to create an address from it, otherwise we want to get it into
3505 register. A set of invariants we depend on is stored in
3506 DEPENDS_ON. AT is the statement at that the value is computed. */
3509 get_computation_cost_at (struct ivopts_data *data,
3510 struct iv_use *use, struct iv_cand *cand,
3511 bool address_p, bitmap *depends_on, gimple at)
3513 tree ubase = use->iv->base, ustep = use->iv->step;
3515 tree utype = TREE_TYPE (ubase), ctype;
3516 unsigned HOST_WIDE_INT cstepi, offset = 0;
3517 HOST_WIDE_INT ratio, aratio;
3518 bool var_present, symbol_present;
/* Speed vs. size preference is taken from the block containing AT.  */
3522 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3526 /* Only consider real candidates. */
3528 return infinite_cost;
3530 cbase = cand->iv->base;
3531 cstep = cand->iv->step;
3532 ctype = TREE_TYPE (cbase);
3534 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3536 /* We do not have a precision to express the values of use. */
3537 return infinite_cost;
3542 /* Do not try to express address of an object with computation based
3543 on address of a different object. This may cause problems in rtl
3544 level alias analysis (that does not expect this to be happening,
3545 as this is illegal in C), and would be unlikely to be useful
3547 if (use->iv->base_object
3548 && cand->iv->base_object
3549 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3550 return infinite_cost;
3553 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3555 /* TODO -- add direct handling of this case. */
3559 /* CSTEPI is removed from the offset in case statement is after the
3560 increment. If the step is not constant, we use zero instead.
3561 This is a bit imprecise (there is the extra addition), but
3562 redundancy elimination is likely to transform the code so that
3563 it uses value of the variable before increment anyway,
3564 so it is not that much unrealistic. */
3565 if (cst_and_fits_in_hwi (cstep))
3566 cstepi = int_cst_value (cstep)
/* RATIO = ustep / cstep must be a known constant fitting in a signed
   HOST_WIDE_INT, otherwise the use cannot be based on this candidate.  */
3570 if (!constant_multiple_of (ustep, cstep, &rat))
3571 return infinite_cost;
3573 if (double_int_fits_in_shwi_p (rat))
3574 ratio = double_int_to_shwi (rat);
3576 return infinite_cost;
3578 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3579 or ratio == 1, it is better to handle this like
3581 ubase - ratio * cbase + ratio * var
3583 (also holds in the case ratio == -1, TODO. */
3585 if (cst_and_fits_in_hwi (cbase))
3587 offset = - ratio * int_cst_value (cbase);
3588 cost = difference_cost (data,
3589 ubase, build_int_cst (utype, 0),
3590 &symbol_present, &var_present, &offset,
3593 else if (ratio == 1)
3595 cost = difference_cost (data,
3597 &symbol_present, &var_present, &offset,
/* General case: force cbase into a register, then add the cost of
   computing ubase and of the final addition.  */
3602 cost = force_var_cost (data, cbase, depends_on);
3603 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3604 cost = add_costs (cost,
3605 difference_cost (data,
3606 ubase, build_int_cst (utype, 0),
3607 &symbol_present, &var_present,
3608 &offset, depends_on));
3611 /* If we are after the increment, the value of the candidate is higher by
3613 if (stmt_after_increment (data->current_loop, cand, at))
3614 offset -= ratio * cstepi;
3616 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3617 (symbol/var/const parts may be omitted). If we are looking for an address,
3618 find the cost of addressing this. */
3620 return add_costs (cost, get_address_cost (symbol_present, var_present,
3622 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3624 /* Otherwise estimate the costs for computing the expression. */
3625 aratio = ratio > 0 ? ratio : -ratio;
3626 if (!symbol_present && !var_present && !offset)
3629 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3635 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3639 /* Symbol + offset should be compile-time computable. */
3640 && (symbol_present || offset))
3643 /* Having offset does not affect runtime cost in case it is added to
3644 symbol, but it increases complexity. */
3648 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3653 /* Just get the expression, expand it and measure the cost. */
3654 tree comp = get_computation_at (data->current_loop, use, cand, at);
3657 return infinite_cost;
/* For address uses, measure the cost of the dereferenced value.  */
3660 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3662 return new_cost (computation_cost (comp, speed), 0);
3666 /* Determines the cost of the computation by that USE is expressed
3667 from induction variable CAND. If ADDRESS_P is true, we just need
3668 to create an address from it, otherwise we want to get it into
3669 register. A set of invariants we depend on is stored in
/* Convenience wrapper: evaluate the cost at the use's own statement.  */
3673 get_computation_cost (struct ivopts_data *data,
3674 struct iv_use *use, struct iv_cand *cand,
3675 bool address_p, bitmap *depends_on)
3677 return get_computation_cost_at (data,
3678 use, cand, address_p, depends_on, use->stmt);
3681 /* Determines cost of basing replacement of USE on CAND in a generic
3685 determine_use_iv_cost_generic (struct ivopts_data *data,
3686 struct iv_use *use, struct iv_cand *cand)
3691 /* The simple case first -- if we need to express value of the preserved
3692 original biv, the cost is 0. This also prevents us from counting the
3693 cost of increment twice -- once at this use and once in the cost of
3695 if (cand->pos == IP_ORIGINAL
3696 && cand->incremented_at == use->stmt)
3698 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3702 cost = get_computation_cost (data, use, cand, false, &depends_on);
3703 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Returns false when the use cannot be based on this candidate.  */
3705 return !infinite_cost_p (cost);
3708 /* Determines cost of basing replacement of USE on CAND in an address. */
3711 determine_use_iv_cost_address (struct ivopts_data *data,
3712 struct iv_use *use, struct iv_cand *cand)
/* address_p = true: we only need the value as an address operand.  */
3715 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3717 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3719 return !infinite_cost_p (cost);
3722 /* Computes value of candidate CAND at position AT in iteration NITER, and
3723 stores it to VAL. */
3726 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3729 aff_tree step, delta, nit;
3730 struct iv *iv = cand->iv;
3731 tree type = TREE_TYPE (iv->base);
/* Pointer ivs step in sizetype; other ivs step in their own type.  */
3732 tree steptype = type;
3733 if (POINTER_TYPE_P (type))
3734 steptype = sizetype;
/* delta = step * niter, computed in affine-combination form.  */
3736 tree_to_aff_combination (iv->step, steptype, &step);
3737 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3738 aff_combination_convert (&nit, steptype);
3739 aff_combination_mult (&nit, &step, &delta);
/* Past the increment the candidate is one step further along.  */
3740 if (stmt_after_increment (loop, cand, at))
3741 aff_combination_add (&delta, &step);
3743 tree_to_aff_combination (iv->base, type, val);
3744 aff_combination_add (val, &delta);
3747 /* Returns period of induction variable iv. */
3750 iv_period (struct iv *iv)
3752 tree step = iv->step, period, type;
3755 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3757 /* Period of the iv is gcd (step, type range). Since type range is power
3758 of two, it suffices to determine the maximum power of two that divides
3760 pow2div = num_ending_zeros (step);
3761 type = unsigned_type_for (TREE_TYPE (step));
/* period = 2^(precision - pow2div) - 1, built as a low-bits mask.  */
3763 period = build_low_bits_mask (type,
3764 (TYPE_PRECISION (type)
3765 - tree_low_cst (pow2div, 1)));
3770 /* Returns the comparison operator used when eliminating the iv USE. */
3772 static enum tree_code
3773 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3775 struct loop *loop = data->current_loop;
/* Find the successor edge of the condition's block that leaves the
   loop; the other successor stays inside.  */
3779 ex_bb = gimple_bb (use->stmt);
3780 exit = EDGE_SUCC (ex_bb, 0);
3781 if (flow_bb_inside_loop_p (loop, exit->dest))
3782 exit = EDGE_SUCC (ex_bb, 1);
/* Exit on the true edge -> compare with EQ; on the false edge -> NE.  */
3784 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3787 /* Check whether it is possible to express the condition in USE by comparison
3788 of candidate CAND. If so, store the value compared with to BOUND. */
3791 may_eliminate_iv (struct ivopts_data *data,
3792 struct iv_use *use, struct iv_cand *cand, tree *bound)
3797 struct loop *loop = data->current_loop;
/* Only candidates with a constant step have a well-defined period.  */
3800 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3803 /* For now works only for exits that dominate the loop latch.
3804 TODO: extend to other conditions inside loop body. */
3805 ex_bb = gimple_bb (use->stmt);
3806 if (use->stmt != last_stmt (ex_bb)
3807 || gimple_code (use->stmt) != GIMPLE_COND
3808 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Identify the edge that leaves the loop; bail out if both successors
   remain inside (not a loop exit).  */
3811 exit = EDGE_SUCC (ex_bb, 0);
3812 if (flow_bb_inside_loop_p (loop, exit->dest))
3813 exit = EDGE_SUCC (ex_bb, 1);
3814 if (flow_bb_inside_loop_p (loop, exit->dest))
3817 nit = niter_for_exit (data, exit);
3821 /* Determine whether we can use the variable to test the exit condition.
3822 This is the case iff the period of the induction variable is greater
3823 than the number of iterations for which the exit condition is true. */
3824 period = iv_period (cand->iv);
3826 /* If the number of iterations is constant, compare against it directly. */
3827 if (TREE_CODE (nit) == INTEGER_CST)
3829 if (!tree_int_cst_lt (nit, period))
3833 /* If not, and if this is the only possible exit of the loop, see whether
3834 we can get a conservative estimate on the number of iterations of the
3835 entire loop and compare against that instead. */
3836 else if (loop_only_exit_p (loop, exit))
3838 double_int period_value, max_niter;
3839 if (!estimated_loop_iterations (loop, true, &max_niter))
3841 period_value = tree_to_double_int (period);
3842 if (double_int_ucmp (max_niter, period_value) >= 0)
3846 /* Otherwise, punt. */
/* BOUND is the candidate's value after NIT iterations -- the value the
   eliminated comparison will test against.  */
3850 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3852 *bound = aff_combination_to_tree (&bnd);
3853 /* It is unlikely that computing the number of iterations using division
3854 would be more profitable than keeping the original induction variable. */
3855 if (expression_expensive_p (*bound))
3860 /* Determines cost of basing replacement of USE on CAND in a condition. */
3863 determine_use_iv_cost_condition (struct ivopts_data *data,
3864 struct iv_use *use, struct iv_cand *cand)
3866 tree bound = NULL_TREE;
3868 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3869 comp_cost elim_cost, express_cost, cost;
3872 /* Only consider real candidates. */
3875 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3879 /* Try iv elimination. */
3880 if (may_eliminate_iv (data, use, cand, &bound))
3882 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3883 /* The bound is a loop invariant, so it will be only computed
3885 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3888 elim_cost = infinite_cost;
3890 /* Try expressing the original giv. If it is compared with an invariant,
3891 note that we cannot get rid of it. */
3892 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3895 express_cost = get_computation_cost (data, use, cand, false,
3896 &depends_on_express);
3897 fd_ivopts_data = data;
3898 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3900 /* Choose the better approach, preferring the eliminated IV. */
3901 if (compare_costs (elim_cost, express_cost) <= 0)
/* Hand ownership of the chosen depends_on bitmap to the cost record
   by nulling the local pointer; the loser is freed below.  */
3904 depends_on = depends_on_elim;
3905 depends_on_elim = NULL;
3909 cost = express_cost;
3910 depends_on = depends_on_express;
3911 depends_on_express = NULL;
3915 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3917 if (depends_on_elim)
3918 BITMAP_FREE (depends_on_elim);
3919 if (depends_on_express)
3920 BITMAP_FREE (depends_on_express);
3922 return !infinite_cost_p (cost);
3925 /* Determines cost of basing replacement of USE on CAND. Returns false
3926 if USE cannot be based on CAND. */
3929 determine_use_iv_cost (struct ivopts_data *data,
3930 struct iv_use *use, struct iv_cand *cand)
/* Dispatch on the kind of use: generic value, address, or condition.  */
3934 case USE_NONLINEAR_EXPR:
3935 return determine_use_iv_cost_generic (data, use, cand);
3938 return determine_use_iv_cost_address (data, use, cand);
3941 return determine_use_iv_cost_condition (data, use, cand);
3948 /* Determines costs of basing the use of the iv on an iv candidate. */
3951 determine_use_iv_costs (struct ivopts_data *data)
3955 struct iv_cand *cand;
3956 bitmap to_clear = BITMAP_ALLOC (NULL);
3958 alloc_use_cost_map (data);
3960 for (i = 0; i < n_iv_uses (data); i++)
3962 use = iv_use (data, i);
/* Either cost every (use, candidate) pair, or -- when the candidate
   set is large -- only the candidates related to this use.  */
3964 if (data->consider_all_candidates)
3966 for (j = 0; j < n_iv_cands (data); j++)
3968 cand = iv_cand (data, j);
3969 determine_use_iv_cost (data, use, cand);
3976 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3978 cand = iv_cand (data, j);
3979 if (!determine_use_iv_cost (data, use, cand))
3980 bitmap_set_bit (to_clear, j);
3983 /* Remove the candidates for that the cost is infinite from
3984 the list of related candidates. */
3985 bitmap_and_compl_into (use->related_cands, to_clear);
/* to_clear is reused per use, so reset it for the next iteration.  */
3986 bitmap_clear (to_clear);
3990 BITMAP_FREE (to_clear);
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 fprintf (dump_file, "Use-candidate costs:\n");
3996 for (i = 0; i < n_iv_uses (data); i++)
3998 use = iv_use (data, i);
4000 fprintf (dump_file, "Use %d:\n", i);
4001 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4002 for (j = 0; j < use->n_map_members; j++)
4004 if (!use->cost_map[j].cand
4005 || infinite_cost_p (use->cost_map[j].cost))
4008 fprintf (dump_file, " %d\t%d\t%d\t",
4009 use->cost_map[j].cand->id,
4010 use->cost_map[j].cost.cost,
4011 use->cost_map[j].cost.complexity);
4012 if (use->cost_map[j].depends_on)
4013 bitmap_print (dump_file,
4014 use->cost_map[j].depends_on, "","");
4015 fprintf (dump_file, "\n");
4018 fprintf (dump_file, "\n");
4020 fprintf (dump_file, "\n");
4024 /* Determines cost of the candidate CAND. */
4027 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4029 comp_cost cost_base;
4030 unsigned cost, cost_step;
4039 /* There are two costs associated with the candidate -- its increment
4040 and its initialization. The second is almost negligible for any loop
4041 that rolls enough, so we take it just very little into account. */
4043 base = cand->iv->base;
4044 cost_base = force_var_cost (data, base, NULL);
4045 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
/* Initialization cost is amortized over the expected iteration count.  */
4047 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4049 /* Prefer the original ivs unless we may gain something by replacing it.
4050 The reason is to make debugging simpler; so this is not relevant for
4051 artificial ivs created by other optimization passes. */
4052 if (cand->pos != IP_ORIGINAL
4053 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4056 /* Prefer not to insert statements into latch unless there are some
4057 already (so that we do not create unnecessary jumps). */
4058 if (cand->pos == IP_END
4059 && empty_block_p (ip_end_pos (data->current_loop)))
4065 /* Determines costs of computation of the candidates. */
4068 determine_iv_costs (struct ivopts_data *data)
4072 if (dump_file && (dump_flags & TDF_DETAILS))
4074 fprintf (dump_file, "Candidate costs:\n");
4075 fprintf (dump_file, " cand\tcost\n");
/* Cost each candidate in turn; results are stored on the candidate.  */
4078 for (i = 0; i < n_iv_cands (data); i++)
4080 struct iv_cand *cand = iv_cand (data, i);
4082 determine_iv_cost (data, cand);
4084 if (dump_file && (dump_flags & TDF_DETAILS))
4085 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4088 if (dump_file && (dump_flags & TDF_DETAILS))
4089 fprintf (dump_file, "\n");
4092 /* Calculates cost for having SIZE induction variables. */
4095 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4097 /* We add size to the cost, so that we prefer eliminating ivs
/* Register-pressure estimate plus a linear bias against larger sets.  */
4099 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4102 /* For each size of the induction variable set determine the penalty. */
4105 determine_set_costs (struct ivopts_data *data)
4109 gimple_stmt_iterator psi;
4111 struct loop *loop = data->current_loop;
4114 /* We use the following model (definitely improvable, especially the
4115 cost function -- TODO):
4117 We estimate the number of registers available (using MD data), name it A.
4119 We estimate the number of registers used by the loop, name it U. This
4120 number is obtained as the number of loop phi nodes (not counting virtual
4121 registers and bivs) + the number of variables from outside of the loop.
4123 We set a reserve R (free regs that are used for temporary computations,
4124 etc.). For now the reserve is a constant 3.
4126 Let I be the number of induction variables.
4128 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4129 make a lot of ivs without a reason).
4130 -- if A - R < U + I <= A, the cost is I * PRES_COST
4131 -- if U + I > A, the cost is I * PRES_COST and
4132 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4134 if (dump_file && (dump_flags & TDF_DETAILS))
4136 fprintf (dump_file, "Global costs:\n");
4137 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4138 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4139 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
/* Count registers live through the loop: header phi results that are
   real (non-virtual) registers and are not themselves ivs...  */
4143 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4145 phi = gsi_stmt (psi);
4146 op = PHI_RESULT (phi);
4148 if (!is_gimple_reg (op))
4151 if (get_iv (data, op))
/* ... plus loop invariants that have non-linear uses inside the loop.  */
4157 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4159 struct version_info *info = ver_info (data, j);
4161 if (info->inv_id && info->has_nonlin_use)
4165 data->regs_used = n;
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 fprintf (dump_file, " regs_used %d\n", n);
4169 if (dump_file && (dump_flags & TDF_DETAILS))
4171 fprintf (dump_file, " cost for size:\n");
4172 fprintf (dump_file, " ivs\tcost\n");
4173 for (j = 0; j <= 2 * target_avail_regs; j++)
4174 fprintf (dump_file, " %d\t%d\n", j,
4175 ivopts_global_cost_for_size (data, j));
4176 fprintf (dump_file, "\n");
4180 /* Returns true if A is a cheaper cost pair than B. */
4183 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* Primary key: the pair's own cost ...  */
4193 cmp = compare_costs (a->cost, b->cost);
4200 /* In case the costs are the same, prefer the cheaper candidate. */
4201 if (a->cand->cost < b->cand->cost)
4207 /* Computes the cost field of IVS structure. */
4210 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4212 comp_cost cost = ivs->cand_use_cost;
4213 cost.cost += ivs->cand_cost;
4214 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4219 /* Remove invariants in set INVS to set IVS. */
4222 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4230 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4232 ivs->n_invariant_uses[iid]--;
4233 if (ivs->n_invariant_uses[iid] == 0)
4238 /* Set USE not to be expressed by any candidate in IVS. */
4241 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4244 unsigned uid = use->id, cid;
4245 struct cost_pair *cp;
4247 cp = ivs->cand_for_use[uid];
4253 ivs->cand_for_use[uid] = NULL;
4254 ivs->n_cand_uses[cid]--;
4256 if (ivs->n_cand_uses[cid] == 0)
4258 bitmap_clear_bit (ivs->cands, cid);
4259 /* Do not count the pseudocandidates. */
4263 ivs->cand_cost -= cp->cand->cost;
4265 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4268 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4270 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4271 iv_ca_recount_cost (data, ivs);
4274 /* Add invariants in set INVS to set IVS. */
4277 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4285 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4287 ivs->n_invariant_uses[iid]++;
4288 if (ivs->n_invariant_uses[iid] == 1)
4293 /* Set cost pair for USE in set IVS to CP. */
4296 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4297 struct iv_use *use, struct cost_pair *cp)
4299 unsigned uid = use->id, cid;
4301 if (ivs->cand_for_use[uid] == cp)
4304 if (ivs->cand_for_use[uid])
4305 iv_ca_set_no_cp (data, ivs, use);
4312 ivs->cand_for_use[uid] = cp;
4313 ivs->n_cand_uses[cid]++;
4314 if (ivs->n_cand_uses[cid] == 1)
4316 bitmap_set_bit (ivs->cands, cid);
4317 /* Do not count the pseudocandidates. */
4321 ivs->cand_cost += cp->cand->cost;
4323 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4326 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4327 iv_ca_set_add_invariants (ivs, cp->depends_on);
4328 iv_ca_recount_cost (data, ivs);
4332 /* Extend set IVS by expressing USE by some of the candidates in it
4336 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4339 struct cost_pair *best_cp = NULL, *cp;
4343 gcc_assert (ivs->upto >= use->id);
4345 if (ivs->upto == use->id)
4351 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4353 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4355 if (cheaper_cost_pair (cp, best_cp))
4359 iv_ca_set_cp (data, ivs, use, best_cp);
4362 /* Get cost for assignment IVS. */
4365 iv_ca_cost (struct iv_ca *ivs)
4367 /* This was a conditional expression but it triggered a bug in
4370 return infinite_cost;
4375 /* Returns true if all dependences of CP are among invariants in IVS. */
4378 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4383 if (!cp->depends_on)
4386 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4388 if (ivs->n_invariant_uses[i] == 0)
4395 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4396 it before NEXT_CHANGE. */
4398 static struct iv_ca_delta *
4399 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4400 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4402 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4405 change->old_cp = old_cp;
4406 change->new_cp = new_cp;
4407 change->next_change = next_change;
4412 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4415 static struct iv_ca_delta *
4416 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4418 struct iv_ca_delta *last;
4426 for (last = l1; last->next_change; last = last->next_change)
4428 last->next_change = l2;
4433 /* Returns candidate by that USE is expressed in IVS. */
4435 static struct cost_pair *
4436 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4438 return ivs->cand_for_use[use->id];
4441 /* Reverse the list of changes DELTA, forming the inverse to it. */
4443 static struct iv_ca_delta *
4444 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4446 struct iv_ca_delta *act, *next, *prev = NULL;
4447 struct cost_pair *tmp;
4449 for (act = delta; act; act = next)
4451 next = act->next_change;
4452 act->next_change = prev;
4456 act->old_cp = act->new_cp;
4463 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4464 reverted instead. */
4467 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4468 struct iv_ca_delta *delta, bool forward)
4470 struct cost_pair *from, *to;
4471 struct iv_ca_delta *act;
4474 delta = iv_ca_delta_reverse (delta);
4476 for (act = delta; act; act = act->next_change)
4480 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4481 iv_ca_set_cp (data, ivs, act->use, to);
4485 iv_ca_delta_reverse (delta);
4488 /* Returns true if CAND is used in IVS. */
4491 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4493 return ivs->n_cand_uses[cand->id] > 0;
4496 /* Returns number of induction variable candidates in the set IVS. */
4499 iv_ca_n_cands (struct iv_ca *ivs)
4501 return ivs->n_cands;
4504 /* Free the list of changes DELTA. */
4507 iv_ca_delta_free (struct iv_ca_delta **delta)
4509 struct iv_ca_delta *act, *next;
4511 for (act = *delta; act; act = next)
4513 next = act->next_change;
4520 /* Allocates new iv candidates assignment. */
4522 static struct iv_ca *
4523 iv_ca_new (struct ivopts_data *data)
4525 struct iv_ca *nw = XNEW (struct iv_ca);
4529 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4530 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4531 nw->cands = BITMAP_ALLOC (NULL);
4534 nw->cand_use_cost = zero_cost;
4536 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4537 nw->cost = zero_cost;
4542 /* Free memory occupied by the set IVS. */
4545 iv_ca_free (struct iv_ca **ivs)
4547 free ((*ivs)->cand_for_use);
4548 free ((*ivs)->n_cand_uses);
4549 BITMAP_FREE ((*ivs)->cands);
4550 free ((*ivs)->n_invariant_uses);
4555 /* Dumps IVS to FILE. */
4558 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4560 const char *pref = " invariants ";
4562 comp_cost cost = iv_ca_cost (ivs);
4564 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4565 bitmap_print (file, ivs->cands, " candidates ","\n");
4567 for (i = 1; i <= data->max_inv_id; i++)
4568 if (ivs->n_invariant_uses[i])
4570 fprintf (file, "%s%d", pref, i);
4573 fprintf (file, "\n");
4576 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4577 new set, and store differences in DELTA. Number of induction variables
4578 in the new set is stored to N_IVS. */
4581 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4582 struct iv_cand *cand, struct iv_ca_delta **delta,
4588 struct cost_pair *old_cp, *new_cp;
4591 for (i = 0; i < ivs->upto; i++)
4593 use = iv_use (data, i);
4594 old_cp = iv_ca_cand_for_use (ivs, use);
4597 && old_cp->cand == cand)
4600 new_cp = get_use_iv_cost (data, use, cand);
4604 if (!iv_ca_has_deps (ivs, new_cp))
4607 if (!cheaper_cost_pair (new_cp, old_cp))
4610 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4613 iv_ca_delta_commit (data, ivs, *delta, true);
4614 cost = iv_ca_cost (ivs);
4616 *n_ivs = iv_ca_n_cands (ivs);
4617 iv_ca_delta_commit (data, ivs, *delta, false);
4622 /* Try narrowing set IVS by removing CAND. Return the cost of
4623 the new set and store the differences in DELTA. */
4626 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4627 struct iv_cand *cand, struct iv_ca_delta **delta)
4631 struct cost_pair *old_cp, *new_cp, *cp;
4633 struct iv_cand *cnd;
4637 for (i = 0; i < n_iv_uses (data); i++)
4639 use = iv_use (data, i);
4641 old_cp = iv_ca_cand_for_use (ivs, use);
4642 if (old_cp->cand != cand)
4647 if (data->consider_all_candidates)
4649 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4654 cnd = iv_cand (data, ci);
4656 cp = get_use_iv_cost (data, use, cnd);
4659 if (!iv_ca_has_deps (ivs, cp))
4662 if (!cheaper_cost_pair (cp, new_cp))
4670 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4675 cnd = iv_cand (data, ci);
4677 cp = get_use_iv_cost (data, use, cnd);
4680 if (!iv_ca_has_deps (ivs, cp))
4683 if (!cheaper_cost_pair (cp, new_cp))
4692 iv_ca_delta_free (delta);
4693 return infinite_cost;
4696 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4699 iv_ca_delta_commit (data, ivs, *delta, true);
4700 cost = iv_ca_cost (ivs);
4701 iv_ca_delta_commit (data, ivs, *delta, false);
4706 /* Try optimizing the set of candidates IVS by removing candidates different
4707 from to EXCEPT_CAND from it. Return cost of the new set, and store
4708 differences in DELTA. */
4711 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4712 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4715 struct iv_ca_delta *act_delta, *best_delta;
4717 comp_cost best_cost, acost;
4718 struct iv_cand *cand;
4721 best_cost = iv_ca_cost (ivs);
4723 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4725 cand = iv_cand (data, i);
4727 if (cand == except_cand)
4730 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4732 if (compare_costs (acost, best_cost) < 0)
4735 iv_ca_delta_free (&best_delta);
4736 best_delta = act_delta;
4739 iv_ca_delta_free (&act_delta);
4748 /* Recurse to possibly remove other unnecessary ivs. */
4749 iv_ca_delta_commit (data, ivs, best_delta, true);
4750 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4751 iv_ca_delta_commit (data, ivs, best_delta, false);
4752 *delta = iv_ca_delta_join (best_delta, *delta);
4756 /* Tries to extend the sets IVS in the best possible way in order
4757 to express the USE. */
4760 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4763 comp_cost best_cost, act_cost;
4766 struct iv_cand *cand;
4767 struct iv_ca_delta *best_delta = NULL, *act_delta;
4768 struct cost_pair *cp;
4770 iv_ca_add_use (data, ivs, use);
4771 best_cost = iv_ca_cost (ivs);
4773 cp = iv_ca_cand_for_use (ivs, use);
4776 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4777 iv_ca_set_no_cp (data, ivs, use);
4780 /* First try important candidates not based on any memory object. Only if
4781 this fails, try the specific ones. Rationale -- in loops with many
4782 variables the best choice often is to use just one generic biv. If we
4783 added here many ivs specific to the uses, the optimization algorithm later
4784 would be likely to get stuck in a local minimum, thus causing us to create
4785 too many ivs. The approach from few ivs to more seems more likely to be
4786 successful -- starting from few ivs, replacing an expensive use by a
4787 specific iv should always be a win. */
4788 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4790 cand = iv_cand (data, i);
4792 if (cand->iv->base_object != NULL_TREE)
4795 if (iv_ca_cand_used_p (ivs, cand))
4798 cp = get_use_iv_cost (data, use, cand);
4802 iv_ca_set_cp (data, ivs, use, cp);
4803 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4804 iv_ca_set_no_cp (data, ivs, use);
4805 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4807 if (compare_costs (act_cost, best_cost) < 0)
4809 best_cost = act_cost;
4811 iv_ca_delta_free (&best_delta);
4812 best_delta = act_delta;
4815 iv_ca_delta_free (&act_delta);
4818 if (infinite_cost_p (best_cost))
4820 for (i = 0; i < use->n_map_members; i++)
4822 cp = use->cost_map + i;
4827 /* Already tried this. */
4828 if (cand->important && cand->iv->base_object == NULL_TREE)
4831 if (iv_ca_cand_used_p (ivs, cand))
4835 iv_ca_set_cp (data, ivs, use, cp);
4836 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4837 iv_ca_set_no_cp (data, ivs, use);
4838 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4841 if (compare_costs (act_cost, best_cost) < 0)
4843 best_cost = act_cost;
4846 iv_ca_delta_free (&best_delta);
4847 best_delta = act_delta;
4850 iv_ca_delta_free (&act_delta);
4854 iv_ca_delta_commit (data, ivs, best_delta, true);
4855 iv_ca_delta_free (&best_delta);
4857 return !infinite_cost_p (best_cost);
4860 /* Finds an initial assignment of candidates to uses. */
4862 static struct iv_ca *
4863 get_initial_solution (struct ivopts_data *data)
4865 struct iv_ca *ivs = iv_ca_new (data);
4868 for (i = 0; i < n_iv_uses (data); i++)
4869 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4878 /* Tries to improve set of induction variables IVS. */
4881 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4884 comp_cost acost, best_cost = iv_ca_cost (ivs);
4885 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4886 struct iv_cand *cand;
4888 /* Try extending the set of induction variables by one. */
4889 for (i = 0; i < n_iv_cands (data); i++)
4891 cand = iv_cand (data, i);
4893 if (iv_ca_cand_used_p (ivs, cand))
4896 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4900 /* If we successfully added the candidate and the set is small enough,
4901 try optimizing it by removing other candidates. */
4902 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4904 iv_ca_delta_commit (data, ivs, act_delta, true);
4905 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4906 iv_ca_delta_commit (data, ivs, act_delta, false);
4907 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4910 if (compare_costs (acost, best_cost) < 0)
4913 iv_ca_delta_free (&best_delta);
4914 best_delta = act_delta;
4917 iv_ca_delta_free (&act_delta);
4922 /* Try removing the candidates from the set instead. */
4923 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4925 /* Nothing more we can do. */
4930 iv_ca_delta_commit (data, ivs, best_delta, true);
4931 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4932 iv_ca_delta_free (&best_delta);
4936 /* Attempts to find the optimal set of induction variables. We do simple
4937 greedy heuristic -- we try to replace at most one candidate in the selected
4938 solution and remove the unused ivs while this improves the cost. */
4940 static struct iv_ca *
4941 find_optimal_iv_set (struct ivopts_data *data)
4947 /* Get the initial solution. */
4948 set = get_initial_solution (data);
4951 if (dump_file && (dump_flags & TDF_DETAILS))
4952 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4956 if (dump_file && (dump_flags & TDF_DETAILS))
4958 fprintf (dump_file, "Initial set of candidates:\n");
4959 iv_ca_dump (data, dump_file, set);
4962 while (try_improve_iv_set (data, set))
4964 if (dump_file && (dump_flags & TDF_DETAILS))
4966 fprintf (dump_file, "Improved to:\n");
4967 iv_ca_dump (data, dump_file, set);
4971 if (dump_file && (dump_flags & TDF_DETAILS))
4973 comp_cost cost = iv_ca_cost (set);
4974 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4977 for (i = 0; i < n_iv_uses (data); i++)
4979 use = iv_use (data, i);
4980 use->selected = iv_ca_cand_for_use (set, use)->cand;
4986 /* Creates a new induction variable corresponding to CAND. */
4989 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4991 gimple_stmt_iterator incr_pos;
5001 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5005 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5010 /* Mark that the iv is preserved. */
5011 name_info (data, cand->var_before)->preserve_biv = true;
5012 name_info (data, cand->var_after)->preserve_biv = true;
5014 /* Rewrite the increment so that it uses var_before directly. */
5015 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5020 gimple_add_tmp_var (cand->var_before);
5021 add_referenced_var (cand->var_before);
5023 base = unshare_expr (cand->iv->base);
5025 create_iv (base, unshare_expr (cand->iv->step),
5026 cand->var_before, data->current_loop,
5027 &incr_pos, after, &cand->var_before, &cand->var_after);
5030 /* Creates new induction variables described in SET. */
5033 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5036 struct iv_cand *cand;
5039 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5041 cand = iv_cand (data, i);
5042 create_new_iv (data, cand);
5046 /* Returns the phi-node in BB with result RESULT. */
5049 get_phi_with_result (basic_block bb, tree result)
5051 gimple_stmt_iterator i = gsi_start_phis (bb);
5053 for (; !gsi_end_p (i); gsi_next (&i))
5054 if (gimple_phi_result (gsi_stmt (i)) == result)
5055 return gsi_stmt (i);
5062 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5063 is true, remove also the ssa name defined by the statement. */
5066 remove_statement (gimple stmt, bool including_defined_name)
5068 if (gimple_code (stmt) == GIMPLE_PHI)
5070 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5071 gimple_phi_result (stmt));
5072 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5073 remove_phi_node (&bsi, including_defined_name);
5077 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5078 gsi_remove (&bsi, true);
5079 release_defs (stmt);
5083 /* Rewrites USE (definition of iv used in a nonlinear expression)
5084 using candidate CAND. */
5087 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5088 struct iv_use *use, struct iv_cand *cand)
5093 gimple_stmt_iterator bsi;
5095 /* An important special case -- if we are asked to express value of
5096 the original iv by itself, just exit; there is no need to
5097 introduce a new computation (that might also need casting the
5098 variable to unsigned and back). */
5099 if (cand->pos == IP_ORIGINAL
5100 && cand->incremented_at == use->stmt)
5102 tree step, ctype, utype;
5103 enum tree_code incr_code = PLUS_EXPR, old_code;
5105 gcc_assert (is_gimple_assign (use->stmt));
5106 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5108 step = cand->iv->step;
5109 ctype = TREE_TYPE (step);
5110 utype = TREE_TYPE (cand->var_after);
5111 if (TREE_CODE (step) == NEGATE_EXPR)
5113 incr_code = MINUS_EXPR;
5114 step = TREE_OPERAND (step, 0);
5117 /* Check whether we may leave the computation unchanged.
5118 This is the case only if it does not rely on other
5119 computations in the loop -- otherwise, the computation
5120 we rely upon may be removed in remove_unused_ivs,
5121 thus leading to ICE. */
5122 old_code = gimple_assign_rhs_code (use->stmt);
5123 if (old_code == PLUS_EXPR
5124 || old_code == MINUS_EXPR
5125 || old_code == POINTER_PLUS_EXPR)
5127 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5128 op = gimple_assign_rhs2 (use->stmt);
5129 else if (old_code != MINUS_EXPR
5130 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5131 op = gimple_assign_rhs1 (use->stmt);
5139 && (TREE_CODE (op) == INTEGER_CST
5140 || operand_equal_p (op, step, 0)))
5143 /* Otherwise, add the necessary computations to express
5145 op = fold_convert (ctype, cand->var_before);
5146 comp = fold_convert (utype,
5147 build2 (incr_code, ctype, op,
5148 unshare_expr (step)));
5152 comp = get_computation (data->current_loop, use, cand);
5153 gcc_assert (comp != NULL_TREE);
5156 switch (gimple_code (use->stmt))
5159 tgt = PHI_RESULT (use->stmt);
5161 /* If we should keep the biv, do not replace it. */
5162 if (name_info (data, tgt)->preserve_biv)
5165 bsi = gsi_after_labels (gimple_bb (use->stmt));
5169 tgt = gimple_assign_lhs (use->stmt);
5170 bsi = gsi_for_stmt (use->stmt);
5177 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5178 true, GSI_SAME_STMT);
5180 if (gimple_code (use->stmt) == GIMPLE_PHI)
5182 ass = gimple_build_assign (tgt, op);
5183 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5184 remove_statement (use->stmt, false);
5188 gimple_assign_set_rhs_from_tree (&bsi, op);
5189 use->stmt = gsi_stmt (bsi);
5193 /* Replaces ssa name in index IDX by its basic variable. Callback for
5197 idx_remove_ssa_names (tree base, tree *idx,
5198 void *data ATTRIBUTE_UNUSED)
5202 if (TREE_CODE (*idx) == SSA_NAME)
5203 *idx = SSA_NAME_VAR (*idx);
5205 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5207 op = &TREE_OPERAND (base, 2);
5209 && TREE_CODE (*op) == SSA_NAME)
5210 *op = SSA_NAME_VAR (*op);
5211 op = &TREE_OPERAND (base, 3);
5213 && TREE_CODE (*op) == SSA_NAME)
5214 *op = SSA_NAME_VAR (*op);
5220 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5223 unshare_and_remove_ssa_names (tree ref)
5225 ref = unshare_expr (ref);
5226 for_each_index (&ref, idx_remove_ssa_names, NULL);
5231 /* Extract the alias analysis info for the memory reference REF. There are
5232 several ways how this information may be stored and what precisely is
5233 its semantics depending on the type of the reference, but there always is
5234 somewhere hidden one _DECL node that is used to determine the set of
5235 virtual operands for the reference. The code below deciphers this jungle
5236 and extracts this single useful piece of information. */
5239 get_ref_tag (tree ref, tree orig)
5241 tree var = get_base_address (ref);
5242 tree aref = NULL_TREE, tag, sv;
5243 HOST_WIDE_INT offset, size, maxsize;
5245 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5247 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5255 if (TREE_CODE (var) == INDIRECT_REF)
5257 /* If the base is a dereference of a pointer, first check its name memory
5258 tag. If it does not have one, use its symbol memory tag. */
5259 var = TREE_OPERAND (var, 0);
5260 if (TREE_CODE (var) != SSA_NAME)
5263 if (SSA_NAME_PTR_INFO (var))
5265 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5270 var = SSA_NAME_VAR (var);
5271 tag = symbol_mem_tag (var);
5272 gcc_assert (tag != NULL_TREE);
5280 tag = symbol_mem_tag (var);
5288 /* Copies the reference information from OLD_REF to NEW_REF. */
5291 copy_ref_info (tree new_ref, tree old_ref)
5293 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5294 copy_mem_ref_info (new_ref, old_ref);
5297 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5298 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5299 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5300 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5304 /* Rewrites USE (address that is an iv) using candidate CAND. */
5307 rewrite_use_address (struct ivopts_data *data,
5308 struct iv_use *use, struct iv_cand *cand)
5311 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5315 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5317 unshare_aff_combination (&aff);
5319 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5320 copy_ref_info (ref, *use->op_p);
5324 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5328 rewrite_use_compare (struct ivopts_data *data,
5329 struct iv_use *use, struct iv_cand *cand)
5331 tree comp, *var_p, op, bound;
5332 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5333 enum tree_code compare;
5334 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5340 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5341 tree var_type = TREE_TYPE (var);
5344 compare = iv_elimination_compare (data, use);
5345 bound = unshare_expr (fold_convert (var_type, bound));
5346 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5348 gsi_insert_seq_on_edge_immediate (
5349 loop_preheader_edge (data->current_loop),
5352 gimple_cond_set_lhs (use->stmt, var);
5353 gimple_cond_set_code (use->stmt, compare);
5354 gimple_cond_set_rhs (use->stmt, op);
5358 /* The induction variable elimination failed; just express the original
5360 comp = get_computation (data->current_loop, use, cand);
5361 gcc_assert (comp != NULL_TREE);
5363 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5366 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5367 true, GSI_SAME_STMT);
5370 /* Rewrites USE using candidate CAND. */
5373 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5375 push_stmt_changes (&use->stmt);
5379 case USE_NONLINEAR_EXPR:
5380 rewrite_use_nonlinear_expr (data, use, cand);
5384 rewrite_use_address (data, use, cand);
5388 rewrite_use_compare (data, use, cand);
5395 pop_stmt_changes (&use->stmt);
5398 /* Rewrite the uses using the selected induction variables. */
5401 rewrite_uses (struct ivopts_data *data)
5404 struct iv_cand *cand;
5407 for (i = 0; i < n_iv_uses (data); i++)
5409 use = iv_use (data, i);
5410 cand = use->selected;
5413 rewrite_use (data, use, cand);
5417 /* Removes the ivs that are not used after rewriting. */
5420 remove_unused_ivs (struct ivopts_data *data)
5425 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5427 struct version_info *info;
5429 info = ver_info (data, j);
5431 && !integer_zerop (info->iv->step)
5433 && !info->iv->have_use_for
5434 && !info->preserve_biv)
5435 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5439 /* Frees data allocated by the optimization of a single loop. */
5442 free_loop_data (struct ivopts_data *data)
5450 pointer_map_destroy (data->niters);
5451 data->niters = NULL;
5454 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5456 struct version_info *info;
5458 info = ver_info (data, i);
5462 info->has_nonlin_use = false;
5463 info->preserve_biv = false;
5466 bitmap_clear (data->relevant);
5467 bitmap_clear (data->important_candidates);
5469 for (i = 0; i < n_iv_uses (data); i++)
5471 struct iv_use *use = iv_use (data, i);
5474 BITMAP_FREE (use->related_cands);
5475 for (j = 0; j < use->n_map_members; j++)
5476 if (use->cost_map[j].depends_on)
5477 BITMAP_FREE (use->cost_map[j].depends_on);
5478 free (use->cost_map);
5481 VEC_truncate (iv_use_p, data->iv_uses, 0);
5483 for (i = 0; i < n_iv_cands (data); i++)
5485 struct iv_cand *cand = iv_cand (data, i);
5489 if (cand->depends_on)
5490 BITMAP_FREE (cand->depends_on);
5493 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5495 if (data->version_info_size < num_ssa_names)
5497 data->version_info_size = 2 * num_ssa_names;
5498 free (data->version_info);
5499 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5502 data->max_inv_id = 0;
5504 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5505 SET_DECL_RTL (obj, NULL_RTX);
5507 VEC_truncate (tree, decl_rtl_to_reset, 0);
5510 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5514 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5516 free_loop_data (data);
5517 free (data->version_info);
5518 BITMAP_FREE (data->relevant);
5519 BITMAP_FREE (data->important_candidates);
5521 VEC_free (tree, heap, decl_rtl_to_reset);
5522 VEC_free (iv_use_p, heap, data->iv_uses);
5523 VEC_free (iv_cand_p, heap, data->iv_candidates);
5526 /* Optimizes the LOOP. Returns true if anything changed. */
5529 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5531 bool changed = false;
5532 struct iv_ca *iv_ca;
5535 gcc_assert (!data->niters);
5536 data->current_loop = loop;
5537 data->speed = optimize_loop_for_speed_p (loop);
5539 if (dump_file && (dump_flags & TDF_DETAILS))
5541 fprintf (dump_file, "Processing loop %d\n", loop->num);
5543 exit = single_dom_exit (loop);
5546 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5547 exit->src->index, exit->dest->index);
5548 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5549 fprintf (dump_file, "\n");
5552 fprintf (dump_file, "\n");
5555 /* For each ssa name determines whether it behaves as an induction variable
5557 if (!find_induction_variables (data))
5560 /* Finds interesting uses (item 1). */
5561 find_interesting_uses (data);
5562 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5565 /* Finds candidates for the induction variables (item 2). */
5566 find_iv_candidates (data);
5568 /* Calculates the costs (item 3, part 1). */
5569 determine_use_iv_costs (data);
5570 determine_iv_costs (data);
5571 determine_set_costs (data);
5573 /* Find the optimal set of induction variables (item 3, part 2). */
5574 iv_ca = find_optimal_iv_set (data);
5579 /* Create the new induction variables (item 4, part 1). */
5580 create_new_ivs (data, iv_ca);
5581 iv_ca_free (&iv_ca);
5583 /* Rewrite the uses (item 4, part 2). */
5584 rewrite_uses (data);
5586 /* Remove the ivs that are unused after rewriting. */
5587 remove_unused_ivs (data);
5589 /* We have changed the structure of induction variables; it might happen
5590 that definitions in the scev database refer to some of them that were
5595 free_loop_data (data);
5600 /* Main entry point. Optimizes induction variables in loops. */
5603 tree_ssa_iv_optimize (void)
5606 struct ivopts_data data;
5609 tree_ssa_iv_optimize_init (&data);
5611 /* Optimize the loops starting with the innermost ones. */
5612 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5615 flow_loop_dump (loop, dump_file, NULL, 1);
5617 tree_ssa_iv_optimize_loop (&data, loop);
5620 tree_ssa_iv_optimize_finalize (&data);