/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
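
/* As a simple illustration of the steps above, consider

     for (i = 0; i < n; i++)
       a[i] = 0;

   Here i is a basic induction variable, the address &a[i] is an
   interesting address use and the exit test i < n is an interesting
   compare use.  Strength reduction may then rewrite the loop to advance
   a pointer through a[] by the element size instead of recomputing
   &a[i] from i in every iteration.  */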

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction
                           variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of the use, if there is one.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this many ivs in the set, we always try removing
   unnecessary ones from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (gsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
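
/* For illustration: for &a[i] the base object is &a; for p + 4 it is
   whatever the base object of the pointer p is (p itself if p is an
   ssa name); and for an integer constant cast to a pointer, no base
   object can be determined.  */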

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, in case the
     caller has not done so already.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
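
/* For example, for an exit test i_1 < n_2 where i_1 is an iv with nonzero
   step and n_2 is loop invariant, extract_cond_operands sets CONTROL_VAR
   to the location of i_1, BOUND to the location of n_2, and returns
   true.  */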

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
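
/* For example, for a reference a[i] with 4-byte elements where i is an iv
   with step 1, the index i is replaced by its initial value and
   4 * 1 = 4 bytes are added to the step accumulated in DATA.  */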

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
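
/* For example, constant_multiple_of (i_1 * 8, i_1, &mul) sets MUL to 8 and
   constant_multiple_of (24, 8, &mul) sets MUL to 3, while for
   constant_multiple_of (i_1 + 1, i_1, &mul) no constant multiple exists
   and false is returned.  */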

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
                               GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0)
        return true;

      if (!constant_multiple_of (step, al, &mul))
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
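
/* For example, stripping &a[i + 3] where the elements of a[] are 4 bytes
   wide yields &a[i] and sets *OFFSET to 12.  */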

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

2056/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2057 position to POS. If USE is not NULL, the candidate is set as related to
2058 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2059 replacement of the final value of the iv by a direct computation. */
2060
2061static struct iv_cand *
2062add_candidate_1 (struct ivopts_data *data,
2063 tree base, tree step, bool important, enum iv_position pos,
2064 struct iv_use *use, gimple incremented_at)
2065{
2066 unsigned i;
2067 struct iv_cand *cand = NULL;
2068 tree type, orig_type;
2069
2070 if (base)
2071 {
2072 orig_type = TREE_TYPE (base);
2073 type = generic_type_for (orig_type);
2074 if (type != orig_type)
2075 {
2076 base = fold_convert (type, base);
2077 step = fold_convert (type, step);
2078 }
2079 }
2080
2081 for (i = 0; i < n_iv_cands (data); i++)
2082 {
2083 cand = iv_cand (data, i);
2084
2085 if (cand->pos != pos)
2086 continue;
2087
2088 if (cand->incremented_at != incremented_at)
2089 continue;
2090
2091 if (!cand->iv)
2092 {
2093 if (!base && !step)
2094 break;
2095
2096 continue;
2097 }
2098
2099 if (!base && !step)
2100 continue;
2101
2102 if (operand_equal_p (base, cand->iv->base, 0)
2103 && operand_equal_p (step, cand->iv->step, 0))
2104 break;
2105 }
2106
2107 if (i == n_iv_cands (data))
2108 {
2109 cand = XCNEW (struct iv_cand);
2110 cand->id = i;
2111
2112 if (!base && !step)
2113 cand->iv = NULL;
2114 else
2115 cand->iv = alloc_iv (base, step);
2116
2117 cand->pos = pos;
2118 if (pos != IP_ORIGINAL && cand->iv)
2119 {
2120 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2121 cand->var_after = cand->var_before;
2122 }
2123 cand->important = important;
2124 cand->incremented_at = incremented_at;
2125 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2126
2127 if (step
2128 && TREE_CODE (step) != INTEGER_CST)
2129 {
2130 fd_ivopts_data = data;
2131 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2132 }
2133
2134 if (dump_file && (dump_flags & TDF_DETAILS))
2135 dump_cand (dump_file, cand);
2136 }
2137
2138 if (important && !cand->important)
2139 {
2140 cand->important = true;
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2143 }
2144
2145 if (use)
2146 {
2147 bitmap_set_bit (use->related_cands, i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
2149 fprintf (dump_file, "Candidate %d is related to use %d\n",
2150 cand->id, use->id);
2151 }
2152
2153 return cand;
2154}
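
/* Illustrative sketch with hypothetical names: the find-or-add discipline
   used by add_candidate_1 above -- scan the existing entries for a match
   and allocate a new one only when the scan fails -- keeps the candidate
   list free of duplicates without a separate hash table. */

struct sketch_entry
{
  int key;
};

static unsigned
sketch_find_or_add (struct sketch_entry *entries, unsigned *n, int key)
{
  unsigned i;

  for (i = 0; i < *n; i++)
    if (entries[i].key == key)
      return i;		/* Reuse the existing entry. */

  /* Append a fresh entry; the caller must ensure there is room. */
  entries[i].key = key;
  (*n)++;
  return i;
}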
2155
2156/* Returns true if incrementing the induction variable at the end of the LOOP
2157 is allowed.
2158
2159   The purpose is to avoid splitting the latch edge with a biv increment,
2160   thus creating a jump, possibly confusing other optimization passes and
2161   leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2162   IP_NORMAL_POS is not available (so we do not have a better alternative),
2163   or if the latch edge is already nonempty. */
2164
2165static bool
2166allow_ip_end_pos_p (struct loop *loop)
2167{
2168 if (!ip_normal_pos (loop))
2169 return true;
2170
2171 if (!empty_block_p (ip_end_pos (loop)))
2172 return true;
2173
2174 return false;
2175}
2176
2177/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
2178   and the position to POS.  If USE is not NULL, the candidate is set as related
2179   to it.  The candidate computation is scheduled at all available positions. */
2180
2181static void
2182add_candidate (struct ivopts_data *data,
2183 tree base, tree step, bool important, struct iv_use *use)
2184{
2185 if (ip_normal_pos (data->current_loop))
2186 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2187 if (ip_end_pos (data->current_loop)
2188 && allow_ip_end_pos_p (data->current_loop))
2189 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2190}
2191
2192/* Add a standard "0 + 1 * iteration" iv candidate for a
2193 type with SIZE bits. */
2194
2195static void
2196add_standard_iv_candidates_for_size (struct ivopts_data *data,
2197 unsigned int size)
2198{
2199 tree type = lang_hooks.types.type_for_size (size, true);
2200 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2201 true, NULL);
2202}
2203
2204/* Adds standard iv candidates. */
2205
2206static void
2207add_standard_iv_candidates (struct ivopts_data *data)
2208{
2209 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2210
2211 /* The same for a double-integer type if it is still fast enough. */
2212 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2213 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2214}
2215
2216
2217/* Adds candidates based on the old induction variable IV. */
2218
2219static void
2220add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2221{
2222 gimple phi;
2223 tree def;
2224 struct iv_cand *cand;
2225
2226 add_candidate (data, iv->base, iv->step, true, NULL);
2227
2228 /* The same, but with initial value zero. */
2229 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2230 add_candidate (data, size_int (0), iv->step, true, NULL);
2231 else
2232 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2233 iv->step, true, NULL);
2234
2235 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2236 if (gimple_code (phi) == GIMPLE_PHI)
2237 {
2238 /* Additionally record the possibility of leaving the original iv
2239 untouched. */
2240 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2241 cand = add_candidate_1 (data,
2242 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2243 SSA_NAME_DEF_STMT (def));
2244 cand->var_before = iv->ssa_name;
2245 cand->var_after = def;
2246 }
2247}
2248
2249/* Adds candidates based on the old induction variables. */
2250
2251static void
2252add_old_ivs_candidates (struct ivopts_data *data)
2253{
2254 unsigned i;
2255 struct iv *iv;
2256 bitmap_iterator bi;
2257
2258 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2259 {
2260 iv = ver_info (data, i)->iv;
2261 if (iv && iv->biv_p && !integer_zerop (iv->step))
2262 add_old_iv_candidates (data, iv);
2263 }
2264}
2265
2266/* Adds candidates based on the value of the induction variable IV and USE. */
2267
2268static void
2269add_iv_value_candidates (struct ivopts_data *data,
2270 struct iv *iv, struct iv_use *use)
2271{
2272 unsigned HOST_WIDE_INT offset;
2273 tree base;
2274 tree basetype;
2275
2276 add_candidate (data, iv->base, iv->step, false, use);
2277
2278  /* The same, but with initial value zero.  Make such a variable important,
2279     since it is generic enough that possibly many uses may be based
2280     on it. */
2281 basetype = TREE_TYPE (iv->base);
2282 if (POINTER_TYPE_P (basetype))
2283 basetype = sizetype;
2284 add_candidate (data, build_int_cst (basetype, 0),
2285 iv->step, true, use);
2286
2287 /* Third, try removing the constant offset. Make sure to even
2288 add a candidate for &a[0] vs. (T *)&a. */
2289 base = strip_offset (iv->base, &offset);
2290 if (offset
2291 || base != iv->base)
2292 add_candidate (data, base, iv->step, false, use);
2293}
2294
2295/* Adds candidates based on the uses. */
2296
2297static void
2298add_derived_ivs_candidates (struct ivopts_data *data)
2299{
2300 unsigned i;
2301
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 {
2304 struct iv_use *use = iv_use (data, i);
2305
2306 if (!use)
2307 continue;
2308
2309 switch (use->type)
2310 {
2311 case USE_NONLINEAR_EXPR:
2312 case USE_COMPARE:
2313 case USE_ADDRESS:
2314 /* Just add the ivs based on the value of the iv used here. */
2315 add_iv_value_candidates (data, use->iv, use);
2316 break;
2317
2318 default:
2319 gcc_unreachable ();
2320 }
2321 }
2322}
2323
2324/* Record important candidates and add them to related_cands bitmaps
2325 if needed. */
2326
2327static void
2328record_important_candidates (struct ivopts_data *data)
2329{
2330 unsigned i;
2331 struct iv_use *use;
2332
2333 for (i = 0; i < n_iv_cands (data); i++)
2334 {
2335 struct iv_cand *cand = iv_cand (data, i);
2336
2337 if (cand->important)
2338 bitmap_set_bit (data->important_candidates, i);
2339 }
2340
2341 data->consider_all_candidates = (n_iv_cands (data)
2342 <= CONSIDER_ALL_CANDIDATES_BOUND);
2343
2344 if (data->consider_all_candidates)
2345 {
2346 /* We will not need "related_cands" bitmaps in this case,
2347 so release them to decrease peak memory consumption. */
2348 for (i = 0; i < n_iv_uses (data); i++)
2349 {
2350 use = iv_use (data, i);
2351 BITMAP_FREE (use->related_cands);
2352 }
2353 }
2354 else
2355 {
2356 /* Add important candidates to the related_cands bitmaps. */
2357 for (i = 0; i < n_iv_uses (data); i++)
2358 bitmap_ior_into (iv_use (data, i)->related_cands,
2359 data->important_candidates);
2360 }
2361}
2362
2363/* Finds the candidates for the induction variables. */
2364
2365static void
2366find_iv_candidates (struct ivopts_data *data)
2367{
2368 /* Add commonly used ivs. */
2369 add_standard_iv_candidates (data);
2370
2371 /* Add old induction variables. */
2372 add_old_ivs_candidates (data);
2373
2374 /* Add induction variables derived from uses. */
2375 add_derived_ivs_candidates (data);
2376
2377 /* Record the important candidates. */
2378 record_important_candidates (data);
2379}
2380
2381/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2382 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2383   we allocate a simple list for every use. */
2384
2385static void
2386alloc_use_cost_map (struct ivopts_data *data)
2387{
2388 unsigned i, size, s, j;
2389
2390 for (i = 0; i < n_iv_uses (data); i++)
2391 {
2392 struct iv_use *use = iv_use (data, i);
2393 bitmap_iterator bi;
2394
2395 if (data->consider_all_candidates)
2396 size = n_iv_cands (data);
2397 else
2398 {
2399 s = 0;
2400 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2401 {
2402 s++;
2403 }
2404
2405	  /* Round up to a power of two, so that computing the modulo by it is fast. */
2406 for (size = 1; size < s; size <<= 1)
2407 continue;
2408 }
2409
2410 use->n_map_members = size;
2411 use->cost_map = XCNEWVEC (struct cost_pair, size);
2412 }
2413}
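
/* Illustrative sketch, hypothetical name: the rounding loop used above.
   A power-of-two table size lets set_use_iv_cost compute the modulo with
   "id & (size - 1)" instead of a division. */

static unsigned
sketch_round_up_pow2 (unsigned s)
{
  unsigned size;

  for (size = 1; size < s; size <<= 1)
    continue;

  /* E.g. 5 -> 8, 8 -> 8, 1 -> 1. */
  return size;
}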
2414
2415/* Returns a description of the computation cost of an expression whose
2416   runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2417
2418static comp_cost
2419new_cost (unsigned runtime, unsigned complexity)
2420{
2421 comp_cost cost;
2422
2423 cost.cost = runtime;
2424 cost.complexity = complexity;
2425
2426 return cost;
2427}
2428
2429/* Adds costs COST1 and COST2. */
2430
2431static comp_cost
2432add_costs (comp_cost cost1, comp_cost cost2)
2433{
2434 cost1.cost += cost2.cost;
2435 cost1.complexity += cost2.complexity;
2436
2437 return cost1;
2438}

2439/* Subtracts cost COST2 from COST1. */
2440
2441static comp_cost
2442sub_costs (comp_cost cost1, comp_cost cost2)
2443{
2444 cost1.cost -= cost2.cost;
2445 cost1.complexity -= cost2.complexity;
2446
2447 return cost1;
2448}
2449
2450/* Returns a negative number if COST1 < COST2, a positive number if
2451 COST1 > COST2, and 0 if COST1 = COST2. */
2452
2453static int
2454compare_costs (comp_cost cost1, comp_cost cost2)
2455{
2456 if (cost1.cost == cost2.cost)
2457 return cost1.complexity - cost2.complexity;
2458
2459 return cost1.cost - cost2.cost;
2460}
2461
2462/* Returns true if COST is infinite. */
2463
2464static bool
2465infinite_cost_p (comp_cost cost)
2466{
2467 return cost.cost == INFTY;
2468}
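
/* Illustrative sketch, hypothetical name: compare_costs orders costs
   lexicographically -- runtime cost first, complexity as a tiebreak --
   so for (cost, complexity) pairs, (4, 1) < (4, 2) < (5, 0). */

static comp_cost
sketch_cheaper_cost (comp_cost a, comp_cost b)
{
  return compare_costs (a, b) <= 0 ? a : b;
}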
2469
2470/* Sets the cost of (USE, CANDIDATE) pair to COST, and records that it
2471   depends on invariants DEPENDS_ON and that the value used in expressing
2472   it is VALUE. */
2473
2474static void
2475set_use_iv_cost (struct ivopts_data *data,
2476 struct iv_use *use, struct iv_cand *cand,
2477 comp_cost cost, bitmap depends_on, tree value)
2478{
2479 unsigned i, s;
2480
2481 if (infinite_cost_p (cost))
2482 {
2483 BITMAP_FREE (depends_on);
2484 return;
2485 }
2486
2487 if (data->consider_all_candidates)
2488 {
2489 use->cost_map[cand->id].cand = cand;
2490 use->cost_map[cand->id].cost = cost;
2491 use->cost_map[cand->id].depends_on = depends_on;
2492 use->cost_map[cand->id].value = value;
2493 return;
2494 }
2495
2496 /* n_map_members is a power of two, so this computes modulo. */
2497 s = cand->id & (use->n_map_members - 1);
2498 for (i = s; i < use->n_map_members; i++)
2499 if (!use->cost_map[i].cand)
2500 goto found;
2501 for (i = 0; i < s; i++)
2502 if (!use->cost_map[i].cand)
2503 goto found;
2504
2505 gcc_unreachable ();
2506
2507found:
2508 use->cost_map[i].cand = cand;
2509 use->cost_map[i].cost = cost;
2510 use->cost_map[i].depends_on = depends_on;
2511 use->cost_map[i].value = value;
2512}
2513
2514/* Gets cost of (USE, CANDIDATE) pair. */
2515
2516static struct cost_pair *
2517get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2518 struct iv_cand *cand)
2519{
2520 unsigned i, s;
2521 struct cost_pair *ret;
2522
2523 if (!cand)
2524 return NULL;
2525
2526 if (data->consider_all_candidates)
2527 {
2528 ret = use->cost_map + cand->id;
2529 if (!ret->cand)
2530 return NULL;
2531
2532 return ret;
2533 }
2534
2535 /* n_map_members is a power of two, so this computes modulo. */
2536 s = cand->id & (use->n_map_members - 1);
2537 for (i = s; i < use->n_map_members; i++)
2538 if (use->cost_map[i].cand == cand)
2539 return use->cost_map + i;
2540
2541 for (i = 0; i < s; i++)
2542 if (use->cost_map[i].cand == cand)
2543 return use->cost_map + i;
2544
2545 return NULL;
2546}
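
/* Illustrative sketch, hypothetical name: the probing scheme shared by
   set_use_iv_cost and get_use_iv_cost.  Starting at ID & (N - 1) and
   scanning forward with a wraparound visits every slot exactly once, so
   a lookup terminates and an insertion finds a free slot whenever the
   table is not full.  N must be a power of two. */

static int
sketch_probe_slot (struct cost_pair *table, unsigned n, unsigned id,
		   struct iv_cand *cand)
{
  unsigned i, s = id & (n - 1);

  for (i = s; i < n; i++)
    if (table[i].cand == cand)
      return (int) i;
  for (i = 0; i < s; i++)
    if (table[i].cand == cand)
      return (int) i;

  return -1;	/* CAND is not present in TABLE. */
}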
2547
2548/* Returns an estimate of the cost of computing SEQ. */
2549
2550static unsigned
2551seq_cost (rtx seq, bool speed)
2552{
2553 unsigned cost = 0;
2554 rtx set;
2555
2556 for (; seq; seq = NEXT_INSN (seq))
2557 {
2558 set = single_set (seq);
2559 if (set)
2560	cost += rtx_cost (set, SET, speed);
2561 else
2562 cost++;
2563 }
2564
2565 return cost;
2566}
2567
2568/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */

2569static rtx
2570produce_memory_decl_rtl (tree obj, int *regno)
2571{
2572 rtx x;
2573
2574 gcc_assert (obj);
2575 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2576 {
2577 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2578 x = gen_rtx_SYMBOL_REF (Pmode, name);
2579 SET_SYMBOL_REF_DECL (x, obj);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 targetm.encode_section_info (obj, x, true);
2582 }
2583 else
2584 {
2585 x = gen_raw_REG (Pmode, (*regno)++);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x);
2587 }
2588
2589 return x;
2590}
2591
2592/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2593   walk_tree.  DATA contains the current fake register number. */
2594
2595static tree
2596prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2597{
2598 tree obj = NULL_TREE;
2599 rtx x = NULL_RTX;
2600 int *regno = (int *) data;
2601
2602 switch (TREE_CODE (*expr_p))
2603 {
2604 case ADDR_EXPR:
2605 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2606 handled_component_p (*expr_p);
2607 expr_p = &TREE_OPERAND (*expr_p, 0))
2608 continue;
2609 obj = *expr_p;
2610 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2611 x = produce_memory_decl_rtl (obj, regno);
2612 break;
2613
2614 case SSA_NAME:
2615 *ws = 0;
2616 obj = SSA_NAME_VAR (*expr_p);
2617 if (!DECL_RTL_SET_P (obj))
2618 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2619 break;
2620
2621 case VAR_DECL:
2622 case PARM_DECL:
2623 case RESULT_DECL:
2624 *ws = 0;
2625 obj = *expr_p;
2626
2627 if (DECL_RTL_SET_P (obj))
2628 break;
2629
2630 if (DECL_MODE (obj) == BLKmode)
2631 x = produce_memory_decl_rtl (obj, regno);
2632 else
2633 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2634
2635 break;
2636
2637 default:
2638 break;
2639 }
2640
2641 if (x)
2642 {
2643 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2644 SET_DECL_RTL (obj, x);
2645 }
2646
2647 return NULL_TREE;
2648}
2649
2650/* Determines the cost of computing EXPR. */
2651
2652static unsigned
2653computation_cost (tree expr, bool speed)
2654{
2655 rtx seq, rslt;
2656 tree type = TREE_TYPE (expr);
2657 unsigned cost;
2658 /* Avoid using hard regs in ways which may be unsupported. */
2659 int regno = LAST_VIRTUAL_REGISTER + 1;
2660 enum function_frequency real_frequency = cfun->function_frequency;
2661
2662 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2663 crtl->maybe_hot_insn_p = speed;
2664 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2665 start_sequence ();
2666 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2667 seq = get_insns ();
2668 end_sequence ();
2669 default_rtl_profile ();
2670 cfun->function_frequency = real_frequency;
2671
2672 cost = seq_cost (seq, speed);
2673 if (MEM_P (rslt))
2674 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2675
2676 return cost;
2677}
2678
2679/* Returns variable containing the value of candidate CAND at statement AT. */
2680
2681static tree
2682var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2683{
2684 if (stmt_after_increment (loop, cand, stmt))
2685 return cand->var_after;
2686 else
2687 return cand->var_before;
2688}
2689
2690/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2691 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2692
2693int
2694tree_int_cst_sign_bit (const_tree t)
2695{
2696 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2697 unsigned HOST_WIDE_INT w;
2698
2699 if (bitno < HOST_BITS_PER_WIDE_INT)
2700 w = TREE_INT_CST_LOW (t);
2701 else
2702 {
2703 w = TREE_INT_CST_HIGH (t);
2704 bitno -= HOST_BITS_PER_WIDE_INT;
2705 }
2706
2707 return (w >> bitno) & 1;
2708}
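
/* Illustrative sketch, hypothetical name: picking bit BITNO out of a
   constant stored as two HOST_WIDE_INT words, as done above.  For a
   71-bit type with 64-bit words, the sign bit is bit 70 and lives in
   the high word at position 70 - 64 = 6. */

static int
sketch_double_word_bit (unsigned HOST_WIDE_INT low,
			unsigned HOST_WIDE_INT high, unsigned bitno)
{
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = low;
  else
    {
      w = high;
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (int) ((w >> bitno) & 1);
}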
2709
2710/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2711   same precision, which is at least as wide as the precision of TYPE, stores
2712   BA in A and BB in B, and returns the type of BA.  Otherwise, returns the
2713   type of A and B. */
2714
2715static tree
2716determine_common_wider_type (tree *a, tree *b)
2717{
2718 tree wider_type = NULL;
2719 tree suba, subb;
2720 tree atype = TREE_TYPE (*a);
2721
2722 if (CONVERT_EXPR_P (*a))
2723 {
2724 suba = TREE_OPERAND (*a, 0);
2725 wider_type = TREE_TYPE (suba);
2726 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2727 return atype;
2728 }
2729 else
2730 return atype;
2731
2732 if (CONVERT_EXPR_P (*b))
2733 {
2734 subb = TREE_OPERAND (*b, 0);
2735 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2736 return atype;
2737 }
2738 else
2739 return atype;
2740
2741 *a = suba;
2742 *b = subb;
2743 return wider_type;
2744}
2745
2746/* Determines the expression by which USE is expressed from induction variable
2747   CAND at statement AT in LOOP.  The expression is stored in a decomposed
2748   form in AFF.  Returns false if USE cannot be expressed using CAND. */
2749
2750static bool
2751get_computation_aff (struct loop *loop,
2752 struct iv_use *use, struct iv_cand *cand, gimple at,
2753 struct affine_tree_combination *aff)
2754{
2755 tree ubase = use->iv->base;
2756 tree ustep = use->iv->step;
2757 tree cbase = cand->iv->base;
2758 tree cstep = cand->iv->step, cstep_common;
2759 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2760 tree common_type, var;
2761 tree uutype;
2762 aff_tree cbase_aff, var_aff;
2763 double_int rat;
2764
2765 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2766 {
2767      /* We do not have enough precision to express the values of the use. */
2768 return false;
2769 }
2770
2771 var = var_at_stmt (loop, cand, at);
2772 uutype = unsigned_type_for (utype);
2773
2774  /* If the conversion is not a noop, perform it. */
2775 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2776 {
2777 cstep = fold_convert (uutype, cstep);
2778 cbase = fold_convert (uutype, cbase);
2779 var = fold_convert (uutype, var);
2780 }
2781
2782 if (!constant_multiple_of (ustep, cstep, &rat))
2783 return false;
2784
2785  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2786     type, we achieve better folding by computing their difference in this
2787     wider type, and casting the result to UUTYPE.  We do not need to worry
2788     about overflows, as all the arithmetic will in the end be performed in
2789     UUTYPE anyway. */
2790 common_type = determine_common_wider_type (&ubase, &cbase);
2791
2792 /* use = ubase - ratio * cbase + ratio * var. */
2793 tree_to_aff_combination (ubase, common_type, aff);
2794 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2795 tree_to_aff_combination (var, uutype, &var_aff);
2796
2797 /* We need to shift the value if we are after the increment. */
2798 if (stmt_after_increment (loop, cand, at))
2799 {
2800 aff_tree cstep_aff;
2801
2802 if (common_type != uutype)
2803 cstep_common = fold_convert (common_type, cstep);
2804 else
2805 cstep_common = cstep;
2806
2807 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2808 aff_combination_add (&cbase_aff, &cstep_aff);
2809 }
2810
2811 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2812 aff_combination_add (aff, &cbase_aff);
2813 if (common_type != uutype)
2814 aff_combination_convert (aff, uutype);
2815
2816 aff_combination_scale (&var_aff, rat);
2817 aff_combination_add (aff, &var_aff);
2818
2819 return true;
2820}
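
/* Illustrative numeric sketch, hypothetical name, of the identity built
   above.  With ubase = 8, ustep = 12, cbase = 2, cstep = 4 we get
   ratio = 3, and the use value at iteration N, 8 + 12 * N, equals
   ubase - ratio * cbase + ratio * var, where var = cbase + cstep * N is
   the candidate's value. */

static HOST_WIDE_INT
sketch_use_from_cand (HOST_WIDE_INT ubase, HOST_WIDE_INT cbase,
		      HOST_WIDE_INT ratio, HOST_WIDE_INT var)
{
  return ubase - ratio * cbase + ratio * var;
}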
2821
2822/* Determines the expression by which USE is expressed from induction variable
2823   CAND at statement AT in LOOP.  The computation is unshared. */
2824
2825static tree
2826get_computation_at (struct loop *loop,
2827 struct iv_use *use, struct iv_cand *cand, gimple at)
2828{
2829 aff_tree aff;
2830 tree type = TREE_TYPE (use->iv->base);
2831
2832 if (!get_computation_aff (loop, use, cand, at, &aff))
2833 return NULL_TREE;
2834 unshare_aff_combination (&aff);
2835 return fold_convert (type, aff_combination_to_tree (&aff));
2836}
2837
2838/* Determines the expression by which USE is expressed from induction variable
2839   CAND in LOOP.  The computation is unshared. */
2840
2841static tree
2842get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2843{
2844 return get_computation_at (loop, use, cand, use->stmt);
2845}
2846
2847/* Returns cost of addition in MODE. */
2848
2849static unsigned
2850add_cost (enum machine_mode mode, bool speed)
2851{
2852 static unsigned costs[NUM_MACHINE_MODES];
2853 rtx seq;
2854 unsigned cost;
2855
2856 if (costs[mode])
2857 return costs[mode];
2858
2859 start_sequence ();
2860 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2861 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2862 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2863 NULL_RTX);
2864 seq = get_insns ();
2865 end_sequence ();
2866
2867 cost = seq_cost (seq, speed);
2868 if (!cost)
2869 cost = 1;
2870
2871 costs[mode] = cost;
2872
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Addition in %s costs %d\n",
2875 GET_MODE_NAME (mode), cost);
2876 return cost;
2877}
2878
2879/* Entry in a hashtable of already known costs for multiplication. */
2880struct mbc_entry
2881{
2882 HOST_WIDE_INT cst; /* The constant to multiply by. */
2883 enum machine_mode mode; /* In mode. */
2884 unsigned cost; /* The cost. */
2885};
2886
2887/* Computes the hash value for ENTRY. */
2888
2889static hashval_t
2890mbc_entry_hash (const void *entry)
2891{
2892 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2893
2894 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2895}
2896
2897/* Compares the hash table entries ENTRY1 and ENTRY2. */
2898
2899static int
2900mbc_entry_eq (const void *entry1, const void *entry2)
2901{
2902 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2903 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2904
2905 return (e1->mode == e2->mode
2906 && e1->cst == e2->cst);
2907}
2908
2909/* Returns cost of multiplication by constant CST in MODE. */
2910
2911unsigned
2912multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2913{
2914 static htab_t costs;
2915 struct mbc_entry **cached, act;
2916 rtx seq;
2917 unsigned cost;
2918
2919 if (!costs)
2920 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2921
2922 act.mode = mode;
2923 act.cst = cst;
2924 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2925 if (*cached)
2926 return (*cached)->cost;
2927
2928 *cached = XNEW (struct mbc_entry);
2929 (*cached)->mode = mode;
2930 (*cached)->cst = cst;
2931
2932 start_sequence ();
2933 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2934 gen_int_mode (cst, mode), NULL_RTX, 0);
2935 seq = get_insns ();
2936 end_sequence ();
2937
2938 cost = seq_cost (seq, speed);
2939
2940 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2942 (int) cst, GET_MODE_NAME (mode), cost);
2943
2944 (*cached)->cost = cost;
2945
2946 return cost;
2947}
2948
2949/* Returns true if multiplying by RATIO is allowed in an address.  Tests the
2950   validity for a memory reference accessing memory of mode MODE. */
2951
2952bool
2953multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2954{
2955#define MAX_RATIO 128
2956 static sbitmap valid_mult[MAX_MACHINE_MODE];
2957
2958 if (!valid_mult[mode])
2959 {
2960 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2961 rtx addr;
2962 HOST_WIDE_INT i;
2963
2964 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2965 sbitmap_zero (valid_mult[mode]);
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2967 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2968 {
2969 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2970 if (memory_address_p (mode, addr))
2971 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2972 }
2973
2974 if (dump_file && (dump_flags & TDF_DETAILS))
2975 {
2976 fprintf (dump_file, " allowed multipliers:");
2977 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2978 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2979 fprintf (dump_file, " %d", (int) i);
2980 fprintf (dump_file, "\n");
2981 fprintf (dump_file, "\n");
2982 }
2983 }
2984
2985 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2986 return false;
2987
2988 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2989}
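
/* Illustrative sketch, hypothetical name: the cache above indexes a
   bitmap by RATIO + MAX_RATIO, so that signed ratios in
   [-MAX_RATIO, MAX_RATIO] map to slots 0 .. 2 * MAX_RATIO. */

static unsigned
sketch_ratio_slot (HOST_WIDE_INT ratio)
{
  /* -128 -> 0, 0 -> 128, 128 -> 256 for MAX_RATIO == 128. */
  return (unsigned) (ratio + MAX_RATIO);
}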
2990
2991/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2992 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2993 variable is omitted. Compute the cost for a memory reference that accesses
2994 a memory location of mode MEM_MODE.
2995
2996 TODO -- there must be some better way. This all is quite crude. */
2997
2998static comp_cost
2999get_address_cost (bool symbol_present, bool var_present,
3000 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3001 enum machine_mode mem_mode,
3002 bool speed)
3003{
3004 static bool initialized[MAX_MACHINE_MODE];
3005 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3006 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3007 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3008 unsigned cost, acost, complexity;
3009 bool offset_p, ratio_p;
3010 HOST_WIDE_INT s_offset;
3011 unsigned HOST_WIDE_INT mask;
3012 unsigned bits;
3013
3014 if (!initialized[mem_mode])
3015 {
3016 HOST_WIDE_INT i;
3017 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3018 int old_cse_not_expected;
3019 unsigned sym_p, var_p, off_p, rat_p, add_c;
3020 rtx seq, addr, base;
3021 rtx reg0, reg1;
3022
3023 initialized[mem_mode] = true;
3024
3025 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3026
3027 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3028 for (i = start; i <= 1 << 20; i <<= 1)
3029 {
3030 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3031 if (!memory_address_p (mem_mode, addr))
3032 break;
3033 }
3034 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3035 off[mem_mode] = max_offset[mem_mode];
3036
3037 for (i = start; i <= 1 << 20; i <<= 1)
3038 {
3039 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3040 if (!memory_address_p (mem_mode, addr))
3041 break;
3042 }
3043 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3044
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3046 {
3047 fprintf (dump_file, "get_address_cost:\n");
3048 fprintf (dump_file, " min offset %s %d\n",
3049 GET_MODE_NAME (mem_mode),
3050 (int) min_offset[mem_mode]);
3051 fprintf (dump_file, " max offset %s %d\n",
3052 GET_MODE_NAME (mem_mode),
3053 (int) max_offset[mem_mode]);
3054 }
3055
3056 rat[mem_mode] = 1;
3057 for (i = 2; i <= MAX_RATIO; i++)
3058 if (multiplier_allowed_in_address_p (i, mem_mode))
3059 {
3060 rat[mem_mode] = i;
3061 break;
3062 }
3063
3064 /* Compute the cost of various addressing modes. */
3065 acost = 0;
3066 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3067 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3068
3069 for (i = 0; i < 16; i++)
3070 {
3071 sym_p = i & 1;
3072 var_p = (i >> 1) & 1;
3073 off_p = (i >> 2) & 1;
3074 rat_p = (i >> 3) & 1;
3075
3076 addr = reg0;
3077 if (rat_p)
3078 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3079 gen_int_mode (rat[mem_mode], Pmode));
3080
3081 if (var_p)
3082 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3083
3084 if (sym_p)
3085 {
3086 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3087 /* ??? We can run into trouble with some backends by presenting
3088 it with symbols which haven't been properly passed through
3089 targetm.encode_section_info. By setting the local bit, we
3090 enhance the probability of things working. */
3091 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3092
3093 if (off_p)
3094 base = gen_rtx_fmt_e (CONST, Pmode,
3095 gen_rtx_fmt_ee (PLUS, Pmode,
3096 base,
3097 gen_int_mode (off[mem_mode],
3098 Pmode)));
3099 }
3100 else if (off_p)
3101 base = gen_int_mode (off[mem_mode], Pmode);
3102 else
3103 base = NULL_RTX;
3104
3105 if (base)
3106 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3107
3108 start_sequence ();
3109 /* To avoid splitting addressing modes, pretend that no cse will
3110 follow. */
3111 old_cse_not_expected = cse_not_expected;
3112 cse_not_expected = true;
3113 addr = memory_address (mem_mode, addr);
3114 cse_not_expected = old_cse_not_expected;
3115 seq = get_insns ();
3116 end_sequence ();
3117
3118 acost = seq_cost (seq, speed);
3119 acost += address_cost (addr, mem_mode, speed);
3120
3121 if (!acost)
3122 acost = 1;
3123 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3124 }
3125
3126      /* On some targets, it is quite expensive to load a symbol into a register,
3127	 which makes addresses that contain symbols look much more expensive.
3128	 However, the symbol will have to be loaded in any case before the
3129	 loop (and quite likely we have it in a register already), so it does not
3130	 make much sense to penalize them too heavily.  So make some final
3131	 tweaks for the SYMBOL_PRESENT modes:
3132
3133 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3134 var is cheaper, use this mode with small penalty.
3135 If VAR_PRESENT is true, try whether the mode with
3136 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3137 if this is the case, use it. */
3138 add_c = add_cost (Pmode, speed);
3139 for (i = 0; i < 8; i++)
3140 {
3141 var_p = i & 1;
3142 off_p = (i >> 1) & 1;
3143 rat_p = (i >> 2) & 1;
3144
3145 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3146 if (var_p)
3147 acost += add_c;
3148
3149 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3150 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3151 }
3152
3153 if (dump_file && (dump_flags & TDF_DETAILS))
3154 {
3155 fprintf (dump_file, "Address costs:\n");
3156
3157 for (i = 0; i < 16; i++)
3158 {
3159 sym_p = i & 1;
3160 var_p = (i >> 1) & 1;
3161 off_p = (i >> 2) & 1;
3162 rat_p = (i >> 3) & 1;
3163
3164 fprintf (dump_file, " ");
3165 if (sym_p)
3166 fprintf (dump_file, "sym + ");
3167 if (var_p)
3168 fprintf (dump_file, "var + ");
3169 if (off_p)
3170 fprintf (dump_file, "cst + ");
3171 if (rat_p)
3172 fprintf (dump_file, "rat * ");
3173
3174 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3175 fprintf (dump_file, "index costs %d\n", acost);
3176 }
3177 fprintf (dump_file, "\n");
3178 }
3179 }
3180
3181 bits = GET_MODE_BITSIZE (Pmode);
3182 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3183 offset &= mask;
3184 if ((offset >> (bits - 1) & 1))
3185 offset |= ~mask;
3186 s_offset = offset;
3187
3188 cost = 0;
3189 offset_p = (s_offset != 0
3190 && min_offset[mem_mode] <= s_offset
3191 && s_offset <= max_offset[mem_mode]);
3192 ratio_p = (ratio != 1
3193 && multiplier_allowed_in_address_p (ratio, mem_mode));
3194
3195 if (ratio != 1 && !ratio_p)
3196 cost += multiply_by_cost (ratio, Pmode, speed);
3197
3198 if (s_offset && !offset_p && !symbol_present)
3199 cost += add_cost (Pmode, speed);
3200
3201 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3202 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3203 return new_cost (cost + acost, complexity);
3204}
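
/* Illustrative sketch, hypothetical name, of the masking performed above:
   truncating an offset to the BITS-bit width of Pmode and sign-extending
   it back into a HOST_WIDE_INT.  The double shift builds the mask without
   ever shifting by the full word width, which would be undefined.  For
   bits == 8, 0xff becomes -1 and 0x7f stays 127. */

static HOST_WIDE_INT
sketch_sign_extend (unsigned HOST_WIDE_INT val, unsigned bits)
{
  unsigned HOST_WIDE_INT mask
    = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);

  val &= mask;
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  return (HOST_WIDE_INT) val;
}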
3205
3206/* Estimates cost of forcing expression EXPR into a variable. */
3207
3208static comp_cost
3209force_expr_to_var_cost (tree expr, bool speed)
3210{
3211 static bool costs_initialized = false;
3212 static unsigned integer_cost [2];
3213 static unsigned symbol_cost [2];
3214 static unsigned address_cost [2];
3215 tree op0, op1;
3216 comp_cost cost0, cost1, cost;
3217 enum machine_mode mode;
3218
3219 if (!costs_initialized)
3220 {
3221 tree type = build_pointer_type (integer_type_node);
3222 tree var, addr;
3223 rtx x;
3224 int i;
3225
3226 var = create_tmp_var_raw (integer_type_node, "test_var");
3227 TREE_STATIC (var) = 1;
3228 x = produce_memory_decl_rtl (var, NULL);
3229 SET_DECL_RTL (var, x);
3230
3231 addr = build1 (ADDR_EXPR, type, var);
3232
3233
3234 for (i = 0; i < 2; i++)
3235 {
3236 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3237 2000), i);
3238
3239 symbol_cost[i] = computation_cost (addr, i) + 1;
3240
3241 address_cost[i]
3242 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3243 addr,
3244 build_int_cst (sizetype, 2000)), i) + 1;
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 {
3247 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3248 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3249 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3250 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3251 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3252 fprintf (dump_file, "\n");
3253 }
3254 }
3255
3256 costs_initialized = true;
3257 }
3258
3259 STRIP_NOPS (expr);
3260
3261 if (SSA_VAR_P (expr))
3262 return zero_cost;
3263
3264 if (is_gimple_min_invariant (expr))
3265 {
3266 if (TREE_CODE (expr) == INTEGER_CST)
3267 return new_cost (integer_cost [speed], 0);
3268
3269 if (TREE_CODE (expr) == ADDR_EXPR)
3270 {
3271 tree obj = TREE_OPERAND (expr, 0);
3272
3273 if (TREE_CODE (obj) == VAR_DECL
3274 || TREE_CODE (obj) == PARM_DECL
3275 || TREE_CODE (obj) == RESULT_DECL)
3276 return new_cost (symbol_cost [speed], 0);
3277 }
3278
3279 return new_cost (address_cost [speed], 0);
3280 }
3281
3282 switch (TREE_CODE (expr))
3283 {
3284 case POINTER_PLUS_EXPR:
3285 case PLUS_EXPR:
3286 case MINUS_EXPR:
3287 case MULT_EXPR:
3288 op0 = TREE_OPERAND (expr, 0);
3289 op1 = TREE_OPERAND (expr, 1);
3290 STRIP_NOPS (op0);
3291 STRIP_NOPS (op1);
3292
3293 if (is_gimple_val (op0))
3294 cost0 = zero_cost;
3295 else
3296 cost0 = force_expr_to_var_cost (op0, speed);
3297
3298 if (is_gimple_val (op1))
3299 cost1 = zero_cost;
3300 else
3301 cost1 = force_expr_to_var_cost (op1, speed);
3302
3303 break;
3304
3305 default:
3306 /* Just an arbitrary value, FIXME. */
3307 return new_cost (target_spill_cost[speed], 0);
3308 }
3309
3310 mode = TYPE_MODE (TREE_TYPE (expr));
3311 switch (TREE_CODE (expr))
3312 {
3313 case POINTER_PLUS_EXPR:
3314 case PLUS_EXPR:
3315 case MINUS_EXPR:
3316 cost = new_cost (add_cost (mode, speed), 0);
3317 break;
3318
3319 case MULT_EXPR:
3320 if (cst_and_fits_in_hwi (op0))
3321 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3322 else if (cst_and_fits_in_hwi (op1))
3323 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3324 else
3325 return new_cost (target_spill_cost [speed], 0);
3326 break;
3327
3328 default:
3329 gcc_unreachable ();
3330 }
3331
3332 cost = add_costs (cost, cost0);
3333 cost = add_costs (cost, cost1);
3334
3335 /* Bound the cost by target_spill_cost. The parts of complicated
3336 computations often are either loop invariant or at least can
3337 be shared between several iv uses, so letting this grow without
3338 limits would not give reasonable results. */
3339 if (cost.cost > target_spill_cost [speed])
3340 cost.cost = target_spill_cost [speed];
3341
3342 return cost;
3343}
3344
3345/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3347
3348static comp_cost
3349force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3351{
3352 if (depends_on)
3353 {
3354 fd_ivopts_data = data;
3355 walk_tree (&expr, find_depends, depends_on, NULL);
3356 }
3357
3358 return force_expr_to_var_cost (expr, data->speed);
3359}
3360
3361/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3362 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3363 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3364 invariants the computation depends on. */
3365
3366static comp_cost
3367split_address_cost (struct ivopts_data *data,
3368 tree addr, bool *symbol_present, bool *var_present,
3369 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3370{
3371 tree core;
3372 HOST_WIDE_INT bitsize;
3373 HOST_WIDE_INT bitpos;
3374 tree toffset;
3375 enum machine_mode mode;
3376 int unsignedp, volatilep;
3377
3378 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3379 &unsignedp, &volatilep, false);
3380
3381 if (toffset != 0
3382 || bitpos % BITS_PER_UNIT != 0
3383 || TREE_CODE (core) != VAR_DECL)
3384 {
3385 *symbol_present = false;
3386 *var_present = true;
3387 fd_ivopts_data = data;
3388 walk_tree (&addr, find_depends, depends_on, NULL);
3389 return new_cost (target_spill_cost[data->speed], 0);
3390 }
3391
3392 *offset += bitpos / BITS_PER_UNIT;
3393 if (TREE_STATIC (core)
3394 || DECL_EXTERNAL (core))
3395 {
3396 *symbol_present = true;
3397 *var_present = false;
3398 return zero_cost;
3399 }
3400
3401 *symbol_present = false;
3402 *var_present = true;
3403 return zero_cost;
3404}
3405
3406/* Estimates cost of expressing difference of addresses E1 - E2 as
3407 var + symbol + offset. The value of offset is added to OFFSET,
3408 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3409 part is missing. DEPENDS_ON is a set of the invariants the computation
3410 depends on. */
3411
3412static comp_cost
3413ptr_difference_cost (struct ivopts_data *data,
3414 tree e1, tree e2, bool *symbol_present, bool *var_present,
3415 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3416{
3417 HOST_WIDE_INT diff = 0;
3418 comp_cost cost;
3419 bool speed = optimize_loop_for_speed_p (data->current_loop);
3420
3421 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3422
3423 if (ptr_difference_const (e1, e2, &diff))
3424 {
3425 *offset += diff;
3426 *symbol_present = false;
3427 *var_present = false;
3428 return zero_cost;
3429 }
3430
3431 if (integer_zerop (e2))
3432 return split_address_cost (data, TREE_OPERAND (e1, 0),
3433 symbol_present, var_present, offset, depends_on);
3434
3435 *symbol_present = false;
3436 *var_present = true;
3437
3438 cost = force_var_cost (data, e1, depends_on);
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440 cost.cost += add_cost (Pmode, speed);
3441
3442 return cost;
3443}
3444
3445/* Estimates cost of expressing difference E1 - E2 as
3446 var + symbol + offset. The value of offset is added to OFFSET,
3447 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3448 part is missing. DEPENDS_ON is a set of the invariants the computation
3449 depends on. */
3450
3451static comp_cost
3452difference_cost (struct ivopts_data *data,
3453 tree e1, tree e2, bool *symbol_present, bool *var_present,
3454 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3455{
3456 comp_cost cost;
3457 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3458 unsigned HOST_WIDE_INT off1, off2;
3459
3460 e1 = strip_offset (e1, &off1);
3461 e2 = strip_offset (e2, &off2);
3462 *offset += off1 - off2;
3463
3464 STRIP_NOPS (e1);
3465 STRIP_NOPS (e2);
3466
3467 if (TREE_CODE (e1) == ADDR_EXPR)
3468 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3469 depends_on);
3470 *symbol_present = false;
3471
3472 if (operand_equal_p (e1, e2, 0))
3473 {
3474 *var_present = false;
3475 return zero_cost;
3476 }
3477 *var_present = true;
3478 if (integer_zerop (e2))
3479 return force_var_cost (data, e1, depends_on);
3480
3481 if (integer_zerop (e1))
3482 {
3483 cost = force_var_cost (data, e2, depends_on);
3484 cost.cost += multiply_by_cost (-1, mode, data->speed);
3485
3486 return cost;
3487 }
3488
3489 cost = force_var_cost (data, e1, depends_on);
3490 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3491 cost.cost += add_cost (mode, data->speed);
3492
3493 return cost;
3494}
3495
3496/* Determines the cost of the computation by which USE is expressed
3497   from induction variable CAND.  If ADDRESS_P is true, we just need
3498   to create an address from it, otherwise we want to get it into a
3499   register.  A set of invariants we depend on is stored in
3500   DEPENDS_ON.  AT is the statement at which the value is computed. */
3501
3502static comp_cost
3503get_computation_cost_at (struct ivopts_data *data,
3504 struct iv_use *use, struct iv_cand *cand,
3505 bool address_p, bitmap *depends_on, gimple at)
3506{
3507 tree ubase = use->iv->base, ustep = use->iv->step;
3508 tree cbase, cstep;
3509 tree utype = TREE_TYPE (ubase), ctype;
3510 unsigned HOST_WIDE_INT cstepi, offset = 0;
3511 HOST_WIDE_INT ratio, aratio;
3512 bool var_present, symbol_present;
3513 comp_cost cost;
3514 unsigned n_sums;
3515 double_int rat;
3516 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3517
3518 *depends_on = NULL;
3519
3520 /* Only consider real candidates. */
3521 if (!cand->iv)
3522 return infinite_cost;
3523
3524 cbase = cand->iv->base;
3525 cstep = cand->iv->step;
3526 ctype = TREE_TYPE (cbase);
3527
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3529 {
3530      /* We do not have enough precision to express the values of the use. */
3531 return infinite_cost;
3532 }
3533
3534 if (address_p)
3535 {
3536 /* Do not try to express address of an object with computation based
3537 on address of a different object. This may cause problems in rtl
3538 level alias analysis (that does not expect this to be happening,
3539 as this is illegal in C), and would be unlikely to be useful
3540 anyway. */
3541 if (use->iv->base_object
3542 && cand->iv->base_object
3543 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3544 return infinite_cost;
3545 }
3546
3547 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3548 {
3549 /* TODO -- add direct handling of this case. */
3550 goto fallback;
3551 }
3552
3553  /* CSTEPI is removed from the offset in case the statement is after the
3554     increment.  If the step is not constant, we use zero instead.
3555     This is a bit imprecise (there is the extra addition), but
3556     redundancy elimination is likely to transform the code so that
3557     it uses the value of the variable before the increment anyway,
3558     so this is not all that unrealistic. */
3559 if (cst_and_fits_in_hwi (cstep))
3560 cstepi = int_cst_value (cstep);
3561 else
3562 cstepi = 0;
3563
3564 if (!constant_multiple_of (ustep, cstep, &rat))
3565 return infinite_cost;
3566
3567 if (double_int_fits_in_shwi_p (rat))
3568 ratio = double_int_to_shwi (rat);
3569 else
3570 return infinite_cost;
3571
3572 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3573 or ratio == 1, it is better to handle this like
3574
3575 ubase - ratio * cbase + ratio * var
3576
3577     (this also holds in the case ratio == -1, TODO). */
3578
3579 if (cst_and_fits_in_hwi (cbase))
3580 {
3581 offset = - ratio * int_cst_value (cbase);
3582 cost = difference_cost (data,
3583 ubase, build_int_cst (utype, 0),
3584 &symbol_present, &var_present, &offset,
3585 depends_on);
3586 }
3587 else if (ratio == 1)
3588 {
3589 cost = difference_cost (data,
3590 ubase, cbase,
3591 &symbol_present, &var_present, &offset,
3592 depends_on);
3593 }
3594 else
3595 {
3596 cost = force_var_cost (data, cbase, depends_on);
3597 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3598 cost = add_costs (cost,
3599 difference_cost (data,
3600 ubase, build_int_cst (utype, 0),
3601 &symbol_present, &var_present,
3602 &offset, depends_on));
3603 }
3604
3605 /* If we are after the increment, the value of the candidate is higher by
3606 one iteration. */
3607 if (stmt_after_increment (data->current_loop, cand, at))
3608 offset -= ratio * cstepi;
3609
3610  /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3611 (symbol/var/const parts may be omitted). If we are looking for an address,
3612 find the cost of addressing this. */
3613 if (address_p)
3614 return add_costs (cost, get_address_cost (symbol_present, var_present,
3615 offset, ratio,
3616 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3617
3618 /* Otherwise estimate the costs for computing the expression. */
3619 aratio = ratio > 0 ? ratio : -ratio;
3620 if (!symbol_present && !var_present && !offset)
3621 {
3622 if (ratio != 1)
3623 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3624
3625 return cost;
3626 }
3627
3628 if (aratio != 1)
3629 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3630
3631 n_sums = 1;
3632 if (var_present
3633 /* Symbol + offset should be compile-time computable. */
3634 && (symbol_present || offset))
3635 n_sums++;
3636
3637  /* Having an offset does not affect the runtime cost if it is added to a
3638     symbol, but it increases complexity. */
3639 if (offset)
3640 cost.complexity++;
3641
3642 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3643 return cost;
3644
3645fallback:
3646 {
3647 /* Just get the expression, expand it and measure the cost. */
3648 tree comp = get_computation_at (data->current_loop, use, cand, at);
3649
3650 if (!comp)
3651 return infinite_cost;
3652
3653 if (address_p)
3654 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3655
3656 return new_cost (computation_cost (comp, speed), 0);
3657 }
3658}
3659
3660/* Determines the cost of the computation by which USE is expressed
3661   from induction variable CAND.  If ADDRESS_P is true, we just need
3662   to create an address from it, otherwise we want to get it into a
3663   register.  A set of invariants we depend on is stored in
3664   DEPENDS_ON. */
3665
3666static comp_cost
3667get_computation_cost (struct ivopts_data *data,
3668 struct iv_use *use, struct iv_cand *cand,
3669 bool address_p, bitmap *depends_on)
3670{
3671 return get_computation_cost_at (data,
3672 use, cand, address_p, depends_on, use->stmt);
3673}
3674
3675/* Determines cost of basing replacement of USE on CAND in a generic
3676 expression. */
3677
3678static bool
3679determine_use_iv_cost_generic (struct ivopts_data *data,
3680 struct iv_use *use, struct iv_cand *cand)
3681{
3682 bitmap depends_on;
3683 comp_cost cost;
3684
3685  /* The simple case first -- if we need to express the value of the preserved
3686     original biv, the cost is 0.  This also prevents us from counting the
3687     cost of the increment twice -- once at this use and once in the cost of
3688     the candidate. */
3689 if (cand->pos == IP_ORIGINAL
3690 && cand->incremented_at == use->stmt)
3691 {
3692 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3693 return true;
3694 }
3695
3696 cost = get_computation_cost (data, use, cand, false, &depends_on);
3697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3698
3699 return !infinite_cost_p (cost);
3700}
3701
3702/* Determines cost of basing replacement of USE on CAND in an address. */
3703
3704static bool
3705determine_use_iv_cost_address (struct ivopts_data *data,
3706 struct iv_use *use, struct iv_cand *cand)
3707{
3708 bitmap depends_on;
3709 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3710
3711 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3712
3713 return !infinite_cost_p (cost);
3714}
3715
3716/* Computes the value of candidate CAND at position AT in iteration NITER, and
3717   stores it in VAL. */
3718
3719static void
3720cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3721 aff_tree *val)
3722{
3723 aff_tree step, delta, nit;
3724 struct iv *iv = cand->iv;
3725 tree type = TREE_TYPE (iv->base);
3726 tree steptype = type;
3727 if (POINTER_TYPE_P (type))
3728 steptype = sizetype;
3729
3730 tree_to_aff_combination (iv->step, steptype, &step);
3731 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3732 aff_combination_convert (&nit, steptype);
3733 aff_combination_mult (&nit, &step, &delta);
3734 if (stmt_after_increment (loop, cand, at))
3735 aff_combination_add (&delta, &step);
3736
3737 tree_to_aff_combination (iv->base, type, val);
3738 aff_combination_add (val, &delta);
3739}
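
/* Illustrative numeric sketch, hypothetical name, of the computation
   above: a candidate with base B and step S has the value B + S * NITER
   at the start of iteration NITER, plus one extra step when the
   statement is after the increment. */

static HOST_WIDE_INT
sketch_cand_value (HOST_WIDE_INT base, HOST_WIDE_INT step,
		   HOST_WIDE_INT niter, int after_increment)
{
  HOST_WIDE_INT delta = step * niter;

  if (after_increment)
    delta += step;

  return base + delta;
}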
3740
3741/* Returns the period of induction variable IV. */
3742
3743static tree
3744iv_period (struct iv *iv)
3745{
3746 tree step = iv->step, period, type;
3747 tree pow2div;
3748
3749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3750
3751 /* Period of the iv is gcd (step, type range). Since type range is power