[dragonfly.git] / contrib / gcc-4.4 / gcc / tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
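
/* As an illustrative sketch (not from the original sources): a loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   computes the address &a[i], i.e. a + 4 * i for 4-byte elements, on every
   iteration.  Choosing a pointer induction variable instead gives

     for (p = &a[0]; p < &a[100]; p++)
       *p = 0;

   which replaces the implied multiplication by a simple pointer increment
   (strength reduction) and, once the exit test is rewritten in terms of P,
   allows the counter I to be eliminated entirely (induction variable
   elimination).  */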

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;              /* Initial value of the iv.  */
  tree base_object;       /* A memory object to which the induction variable
                             points.  */
  tree step;              /* Step of the iv (constant only).  */
  tree ssa_name;          /* The ssa name with the value.  */
  bool biv_p;             /* Is it a biv?  */
  bool have_use_for;      /* Do we already have a use for it?  */
  unsigned use_id;        /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;              /* The ssa name.  */
  struct iv *iv;          /* Induction variable description.  */
  bool has_nonlin_use;    /* For a loop-level invariant, whether it is used in
                             an expression that is not an induction variable.  */
  unsigned inv_id;        /* Id of an invariant.  */
  bool preserve_biv;      /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,     /* Use in a nonlinear expression.  */
  USE_ADDRESS,            /* Use in an address.  */
  USE_COMPARE             /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;          /* The runtime cost.  */
  unsigned complexity;    /* The estimate of the complexity of the code for
                             the computation (in no concrete units --
                             complexity field should be larger for more
                             complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;   /* The candidate.  */
  comp_cost cost;         /* The cost.  */
  bitmap depends_on;      /* The list of invariants that have to be
                             preserved.  */
  tree value;             /* For final value elimination, the expression for
                             the final value of the iv.  For iv elimination,
                             the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;            /* The id of the use.  */
  enum use_type type;     /* Type of the use.  */
  struct iv *iv;          /* The induction variable it is based on.  */
  gimple stmt;            /* Statement in which it occurs.  */
  tree *op_p;             /* The place where it occurs.  */
  bitmap related_cands;   /* The set of "related" iv candidates, plus the common
                             important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                          /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                          /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,              /* At the end, just before the exit condition.  */
  IP_END,                 /* At the end of the latch block.  */
  IP_ORIGINAL             /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;            /* The number of the candidate.  */
  bool important;         /* Whether this is an "important" candidate, i.e. such
                             that it should be considered by all uses.  */
  enum iv_position pos;   /* Where it is computed.  */
  gimple incremented_at;  /* For the original biv, the statement where it is
                             incremented.  */
  tree var_before;        /* The variable used for it before increment.  */
  tree var_after;         /* The variable used for it after increment.  */
  struct iv *iv;          /* The value of the candidate.  NULL for
                             "pseudocandidate" used to indicate the possibility
                             to replace the final value of an iv by direct
                             computation of the value.  */
  unsigned cost;          /* Cost of the candidate.  */
  bitmap depends_on;      /* The list of invariants that are used in the step
                             of the biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (gsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
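
/* E.g. (illustrative): for EXPR = &a[i] the base object is the address of A
   itself, while for an integer constant cast to a pointer no base object can
   be determined and NULL_TREE is returned.  */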

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
892
893/* Finds basic ivs. */
894
895static bool
896find_bivs (struct ivopts_data *data)
897{
898 gimple phi;
899 tree step, type, base;
900 bool found = false;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
903
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
905 {
906 phi = gsi_stmt (psi);
907
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
909 continue;
910
911 step = determine_biv_step (phi);
912 if (!step)
913 continue;
914
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
919 continue;
920
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
923 if (step)
924 {
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
927 else
928 step = fold_convert (type, step);
929 }
930
931 set_iv (data, PHI_RESULT (phi), base, step);
932 found = true;
933 }
934
935 return found;
936}
937
938/* Marks basic ivs. */
939
940static void
941mark_bivs (struct ivopts_data *data)
942{
943 gimple phi;
944 tree var;
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
947 basic_block incr_bb;
948 gimple_stmt_iterator psi;
949
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
951 {
952 phi = gsi_stmt (psi);
953
954 iv = get_iv (data, PHI_RESULT (phi));
955 if (!iv)
956 continue;
957
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
960 if (!incr_iv)
961 continue;
962
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
967 continue;
968
969 iv->biv_p = true;
970 incr_iv->biv_p = true;
971 }
972}
973
974/* Checks whether STMT defines a linear induction variable and stores its
975 parameters to IV. */
976
977static bool
978find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
979{
980 tree lhs;
981 struct loop *loop = data->current_loop;
982
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
985
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
987 return false;
988
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
991 return false;
992
993 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
994 return false;
995 iv->base = expand_simple_operations (iv->base);
996
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
999 return false;
1000
1001 return true;
1002}
1003
1004/* Finds general ivs in statement STMT. */
1005
1006static void
1007find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1008{
1009 affine_iv iv;
1010
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1012 return;
1013
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1015}
1016
1017/* Finds general ivs in basic block BB. */
1018
1019static void
1020find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1021{
1022 gimple_stmt_iterator bsi;
1023
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1026}
1027
1028/* Finds general ivs. */
1029
1030static void
1031find_givs (struct ivopts_data *data)
1032{
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1035 unsigned i;
1036
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1039 free (body);
1040}
1041
1042/* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1044
1045static bool
1046find_induction_variables (struct ivopts_data *data)
1047{
1048 unsigned i;
1049 bitmap_iterator bi;
1050
1051 if (!find_bivs (data))
1052 return false;
1053
1054 find_givs (data);
1055 mark_bivs (data);
1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 {
1059 tree niter = niter_for_single_dom_exit (data);
1060
1061 if (niter)
1062 {
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
1066 };
1067
1068 fprintf (dump_file, "Induction variables:\n\n");
1069
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1071 {
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1074 }
1075 }
1076
1077 return true;
1078}
1079
1080/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1081
1082static struct iv_use *
1083record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1085{
1086 struct iv_use *use = XCNEW (struct iv_use);
1087
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1090 use->iv = iv;
1091 use->stmt = stmt;
1092 use->op_p = use_p;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1094
1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1096 caller. */
1097 iv->ssa_name = NULL_TREE;
1098
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1101
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1103
1104 return use;
1105}
1106
1107/* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1110
1111static void
1112record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1113{
1114 basic_block bb;
1115 struct version_info *info;
1116
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1119 return;
1120
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1122 if (bb
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1124 return;
1125
1126 info = name_info (data, op);
1127 info->name = op;
1128 info->has_nonlin_use |= nonlinear_use;
1129 if (!info->inv_id)
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1132}
1133
1134/* Checks whether the use OP is interesting and if so, records it. */
1135
1136static struct iv_use *
1137find_interesting_uses_op (struct ivopts_data *data, tree op)
1138{
1139 struct iv *iv;
1140 struct iv *civ;
1141 gimple stmt;
1142 struct iv_use *use;
1143
1144 if (TREE_CODE (op) != SSA_NAME)
1145 return NULL;
1146
1147 iv = get_iv (data, op);
1148 if (!iv)
1149 return NULL;
1150
1151 if (iv->have_use_for)
1152 {
1153 use = iv_use (data, iv->use_id);
1154
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1156 return use;
1157 }
1158
1159 if (integer_zerop (iv->step))
1160 {
1161 record_invariant (data, op, true);
1162 return NULL;
1163 }
1164 iv->have_use_for = true;
1165
1166 civ = XNEW (struct iv);
1167 *civ = *iv;
1168
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1172
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1175
1176 return use;
1177}
1178
1179/* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1186
1187static bool
1188extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1191{
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1194 static tree zero;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1197 bool ret = false;
1198
1199 if (gimple_code (stmt) == GIMPLE_COND)
1200 {
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1203 }
1204 else
1205 {
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1208 }
1209
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1212
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1217
1218 /* Exactly one of the compared values must be an iv, and the other one must
1219 be an invariant. */
1220 if (!iv0 || !iv1)
1221 goto end;
1222
1223 if (integer_zerop (iv0->step))
1224 {
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1228 }
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1230
1231end:
1232 if (control_var)
1233 *control_var = op0;;
1234 if (iv_var)
1235 *iv_var = iv0;;
1236 if (bound)
1237 *bound = op1;
1238 if (iv_bound)
1239 *iv_bound = iv1;
1240
1241 return ret;
1242}
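
/* E.g. (illustrative): for a GIMPLE_COND testing  i_1 < n_2  where i_1 is an
   induction variable and n_2 is loop invariant, *CONTROL_VAR is set to point
   to i_1, *BOUND to point to n_2, and true is returned.  */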

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
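
/* E.g. (illustrative): for TOP = (n_1 + 1) * 4 and BOT = n_1 + 1 this stores
   4 to *MUL and returns true; for TOP = 13 and BOT = 4 it returns false,
   since 13 is not a constant multiple of 4.  */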

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            {
              tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
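
/* E.g. (illustrative): for EXPR = &a[2], where A is an array of 4-byte
   elements, this returns the (re-folded) address of A itself and stores 8
   to *OFFSET.  */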
2021
2022/* Returns variant of TYPE that can be used as base for different uses.
2023 We return unsigned type with the same precision, which avoids problems
2024 with overflows. */
2025
2026static tree
2027generic_type_for (tree type)
2028{
2029 if (POINTER_TYPE_P (type))
2030 return unsigned_type_for (type);
2031
2032 if (TYPE_UNSIGNED (type))
2033 return type;
2034
2035 return unsigned_type_for (type);
2036}
2037
2038/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2039 the bitmap to that we should store it. */
2040
2041static struct ivopts_data *fd_ivopts_data;
2042static tree
2043find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2044{
2045 bitmap *depends_on = (bitmap *) data;
2046 struct version_info *info;
2047
2048 if (TREE_CODE (*expr_p) != SSA_NAME)
2049 return NULL_TREE;
2050 info = name_info (fd_ivopts_data, *expr_p);
2051
2052 if (!info->inv_id || info->has_nonlin_use)
2053 return NULL_TREE;
2054
2055 if (!*depends_on)
2056 *depends_on = BITMAP_ALLOC (NULL);
2057 bitmap_set_bit (*depends_on, info->inv_id);
2058
2059 return NULL_TREE;
2060}

/* Adds a candidate BASE + STEP * i.  The important field is set to
   IMPORTANT and the position to POS.  If USE is not NULL, the candidate is
   set as related to it.  If both BASE and STEP are NULL, we add a
   pseudocandidate for the replacement of the final value of the iv by a
   direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment,
   thus creating a jump, possibly confusing other optimization passes and
   leaving less freedom to the scheduler.  So we allow IP_END_POS only if
   IP_NORMAL_POS is not available (so we do not have a better alternative),
   or if the latch edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to
   IMPORTANT and the position to POS.  If USE is not NULL, the candidate is
   set as related to it.  The candidate computation is scheduled on all
   available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}

/* Adds a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type, if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
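
/* On a typical 64-bit target with a 32-bit "int" this adds the candidate
   "0 + 1 * i" both in unsigned int and in an unsigned 64-bit type; on a
   32-bit target only the former, since the double-width type would not
   be fast there.  */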

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
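
/* So for an original biv "i = 5; ...; i += 2" this typically yields three
   candidates: base 5 / step 2 (a fresh variable), base 0 / step 2 (the
   generic zero-based variant), and the IP_ORIGINAL candidate that keeps
   the biv itself.  */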

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
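
/* For an address use like &a[i + 4] with 4-byte elements, the third case
   adds a candidate based on &a[0] with the constant 16 stripped off, so
   that several uses differing only in their offsets can share one iv.
   (The element size here is illustrative.)  */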

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Records the important candidates and adds them to the related_cands
   bitmaps if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array;
   otherwise we allocate a simple list for every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the nearest power of two, so that computing
	     modulo by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
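
/* E.g. with 5 related candidates the table is rounded up to 8 slots, so
   the probing in set_use_iv_cost/get_use_iv_cost below can reduce a
   candidate id modulo the size with a simple mask.  */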

/* Returns the description of a computation whose runtime cost is RUNTIME
   and whose complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
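
/* Costs therefore compare primarily by the runtime part, with the
   complexity acting as a tie-breaker; e.g. {4, 1} is cheaper than {4, 2}
   but more expensive than {3, 7}.  */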

/* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
   depends on invariants DEPENDS_ON and that the value used in expressing
   it is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets the cost of the (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
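
/* The cost map is thus a small open-addressed hash table with linear
   probing: a candidate with id 11 in an 8-slot table starts probing at
   slot 3 and wraps around to the beginning if needed.  */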

/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET, speed);
      else
	cost++;
    }

  return cost;
}

/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (Pmode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
    }

  return x;
}

/* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines the cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  enum function_frequency real_frequency = cfun->function_frequency;

  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  cfun->function_frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);

  return cost;
}

/* Returns the variable containing the value of candidate CAND at statement
   AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Returns the most significant (sign) bit of T.  Similar to
   tree_int_cst_msb, but the bit is determined from TYPE_PRECISION, not
   MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
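
/* For instance, for a 16-bit type the sign bit is bit 15, so the constant
   0x8000 yields 1 here even if the underlying mode is wider.  */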

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
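
/* E.g. if both bases are (unsigned int) casts of 64-bit values, their
   difference is computed in the 64-bit type, where it often folds better,
   and only the result is narrowed back to the unsigned type.  */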

/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in a
   decomposed form into AFF.  Returns false if USE cannot be expressed
   using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of the use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not a noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}

/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction
   variable CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
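
/* As a concrete instance of the formula above: if the use is the iv
   {base 8, step 4} and the candidate is {base 0, step 1}, then the ratio
   is 4 and the use is rewritten as 8 - 4 * 0 + 4 * var, i.e. 8 + 4 * var.  */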

/* Returns the cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Computes the hash value for ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}

/* Returns the cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Tests the
   validity for a memory reference accessing memory of mode MODE.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
{
#define MAX_RATIO 128
  static sbitmap valid_mult[MAX_MACHINE_MODE];

  if (!valid_mult[mode])
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult[mode]);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (mode, addr))
	    SET_BIT (valid_mult[mode], i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
}
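
/* The answer is probed from the target; e.g. on i386-like targets the
   index scales 1, 2, 4 and 8 are valid, so only those bits end up set in
   valid_mult (the exact set naturally varies between targets).  */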

/* Returns the cost of an address in the shape symbol + var + OFFSET +
   RATIO * index.  If SYMBOL_PRESENT is false, the symbol is omitted.  If
   VAR_PRESENT is false, the variable is omitted.  Computes the cost for a
   memory reference that accesses a memory location of mode MEM_MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  enum machine_mode mem_mode,
		  bool speed)
{
  static bool initialized[MAX_MACHINE_MODE];
  static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
  static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
  static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized[mem_mode])
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      initialized[mem_mode] = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (mem_mode, addr))
	    break;
	}
      max_offset[mem_mode] = i == start ? 0 : i >> 1;
      off[mem_mode] = max_offset[mem_mode];

      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (mem_mode, addr))
	    break;
	}
      min_offset[mem_mode] = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) min_offset[mem_mode]);
	  fprintf (dump_file, "  max offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) max_offset[mem_mode]);
	}

      rat[mem_mode] = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode))
	  {
	    rat[mem_mode] = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);

      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
				   gen_int_mode (rat[mem_mode], Pmode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, Pmode,
				      gen_rtx_fmt_ee (PLUS, Pmode,
						      base,
						      gen_int_mode (off[mem_mode],
								    Pmode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off[mem_mode], Pmode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address (mem_mode, addr);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, speed);

	  if (!acost)
	    acost = 1;
	  costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load a symbol into a
	 register, which makes addresses that contain symbols look much more
	 expensive.  However, the symbol will have to be loaded in any case
	 before the loop (and quite likely we have it in a register already),
	 so it does not make much sense to penalize them too heavily.  So
	 make some final tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with a small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
	 and if this is the case, use it.  */
      add_c = add_cost (Pmode, speed);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
	    costs[mem_mode][1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  fprintf (dump_file, "\n");
	}
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset[mem_mode] <= s_offset
	      && s_offset <= max_offset[mem_mode]);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (Pmode, speed);

  acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
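
/* So a query for the address var + 16 + 4 * index on a target that supports
   that shape directly is answered from the precomputed costs table with
   complexity 3, while an unsupported ratio adds the cost of an explicit
   multiplication on top.  */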

/* Estimates the cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (build2 (POINTER_PLUS_EXPR, type,
					addr,
					build_int_cst (sizetype, 2000)), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
	cost1 = zero_cost;
      else
	cost1 = force_expr_to_var_cost (op1, speed);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates the cost of forcing EXPR into a variable.  DEPENDS_ON is a set
   of the invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}

/* Estimates the cost of expressing address ADDR as var + symbol + offset.
   The value of the offset is added to OFFSET; SYMBOL_PRESENT and
   VAR_PRESENT are set to false if the corresponding part is missing.
   DEPENDS_ON is a set of the invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}

/* Estimates the cost of expressing the difference of addresses E1 - E2 as
   var + symbol + offset.  The value of the offset is added to OFFSET;
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  comp_cost cost;
  bool speed = optimize_loop_for_speed_p (data->current_loop);

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (Pmode, speed);

  return cost;
}

/* Estimates the cost of expressing the difference E1 - E2 as
   var + symbol + offset.  The value of the offset is added to OFFSET;
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  comp_cost cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }
  *var_present = true;
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (mode, data->speed);

  return cost;
}

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  comp_cost cost;
  unsigned n_sums;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of the use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express the address of an object with a computation
	 based on the address of a different object.  This may cause
	 problems in rtl level alias analysis (that does not expect this to
	 be happening, as this is illegal in C), and would be unlikely to be
	 useful anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses the value of the variable before the increment anyway,
     so this is not that unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
     (The symbol/var/const parts may be omitted.)  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost, get_address_cost (symbol_present, var_present,
					      offset, ratio,
					      TYPE_MODE (TREE_TYPE (*use->op_p)), speed));

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);

      return cost;
    }

  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  /* Having an offset does not affect the runtime cost in case it is added
     to a symbol, but it increases the complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
  return cost;

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt);
}

/* Determines the cost of basing the replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;

  /* The simple case first -- if we need to express the value of the
     preserved original biv, the cost is 0.  This also prevents us from
     counting the cost of the increment twice -- once at this use and once
     in the cost of the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}

/* Determines the cost of basing the replacement of USE on CAND in an
   address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}

/* Computes the value of candidate CAND at position AT in iteration NITER,
   and stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

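/* In other words, VAL = base + NITER * step, plus one extra step when AT is
   already past the increment; e.g. the candidate {base 0, step 4} after 10
   iterations evaluates to 40 (or 44 after the increment).  */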