contrib/gcc-8.0/gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 170                            reference.  */
 171   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 172                            cases where the expansion of the function
 173                            will turn the argument into a normal address.  */
 174   USE_COMPARE           /* Use is a compare.  */
 175 };
 176
 177 /* Cost of a computation.  */
 178 struct comp_cost
 179 {
 180   comp_cost (): cost (0), complexity (0), scratch (0)
 181   {}
 182
 183   comp_cost (int cost, unsigned complexity, int scratch = 0)
 184     : cost (cost), complexity (complexity), scratch (scratch)
 185   {}
 186
 187   /* Returns true if COST is infinite.  */
 188   bool infinite_cost_p ();
 189
 190   /* Adds costs COST1 and COST2.  */
 191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 192
 193   /* Adds COST to the comp_cost.  */
 194   comp_cost operator+= (comp_cost cost);
 195
 196   /* Adds constant C to this comp_cost.  */
 197   comp_cost operator+= (HOST_WIDE_INT c);
 198
 199   /* Subtracts constant C to this comp_cost.  */
 200   comp_cost operator-= (HOST_WIDE_INT c);
 201
 202   /* Divide the comp_cost by constant C.  */
 203   comp_cost operator/= (HOST_WIDE_INT c);
 204
 205   /* Multiply the comp_cost by constant C.  */
 206   comp_cost operator*= (HOST_WIDE_INT c);
 207
 208   /* Subtracts costs COST1 and COST2.  */
 209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 210
 211   /* Subtracts COST from this comp_cost.  */
 212   comp_cost operator-= (comp_cost cost);
 213
 214   /* Returns true if COST1 is smaller than COST2.  */
 215   friend bool operator< (comp_cost cost1, comp_cost cost2);
 216
 217   /* Returns true if COST1 and COST2 are equal.  */
 218   friend bool operator== (comp_cost cost1, comp_cost cost2);
 219
 220   /* Returns true if COST1 is smaller or equal than COST2.  */
 221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 222
 223   int cost;             /* The runtime cost.  */
 224   unsigned complexity;  /* The estimate of the complexity of the code for
 225                            the computation (in no concrete units --
 226                            complexity field should be larger for more
 227                            complex expressions and addressing modes).  */
 228   int scratch;          /* Scratch used during cost computation.  */
 229 };
 230
 231 static const comp_cost no_cost;
 232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 233
 234 bool
 235 comp_cost::infinite_cost_p ()
 236 {
 237   return cost == INFTY;
 238 }
 239
 240 comp_cost
 241 operator+ (comp_cost cost1, comp_cost cost2)
 242 {
 243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 244     return infinite_cost;
 245
 246   cost1.cost += cost2.cost;
 247   cost1.complexity += cost2.complexity;
 248
 249   return cost1;
 250 }
 251
 252 comp_cost
 253 operator- (comp_cost cost1, comp_cost cost2)
 254 {
 255   if (cost1.infinite_cost_p ())
 256     return infinite_cost;
 257
 258   gcc_assert (!cost2.infinite_cost_p ());
 259
 260   cost1.cost -= cost2.cost;
 261   cost1.complexity -= cost2.complexity;
 262
 263   return cost1;
 264 }
 265
 266 comp_cost
 267 comp_cost::operator+= (comp_cost cost)
 268 {
 269   *this = *this + cost;
 270   return *this;
 271 }
 272
 273 comp_cost
 274 comp_cost::operator+= (HOST_WIDE_INT c)
 275 {
 276   if (infinite_cost_p ())
 277     return *this;
 278
 279   this->cost += c;
 280
 281   return *this;
 282 }
 283
 284 comp_cost
 285 comp_cost::operator-= (HOST_WIDE_INT c)
 286 {
 287   if (infinite_cost_p ())
 288     return *this;
 289
 290   this->cost -= c;
 291
 292   return *this;
 293 }
 294
 295 comp_cost
 296 comp_cost::operator/= (HOST_WIDE_INT c)
 297 {
 298   if (infinite_cost_p ())
 299     return *this;
 300
 301   this->cost /= c;
 302
 303   return *this;
 304 }
 305
 306 comp_cost
 307 comp_cost::operator*= (HOST_WIDE_INT c)
 308 {
 309   if (infinite_cost_p ())
 310     return *this;
 311
 312   this->cost *= c;
 313
 314   return *this;
 315 }
 316
 317 comp_cost
 318 comp_cost::operator-= (comp_cost cost)
 319 {
 320   *this = *this - cost;
 321   return *this;
 322 }
 323
 324 bool
 325 operator< (comp_cost cost1, comp_cost cost2)
 326 {
 327   if (cost1.cost == cost2.cost)
 328     return cost1.complexity < cost2.complexity;
 329
 330   return cost1.cost < cost2.cost;
 331 }
 332
 333 bool
 334 operator== (comp_cost cost1, comp_cost cost2)
 335 {
 336   return cost1.cost == cost2.cost
 337     && cost1.complexity == cost2.complexity;
 338 }
 339
 340 bool
 341 operator<= (comp_cost cost1, comp_cost cost2)
 342 {
 343   return cost1 < cost2 || cost1 == cost2;
 344 }
 345
 346 struct iv_inv_expr_ent;
 347
 348 /* The candidate - cost pair.  */
 349 struct cost_pair
 350 {
 351   struct iv_cand *cand; /* The candidate.  */
 352   comp_cost cost;       /* The cost.  */
 353   enum tree_code comp;  /* For iv elimination, the comparison.  */
 354   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 355                            preserved when representing iv_use with iv_cand.  */
 356   bitmap inv_exprs;     /* The list of newly created invariant expressions
 357                            when representing iv_use with iv_cand.  */
 358   tree value;           /* For final value elimination, the expression for
 359                            the final value of the iv.  For iv elimination,
 360                            the new bound to compare with.  */
 361 };
 362
 363 /* Use.  */
 364 struct iv_use
 365 {
 366   unsigned id;          /* The id of the use.  */
 367   unsigned group_id;    /* The group id the use belongs to.  */
 368   enum use_type type;   /* Type of the use.  */
 369   tree mem_type;        /* The memory type to use when testing whether an
 370                            address is legitimate, and what the address's
 371                            cost is.  */
 372   struct iv *iv;        /* The induction variable it is based on.  */
 373   gimple *stmt;         /* Statement in that it occurs.  */
 374   tree *op_p;           /* The place where it occurs.  */
 375
 376   tree addr_base;       /* Base address with const offset stripped.  */
 377   poly_uint64_pod addr_offset;
 378                         /* Const offset stripped from base address.  */
 379 };
 380
 381 /* Group of uses.  */
 382 struct iv_group
 383 {
 384   /* The id of the group.  */
 385   unsigned id;
 386   /* Uses of the group are of the same type.  */
 387   enum use_type type;
 388   /* The set of "related" IV candidates, plus the important ones.  */
 389   bitmap related_cands;
 390   /* Number of IV candidates in the cost_map.  */
 391   unsigned n_map_members;
 392   /* The costs wrto the iv candidates.  */
 393   struct cost_pair *cost_map;
 394   /* The selected candidate for the group.  */
 395   struct iv_cand *selected;
 396   /* Uses in the group.  */
 397   vec<struct iv_use *> vuses;
 398 };
 399
 400 /* The position where the iv is computed.  */
 401 enum iv_position
 402 {
 403   IP_NORMAL,            /* At the end, just before the exit condition.  */
 404   IP_END,               /* At the end of the latch block.  */
 405   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 406   IP_AFTER_USE,         /* Immediately after a specific use.  */
 407   IP_ORIGINAL           /* The original biv.  */
 408 };
 409
 410 /* The induction variable candidate.  */
 411 struct iv_cand
 412 {
 413   unsigned id;          /* The number of the candidate.  */
 414   bool important;       /* Whether this is an "important" candidate, i.e. such
 415                            that it should be considered by all uses.  */
 416   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 417   gimple *incremented_at;/* For original biv, the statement where it is
 418                            incremented.  */
 419   tree var_before;      /* The variable used for it before increment.  */
 420   tree var_after;       /* The variable used for it after increment.  */
 421   struct iv *iv;        /* The value of the candidate.  NULL for
 422                            "pseudocandidate" used to indicate the possibility
 423                            to replace the final value of an iv by direct
 424                            computation of the value.  */
 425   unsigned cost;        /* Cost of the candidate.  */
 426   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 428                               where it is incremented.  */
 429   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 430                            iv_cand.  */
 431   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 432                            hanlde it as a new invariant expression which will
 433                            be hoisted out of loop.  */
 434   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 435                            smaller type.  */
 436 };
 437
 438 /* Hashtable entry for common candidate derived from iv uses.  */
 439 struct iv_common_cand
 440 {
 441   tree base;
 442   tree step;
 443   /* IV uses from which this common candidate is derived.  */
 444   auto_vec<struct iv_use *> uses;
 445   hashval_t hash;
 446 };
 447
 448 /* Hashtable helpers.  */
 449
 450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 451 {
 452   static inline hashval_t hash (const iv_common_cand *);
 453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 454 };
 455
 456 /* Hash function for possible common candidates.  */
 457
 458 inline hashval_t
 459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 460 {
 461   return ccand->hash;
 462 }
 463
 464 /* Hash table equality function for common candidates.  */
 465
 466 inline bool
 467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 468                               const iv_common_cand *ccand2)
 469 {
 470   return (ccand1->hash == ccand2->hash
 471           && operand_equal_p (ccand1->base, ccand2->base, 0)
 472           && operand_equal_p (ccand1->step, ccand2->step, 0)
 473           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 474               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 475 }
 476
 477 /* Loop invariant expression hashtable entry.  */
 478
 479 struct iv_inv_expr_ent
 480 {
 481   /* Tree expression of the entry.  */
 482   tree expr;
 483   /* Unique indentifier.  */
 484   int id;
 485   /* Hash value.  */
 486   hashval_t hash;
 487 };
 488
 489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 490
 491 static int
 492 sort_iv_inv_expr_ent (const void *a, const void *b)
 493 {
 494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 496
 497   unsigned id1 = (*e1)->id;
 498   unsigned id2 = (*e2)->id;
 499
 500   if (id1 < id2)
 501     return -1;
 502   else if (id1 > id2)
 503     return 1;
 504   else
 505     return 0;
 506 }
 507
 508 /* Hashtable helpers.  */
 509
 510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 511 {
 512   static inline hashval_t hash (const iv_inv_expr_ent *);
 513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 514 };
 515
 516 /* Return true if uses of type TYPE represent some form of address.  */
 517
 518 inline bool
 519 address_p (use_type type)
 520 {
 521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 522 }
 523
 524 /* Hash function for loop invariant expressions.  */
 525
 526 inline hashval_t
 527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 528 {
 529   return expr->hash;
 530 }
 531
 532 /* Hash table equality function for expressions.  */
 533
 534 inline bool
 535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 536                            const iv_inv_expr_ent *expr2)
 537 {
 538   return expr1->hash == expr2->hash
 539          && operand_equal_p (expr1->expr, expr2->expr, 0);
 540 }
 541
 542 struct ivopts_data
 543 {
 544   /* The currently optimized loop.  */
 545   struct loop *current_loop;
 546   source_location loop_loc;
 547
 548   /* Numbers of iterations for all exits of the current loop.  */
 549   hash_map<edge, tree_niter_desc *> *niters;
 550
 551   /* Number of registers used in it.  */
 552   unsigned regs_used;
 553
 554   /* The size of version_info array allocated.  */
 555   unsigned version_info_size;
 556
 557   /* The array of information for the ssa names.  */
 558   struct version_info *version_info;
 559
 560   /* The hashtable of loop invariant expressions created
 561      by ivopt.  */
 562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 563
 564   /* The bitmap of indices in version_info whose value was changed.  */
 565   bitmap relevant;
 566
 567   /* The uses of induction variables.  */
 568   vec<iv_group *> vgroups;
 569
 570   /* The candidates.  */
 571   vec<iv_cand *> vcands;
 572
 573   /* A bitmap of important candidates.  */
 574   bitmap important_candidates;
 575
 576   /* Cache used by tree_to_aff_combination_expand.  */
 577   hash_map<tree, name_expansion *> *name_expansion_cache;
 578
 579   /* The hashtable of common candidates derived from iv uses.  */
 580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 581
 582   /* The common candidates.  */
 583   vec<iv_common_cand *> iv_common_cands;
 584
 585   /* The maximum invariant variable id.  */
 586   unsigned max_inv_var_id;
 587
 588   /* The maximum invariant expression id.  */
 589   unsigned max_inv_expr_id;
 590
 591   /* Number of no_overflow BIVs which are not used in memory address.  */
 592   unsigned bivs_not_used_in_addr;
 593
 594   /* Obstack for iv structure.  */
 595   struct obstack iv_obstack;
 596
 597   /* Whether to consider just related and important candidates when replacing a
 598      use.  */
 599   bool consider_all_candidates;
 600
 601   /* Are we optimizing for speed?  */
 602   bool speed;
 603
 604   /* Whether the loop body includes any function calls.  */
 605   bool body_includes_call;
 606
 607   /* Whether the loop body can only be exited via single exit.  */
 608   bool loop_single_exit_p;
 609 };
 610
 611 /* An assignment of iv candidates to uses.  */
 612
 613 struct iv_ca
 614 {
 615   /* The number of uses covered by the assignment.  */
 616   unsigned upto;
 617
 618   /* Number of uses that cannot be expressed by the candidates in the set.  */
 619   unsigned bad_groups;
 620
 621   /* Candidate assigned to a use, together with the related costs.  */
 622   struct cost_pair **cand_for_group;
 623
 624   /* Number of times each candidate is used.  */
 625   unsigned *n_cand_uses;
 626
 627   /* The candidates used.  */
 628   bitmap cands;
 629
 630   /* The number of candidates in the set.  */
 631   unsigned n_cands;
 632
 633   /* The number of invariants needed, including both invariant variants and
 634      invariant expressions.  */
 635   unsigned n_invs;
 636
 637   /* Total cost of expressing uses.  */
 638   comp_cost cand_use_cost;
 639
 640   /* Total cost of candidates.  */
 641   unsigned cand_cost;
 642
 643   /* Number of times each invariant variable is used.  */
 644   unsigned *n_inv_var_uses;
 645
 646   /* Number of times each invariant expression is used.  */
 647   unsigned *n_inv_expr_uses;
 648
 649   /* Total cost of the assignment.  */
 650   comp_cost cost;
 651 };
 652
 653 /* Difference of two iv candidate assignments.  */
 654
 655 struct iv_ca_delta
 656 {
 657   /* Changed group.  */
 658   struct iv_group *group;
 659
 660   /* An old assignment (for rollback purposes).  */
 661   struct cost_pair *old_cp;
 662
 663   /* A new assignment.  */
 664   struct cost_pair *new_cp;
 665
 666   /* Next change in the list.  */
 667   struct iv_ca_delta *next;
 668 };
 669
 670 /* Bound on number of candidates below that all candidates are considered.  */
 671
 672 #define CONSIDER_ALL_CANDIDATES_BOUND \
 673   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 674
 675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 676    optimizing such a loop would help, and it would take ages).  */
 677
 678 #define MAX_CONSIDERED_GROUPS \
 679   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 680
 681 /* If there are at most this number of ivs in the set, try removing unnecessary
 682    ivs from the set always.  */
 683
 684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 685   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 686
 687 /* The list of trees for that the decl_rtl field must be reset is stored
 688    here.  */
 689
 690 static vec<tree> decl_rtl_to_reset;
 691
 692 static comp_cost force_expr_to_var_cost (tree, bool);
 693
 694 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 695
 696 edge
 697 single_dom_exit (struct loop *loop)
 698 {
 699   edge exit = single_exit (loop);
 700
 701   if (!exit)
 702     return NULL;
 703
 704   if (!just_once_each_iteration_p (loop, exit->src))
 705     return NULL;
 706
 707   return exit;
 708 }
 709
 710 /* Dumps information about the induction variable IV to FILE.  Don't dump
 711    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 712    preceding spaces indicated by INDENT_LEVEL.  */
 713
 714 void
 715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 716 {
 717   const char *p;
 718   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 719
 720   if (indent_level > 4)
 721     indent_level = 4;
 722   p = spaces + 8 - (indent_level << 1);
 723
 724   fprintf (file, "%sIV struct:\n", p);
 725   if (iv->ssa_name && dump_name)
 726     {
 727       fprintf (file, "%s  SSA_NAME:\t", p);
 728       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 729       fprintf (file, "\n");
 730     }
 731
 732   fprintf (file, "%s  Type:\t", p);
 733   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 734   fprintf (file, "\n");
 735
 736   fprintf (file, "%s  Base:\t", p);
 737   print_generic_expr (file, iv->base, TDF_SLIM);
 738   fprintf (file, "\n");
 739
 740   fprintf (file, "%s  Step:\t", p);
 741   print_generic_expr (file, iv->step, TDF_SLIM);
 742   fprintf (file, "\n");
 743
 744   if (iv->base_object)
 745     {
 746       fprintf (file, "%s  Object:\t", p);
 747       print_generic_expr (file, iv->base_object, TDF_SLIM);
 748       fprintf (file, "\n");
 749     }
 750
 751   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 752
 753   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 754            p, iv->no_overflow ? "No-overflow" : "Overflow");
 755 }
 756
 757 /* Dumps information about the USE to FILE.  */
 758
 759 void
 760 dump_use (FILE *file, struct iv_use *use)
 761 {
 762   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 763   fprintf (file, "    At stmt:\t");
 764   print_gimple_stmt (file, use->stmt, 0);
 765   fprintf (file, "    At pos:\t");
 766   if (use->op_p)
 767     print_generic_expr (file, *use->op_p, TDF_SLIM);
 768   fprintf (file, "\n");
 769   dump_iv (file, use->iv, false, 2);
 770 }
 771
 772 /* Dumps information about the uses to FILE.  */
 773
 774 void
 775 dump_groups (FILE *file, struct ivopts_data *data)
 776 {
 777   unsigned i, j;
 778   struct iv_group *group;
 779
 780   for (i = 0; i < data->vgroups.length (); i++)
 781     {
 782       group = data->vgroups[i];
 783       fprintf (file, "Group %d:\n", group->id);
 784       if (group->type == USE_NONLINEAR_EXPR)
 785         fprintf (file, "  Type:\tGENERIC\n");
 786       else if (group->type == USE_REF_ADDRESS)
 787         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 788       else if (group->type == USE_PTR_ADDRESS)
 789         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 790       else
 791         {
 792           gcc_assert (group->type == USE_COMPARE);
 793           fprintf (file, "  Type:\tCOMPARE\n");
 794         }
 795       for (j = 0; j < group->vuses.length (); j++)
 796         dump_use (file, group->vuses[j]);
 797     }
 798 }
 799
 800 /* Dumps information about induction variable candidate CAND to FILE.  */
 801
 802 void
 803 dump_cand (FILE *file, struct iv_cand *cand)
 804 {
 805   struct iv *iv = cand->iv;
 806
 807   fprintf (file, "Candidate %d:\n", cand->id);
 808   if (cand->inv_vars)
 809     {
 810       fprintf (file, "  Depend on inv.vars: ");
 811       dump_bitmap (file, cand->inv_vars);
 812     }
 813   if (cand->inv_exprs)
 814     {
 815       fprintf (file, "  Depend on inv.exprs: ");
 816       dump_bitmap (file, cand->inv_exprs);
 817     }
 818
 819   if (cand->var_before)
 820     {
 821       fprintf (file, "  Var befor: ");
 822       print_generic_expr (file, cand->var_before, TDF_SLIM);
 823       fprintf (file, "\n");
 824     }
 825   if (cand->var_after)
 826     {
 827       fprintf (file, "  Var after: ");
 828       print_generic_expr (file, cand->var_after, TDF_SLIM);
 829       fprintf (file, "\n");
 830     }
 831
 832   switch (cand->pos)
 833     {
 834     case IP_NORMAL:
 835       fprintf (file, "  Incr POS: before exit test\n");
 836       break;
 837
 838     case IP_BEFORE_USE:
 839       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 840       break;
 841
 842     case IP_AFTER_USE:
 843       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 844       break;
 845
 846     case IP_END:
 847       fprintf (file, "  Incr POS: at end\n");
 848       break;
 849
 850     case IP_ORIGINAL:
 851       fprintf (file, "  Incr POS: orig biv\n");
 852       break;
 853     }
 854
 855   dump_iv (file, iv, false, 1);
 856 }
 857
 858 /* Returns the info for ssa version VER.  */
 859
 860 static inline struct version_info *
 861 ver_info (struct ivopts_data *data, unsigned ver)
 862 {
 863   return data->version_info + ver;
 864 }
 865
 866 /* Returns the info for ssa name NAME.  */
 867
 868 static inline struct version_info *
 869 name_info (struct ivopts_data *data, tree name)
 870 {
 871   return ver_info (data, SSA_NAME_VERSION (name));
 872 }
 873
 874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 875    emitted in LOOP.  */
 876
 877 static bool
 878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 879 {
 880   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 881
 882   gcc_assert (bb);
 883
 884   if (sbb == loop->latch)
 885     return true;
 886
 887   if (sbb != bb)
 888     return false;
 889
 890   return stmt == last_stmt (bb);
 891 }
 892
 893 /* Returns true if STMT if after the place where the original induction
 894    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 895    if the positions are identical.  */
 896
 897 static bool
 898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 899 {
 900   basic_block cand_bb = gimple_bb (cand->incremented_at);
 901   basic_block stmt_bb = gimple_bb (stmt);
 902
 903   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 904     return false;
 905
 906   if (stmt_bb != cand_bb)
 907     return true;
 908
 909   if (true_if_equal
 910       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 911     return true;
 912   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 913 }
 914
 915 /* Returns true if STMT if after the place where the induction variable
 916    CAND is incremented in LOOP.  */
 917
 918 static bool
 919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 920 {
 921   switch (cand->pos)
 922     {
 923     case IP_END:
 924       return false;
 925
 926     case IP_NORMAL:
 927       return stmt_after_ip_normal_pos (loop, stmt);
 928
 929     case IP_ORIGINAL:
 930     case IP_AFTER_USE:
 931       return stmt_after_inc_pos (cand, stmt, false);
 932
 933     case IP_BEFORE_USE:
 934       return stmt_after_inc_pos (cand, stmt, true);
 935
 936     default:
 937       gcc_unreachable ();
 938     }
 939 }
 940
 941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 942
 943 static bool
 944 abnormal_ssa_name_p (tree exp)
 945 {
 946   if (!exp)
 947     return false;
 948
 949   if (TREE_CODE (exp) != SSA_NAME)
 950     return false;
 951
 952   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 953 }
 954
 955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 956    abnormal phi node.  Callback for for_each_index.  */
 957
 958 static bool
 959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 960                                   void *data ATTRIBUTE_UNUSED)
 961 {
 962   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 963     {
 964       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 965         return false;
 966       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 967         return false;
 968     }
 969
 970   return !abnormal_ssa_name_p (*index);
 971 }
 972
 973 /* Returns true if EXPR contains a ssa name that occurs in an
 974    abnormal phi node.  */
 975
 976 bool
 977 contains_abnormal_ssa_name_p (tree expr)
 978 {
 979   enum tree_code code;
 980   enum tree_code_class codeclass;
 981
 982   if (!expr)
 983     return false;
 984
 985   code = TREE_CODE (expr);
 986   codeclass = TREE_CODE_CLASS (code);
 987
 988   if (code == SSA_NAME)
 989     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 990
 991   if (code == INTEGER_CST
 992       || is_gimple_min_invariant (expr))
 993     return false;
 994
 995   if (code == ADDR_EXPR)
 996     return !for_each_index (&TREE_OPERAND (expr, 0),
 997                             idx_contains_abnormal_ssa_name_p,
 998                             NULL);
 999
1000   if (code == COND_EXPR)
1001     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1002       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1003       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1004
1005   switch (codeclass)
1006     {
1007     case tcc_binary:
1008     case tcc_comparison:
1009       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1010         return true;
1011
1012       /* Fallthru.  */
1013     case tcc_unary:
1014       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1015         return true;
1016
1017       break;
1018
1019     default:
1020       gcc_unreachable ();
1021     }
1022
1023   return false;
1024 }
1025
1026 /*  Returns the structure describing number of iterations determined from
1027     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1028
1029 static struct tree_niter_desc *
1030 niter_for_exit (struct ivopts_data *data, edge exit)
1031 {
1032   struct tree_niter_desc *desc;
1033   tree_niter_desc **slot;
1034
1035   if (!data->niters)
1036     {
1037       data->niters = new hash_map<edge, tree_niter_desc *>;
1038       slot = NULL;
1039     }
1040   else
1041     slot = data->niters->get (exit);
1042
1043   if (!slot)
1044     {
1045       /* Try to determine number of iterations.  We cannot safely work with ssa
1046          names that appear in phi nodes on abnormal edges, so that we do not
1047          create overlapping life ranges for them (PR 27283).  */
1048       desc = XNEW (struct tree_niter_desc);
1049       if (!number_of_iterations_exit (data->current_loop,
1050                                       exit, desc, true)
1051           || contains_abnormal_ssa_name_p (desc->niter))
1052         {
1053           XDELETE (desc);
1054           desc = NULL;
1055         }
1056       data->niters->put (exit, desc);
1057     }
1058   else
1059     desc = *slot;
1060
1061   return desc;
1062 }
1063
1064 /* Returns the structure describing number of iterations determined from
1065    single dominating exit of DATA->current_loop, or NULL if something
1066    goes wrong.  */
1067
1068 static struct tree_niter_desc *
1069 niter_for_single_dom_exit (struct ivopts_data *data)
1070 {
1071   edge exit = single_dom_exit (data->current_loop);
1072
1073   if (!exit)
1074     return NULL;
1075
1076   return niter_for_exit (data, exit);
1077 }
1078
1079 /* Initializes data structures used by the iv optimization pass, stored
1080    in DATA.  */
1081
1082 static void
1083 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1084 {
1085   data->version_info_size = 2 * num_ssa_names;
1086   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1087   data->relevant = BITMAP_ALLOC (NULL);
1088   data->important_candidates = BITMAP_ALLOC (NULL);
1089   data->max_inv_var_id = 0;
1090   data->max_inv_expr_id = 0;
1091   data->niters = NULL;
1092   data->vgroups.create (20);
1093   data->vcands.create (20);
1094   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1095   data->name_expansion_cache = NULL;
1096   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1097   data->iv_common_cands.create (20);
1098   decl_rtl_to_reset.create (20);
1099   gcc_obstack_init (&data->iv_obstack);
1100 }
1101
1102 /* Returns a memory object to that EXPR points.  In case we are able to
1103    determine that it does not point to any such object, NULL is returned.  */
1104
1105 static tree
1106 determine_base_object (tree expr)
1107 {
1108   enum tree_code code = TREE_CODE (expr);
1109   tree base, obj;
1110
1111   /* If this is a pointer casted to any type, we need to determine
1112      the base object for the pointer; so handle conversions before
1113      throwing away non-pointer expressions.  */
1114   if (CONVERT_EXPR_P (expr))
1115     return determine_base_object (TREE_OPERAND (expr, 0));
1116
1117   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1118     return NULL_TREE;
1119
1120   switch (code)
1121     {
1122     case INTEGER_CST:
1123       return NULL_TREE;
1124
1125     case ADDR_EXPR:
1126       obj = TREE_OPERAND (expr, 0);
1127       base = get_base_address (obj);
1128
1129       if (!base)
1130         return expr;
1131
1132       if (TREE_CODE (base) == MEM_REF)
1133         return determine_base_object (TREE_OPERAND (base, 0));
1134
1135       return fold_convert (ptr_type_node,
1136                            build_fold_addr_expr (base));
1137
1138     case POINTER_PLUS_EXPR:
1139       return determine_base_object (TREE_OPERAND (expr, 0));
1140
1141     case PLUS_EXPR:
1142     case MINUS_EXPR:
1143       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1144       gcc_unreachable ();
1145
1146     default:
1147       if (POLY_INT_CST_P (expr))
1148         return NULL_TREE;
1149       return fold_convert (ptr_type_node, expr);
1150     }
1151 }
1152
1153 /* Return true if address expression with non-DECL_P operand appears
1154    in EXPR.  */
1155
1156 static bool
1157 contain_complex_addr_expr (tree expr)
1158 {
1159   bool res = false;
1160
1161   STRIP_NOPS (expr);
1162   switch (TREE_CODE (expr))
1163     {
1164     case POINTER_PLUS_EXPR:
1165     case PLUS_EXPR:
1166     case MINUS_EXPR:
1167       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1168       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1169       break;
1170
1171     case ADDR_EXPR:
1172       return (!DECL_P (TREE_OPERAND (expr, 0)));
1173
1174     default:
1175       return false;
1176     }
1177
1178   return res;
1179 }
1180
1181 /* Allocates an induction variable with given initial value BASE and step STEP
1182    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1183
1184 static struct iv *
1185 alloc_iv (struct ivopts_data *data, tree base, tree step,
1186           bool no_overflow = false)
1187 {
1188   tree expr = base;
1189   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1190                                               sizeof (struct iv));
1191   gcc_assert (step != NULL_TREE);
1192
1193   /* Lower address expression in base except ones with DECL_P as operand.
1194      By doing this:
1195        1) More accurate cost can be computed for address expressions;
1196        2) Duplicate candidates won't be created for bases in different
1197           forms, like &a[0] and &a.  */
1198   STRIP_NOPS (expr);
1199   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1200       || contain_complex_addr_expr (expr))
1201     {
1202       aff_tree comb;
1203       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1204       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1205     }
1206
1207   iv->base = base;
1208   iv->base_object = determine_base_object (base);
1209   iv->step = step;
1210   iv->biv_p = false;
1211   iv->nonlin_use = NULL;
1212   iv->ssa_name = NULL_TREE;
1213   if (!no_overflow
1214        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1215                               base, step))
1216     no_overflow = true;
1217   iv->no_overflow = no_overflow;
1218   iv->have_address_use = false;
1219
1220   return iv;
1221 }
1222
1223 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1224    doesn't overflow.  */
1225
1226 static void
1227 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1228         bool no_overflow)
1229 {
1230   struct version_info *info = name_info (data, iv);
1231
1232   gcc_assert (!info->iv);
1233
1234   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1235   info->iv = alloc_iv (data, base, step, no_overflow);
1236   info->iv->ssa_name = iv;
1237 }
1238
1239 /* Finds induction variable declaration for VAR.  */
1240
1241 static struct iv *
1242 get_iv (struct ivopts_data *data, tree var)
1243 {
1244   basic_block bb;
1245   tree type = TREE_TYPE (var);
1246
1247   if (!POINTER_TYPE_P (type)
1248       && !INTEGRAL_TYPE_P (type))
1249     return NULL;
1250
1251   if (!name_info (data, var)->iv)
1252     {
1253       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1254
1255       if (!bb
1256           || !flow_bb_inside_loop_p (data->current_loop, bb))
1257         set_iv (data, var, var, build_int_cst (type, 0), true);
1258     }
1259
1260   return name_info (data, var)->iv;
1261 }
1262
1263 /* Return the first non-invariant ssa var found in EXPR.  */
1264
1265 static tree
1266 extract_single_var_from_expr (tree expr)
1267 {
1268   int i, n;
1269   tree tmp;
1270   enum tree_code code;
1271
1272   if (!expr || is_gimple_min_invariant (expr))
1273     return NULL;
1274
1275   code = TREE_CODE (expr);
1276   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277     {
1278       n = TREE_OPERAND_LENGTH (expr);
1279       for (i = 0; i < n; i++)
1280         {
1281           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282
1283           if (tmp)
1284             return tmp;
1285         }
1286     }
1287   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1288 }
1289
1290 /* Finds basic ivs.  */
1291
1292 static bool
1293 find_bivs (struct ivopts_data *data)
1294 {
1295   gphi *phi;
1296   affine_iv iv;
1297   tree step, type, base, stop;
1298   bool found = false;
1299   struct loop *loop = data->current_loop;
1300   gphi_iterator psi;
1301
1302   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1303     {
1304       phi = psi.phi ();
1305
1306       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1307         continue;
1308
1309       if (virtual_operand_p (PHI_RESULT (phi)))
1310         continue;
1311
1312       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1313         continue;
1314
1315       if (integer_zerop (iv.step))
1316         continue;
1317
1318       step = iv.step;
1319       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1320       /* Stop expanding iv base at the first ssa var referred by iv step.
1321          Ideally we should stop at any ssa var, because that's expensive
1322          and unusual to happen, we just do it on the first one.
1323
1324          See PR64705 for the rationale.  */
1325       stop = extract_single_var_from_expr (step);
1326       base = expand_simple_operations (base, stop);
1327       if (contains_abnormal_ssa_name_p (base)
1328           || contains_abnormal_ssa_name_p (step))
1329         continue;
1330
1331       type = TREE_TYPE (PHI_RESULT (phi));
1332       base = fold_convert (type, base);
1333       if (step)
1334         {
1335           if (POINTER_TYPE_P (type))
1336             step = convert_to_ptrofftype (step);
1337           else
1338             step = fold_convert (type, step);
1339         }
1340
1341       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1342       found = true;
1343     }
1344
1345   return found;
1346 }
1347
1348 /* Marks basic ivs.  */
1349
1350 static void
1351 mark_bivs (struct ivopts_data *data)
1352 {
1353   gphi *phi;
1354   gimple *def;
1355   tree var;
1356   struct iv *iv, *incr_iv;
1357   struct loop *loop = data->current_loop;
1358   basic_block incr_bb;
1359   gphi_iterator psi;
1360
1361   data->bivs_not_used_in_addr = 0;
1362   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1363     {
1364       phi = psi.phi ();
1365
1366       iv = get_iv (data, PHI_RESULT (phi));
1367       if (!iv)
1368         continue;
1369
1370       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1371       def = SSA_NAME_DEF_STMT (var);
1372       /* Don't mark iv peeled from other one as biv.  */
1373       if (def
1374           && gimple_code (def) == GIMPLE_PHI
1375           && gimple_bb (def) == loop->header)
1376         continue;
1377
1378       incr_iv = get_iv (data, var);
1379       if (!incr_iv)
1380         continue;
1381
1382       /* If the increment is in the subloop, ignore it.  */
1383       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1384       if (incr_bb->loop_father != data->current_loop
1385           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1386         continue;
1387
1388       iv->biv_p = true;
1389       incr_iv->biv_p = true;
1390       if (iv->no_overflow)
1391         data->bivs_not_used_in_addr++;
1392       if (incr_iv->no_overflow)
1393         data->bivs_not_used_in_addr++;
1394     }
1395 }
1396
1397 /* Checks whether STMT defines a linear induction variable and stores its
1398    parameters to IV.  */
1399
1400 static bool
1401 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1402 {
1403   tree lhs, stop;
1404   struct loop *loop = data->current_loop;
1405
1406   iv->base = NULL_TREE;
1407   iv->step = NULL_TREE;
1408
1409   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1410     return false;
1411
1412   lhs = gimple_assign_lhs (stmt);
1413   if (TREE_CODE (lhs) != SSA_NAME)
1414     return false;
1415
1416   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1417     return false;
1418
1419   /* Stop expanding iv base at the first ssa var referred by iv step.
1420      Ideally we should stop at any ssa var, because that's expensive
1421      and unusual to happen, we just do it on the first one.
1422
1423      See PR64705 for the rationale.  */
1424   stop = extract_single_var_from_expr (iv->step);
1425   iv->base = expand_simple_operations (iv->base, stop);
1426   if (contains_abnormal_ssa_name_p (iv->base)
1427       || contains_abnormal_ssa_name_p (iv->step))
1428     return false;
1429
1430   /* If STMT could throw, then do not consider STMT as defining a GIV.
1431      While this will suppress optimizations, we can not safely delete this
1432      GIV and associated statements, even if it appears it is not used.  */
1433   if (stmt_could_throw_p (stmt))
1434     return false;
1435
1436   return true;
1437 }
1438
1439 /* Finds general ivs in statement STMT.  */
1440
1441 static void
1442 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443 {
1444   affine_iv iv;
1445
1446   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1447     return;
1448
1449   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1450 }
1451
1452 /* Finds general ivs in basic block BB.  */
1453
1454 static void
1455 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456 {
1457   gimple_stmt_iterator bsi;
1458
1459   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1460     find_givs_in_stmt (data, gsi_stmt (bsi));
1461 }
1462
1463 /* Finds general ivs.  */
1464
1465 static void
1466 find_givs (struct ivopts_data *data)
1467 {
1468   struct loop *loop = data->current_loop;
1469   basic_block *body = get_loop_body_in_dom_order (loop);
1470   unsigned i;
1471
1472   for (i = 0; i < loop->num_nodes; i++)
1473     find_givs_in_bb (data, body[i]);
1474   free (body);
1475 }
1476
1477 /* For each ssa name defined in LOOP determines whether it is an induction
1478    variable and if so, its initial value and step.  */
1479
1480 static bool
1481 find_induction_variables (struct ivopts_data *data)
1482 {
1483   unsigned i;
1484   bitmap_iterator bi;
1485
1486   if (!find_bivs (data))
1487     return false;
1488
1489   find_givs (data);
1490   mark_bivs (data);
1491
1492   if (dump_file && (dump_flags & TDF_DETAILS))
1493     {
1494       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495
1496       if (niter)
1497         {
1498           fprintf (dump_file, "  number of iterations ");
1499           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1500           if (!integer_zerop (niter->may_be_zero))
1501             {
1502               fprintf (dump_file, "; zero if ");
1503               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504             }
1505           fprintf (dump_file, "\n");
1506         };
1507
1508       fprintf (dump_file, "\n<Induction Vars>:\n");
1509       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510         {
1511           struct version_info *info = ver_info (data, i);
1512           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1513             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514         }
1515     }
1516
1517   return true;
1518 }
1519
1520 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1521    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1522    is the const offset stripped from IV base and MEM_TYPE is the type
1523    of the memory being addressed.  For uses of other types, ADDR_BASE
1524    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1525
1526 static struct iv_use *
1527 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1528             gimple *stmt, enum use_type type, tree mem_type,
1529             tree addr_base, poly_uint64 addr_offset)
1530 {
1531   struct iv_use *use = XCNEW (struct iv_use);
1532
1533   use->id = group->vuses.length ();
1534   use->group_id = group->id;
1535   use->type = type;
1536   use->mem_type = mem_type;
1537   use->iv = iv;
1538   use->stmt = stmt;
1539   use->op_p = use_p;
1540   use->addr_base = addr_base;
1541   use->addr_offset = addr_offset;
1542
1543   group->vuses.safe_push (use);
1544   return use;
1545 }
1546
1547 /* Checks whether OP is a loop-level invariant and if so, records it.
1548    NONLINEAR_USE is true if the invariant is used in a way we do not
1549    handle specially.  */
1550
1551 static void
1552 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 {
1554   basic_block bb;
1555   struct version_info *info;
1556
1557   if (TREE_CODE (op) != SSA_NAME
1558       || virtual_operand_p (op))
1559     return;
1560
1561   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1562   if (bb
1563       && flow_bb_inside_loop_p (data->current_loop, bb))
1564     return;
1565
1566   info = name_info (data, op);
1567   info->name = op;
1568   info->has_nonlin_use |= nonlinear_use;
1569   if (!info->inv_id)
1570     info->inv_id = ++data->max_inv_var_id;
1571   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1572 }
1573
1574 /* Record a group of TYPE.  */
1575
1576 static struct iv_group *
1577 record_group (struct ivopts_data *data, enum use_type type)
1578 {
1579   struct iv_group *group = XCNEW (struct iv_group);
1580
1581   group->id = data->vgroups.length ();
1582   group->type = type;
1583   group->related_cands = BITMAP_ALLOC (NULL);
1584   group->vuses.create (1);
1585
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597                   struct iv *iv, gimple *stmt, enum use_type type,
1598                   tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603
1604   /* Record non address type use in a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611         {
1612           struct iv_use *use;
1613
1614           group = data->vgroups[i];
1615           use = group->vuses[0];
1616           if (!address_p (use->type))
1617             continue;
1618
1619           /* Check if it has the same stripped base and step.  */
1620           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621               && operand_equal_p (iv->step, use->iv->step, 0)
1622               && operand_equal_p (addr_base, use->addr_base, 0))
1623             break;
1624         }
1625       if (i == data->vgroups.length ())
1626         group = NULL;
1627     }
1628
1629   if (!group)
1630     group = record_group (data, type);
1631
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633                      addr_base, addr_offset);
1634 }
1635
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
1685
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and true is returned.  If this is not
1691    the case, CONTROL_VAR and BOUND are set to the arguments of the
1692    condition and false is returned.  */
1693
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696                        tree **control_var, tree **bound,
1697                        struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725
1726   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732
1733   /* If none side of comparison is IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760
1761   return rewrite_type;
1762 }
1763
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773
1774   ret = extract_cond_operands (data, stmt,
1775                                &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790    relative to the loop LOOP, i.e. if all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
1793
1794 struct loop *
1795 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807         {
1808           if (flow_bb_inside_loop_p (loop, def_bb))
1809             return NULL;
1810           return superloop_at_depth (loop,
1811                                      loop_depth (def_bb->loop_father) + 1);
1812         }
1813
1814       return current_loops->tree_root;
1815     }
1816
1817   if (!EXPR_P (expr))
1818     return NULL;
1819
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       struct loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826         continue;
1827
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830         return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840
1841 bool
1842 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846
1847   gcc_assert (loop_depth (loop) > 0);
1848
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856           && flow_bb_inside_loop_p (loop, def_bb))
1857         return false;
1858
1859       return true;
1860     }
1861
1862   if (!EXPR_P (expr))
1863     return false;
1864
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870
1871   return true;
1872 }
1873
1874 /* Given expression EXPR which computes inductive values with respect
1875    to loop recorded in DATA, this function returns biv from which EXPR
1876    is derived by tracing definition chains of ssa variables in EXPR.  */
1877
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886
1887   if (expr == NULL_TREE)
1888     return NULL;
1889
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898         {
1899           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900           if (iv)
1901             return iv;
1902         }
1903     }
1904
1905   /* Stop if it's not ssa name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924         return NULL;
1925
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927         return NULL;
1928
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930         {
1931           tree use = USE_FROM_PTR (use_p);
1932           iv = find_deriving_biv_for_expr (data, use);
1933           if (iv)
1934             return iv;
1935         }
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953          are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957         return iv;
1958       gcc_fallthrough ();
1959
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963
1964     default:
1965       break;
1966     }
1967
1968   return NULL;
1969 }
1970
1971 /* Record BIV, its predecessor and successor that they are used in
1972    address type uses.  */
1973
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997           || iv->have_address_use || !iv->no_overflow)
1998         continue;
1999
2000       if (type != TREE_TYPE (iv->base)
2001           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002         continue;
2003
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005         continue;
2006
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009           || operand_equal_p (base_2, biv->base, 0))
2010         {
2011           iv->have_address_use = true;
2012           data->bivs_not_used_in_addr--;
2013         }
2014     }
2015 }
2016
2017 /* Cumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
2020
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   struct loop *loop = dta->ivopts_data->current_loop;
2036
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054         return false;
2055
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058
2059       if (!expr_invariant_in_loop_p (loop, step)
2060           || !expr_invariant_in_loop_p (loop, lbound))
2061         return false;
2062     }
2063
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072           *&x[0], which is not folded and does not trigger the
2073           ARRAY_REF path below.  */
2074   *idx = iv->base;
2075
2076   if (integer_zerop (iv->step))
2077     return true;
2078
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085         return false;
2086     }
2087   else
2088     /* The step for pointer arithmetics already is 1 byte.  */
2089     step = size_one_node;
2090
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097                             sizetype, &iv_base, &iv_step, dta->stmt,
2098                             use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122                 void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       find_interesting_uses_op (data, array_ref_element_size (base));
2129       find_interesting_uses_op (data, array_ref_low_bound (base));
2130     }
2131   return true;
2132 }
2133
2134 /* If we can prove that TOP = cst * BOT for some constant cst,
2135    store cst to MUL and return true.  Otherwise return false.
2136    The returned value is always sign-extended, regardless of the
2137    signedness of TOP and BOT.  */
2138
2139 static bool
2140 constant_multiple_of (tree top, tree bot, widest_int *mul)
2141 {
2142   tree mby;
2143   enum tree_code code;
2144   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145   widest_int res, p0, p1;
2146
2147   STRIP_NOPS (top);
2148   STRIP_NOPS (bot);
2149
2150   if (operand_equal_p (top, bot, 0))
2151     {
2152       *mul = 1;
2153       return true;
2154     }
2155
2156   code = TREE_CODE (top);
2157   switch (code)
2158     {
2159     case MULT_EXPR:
2160       mby = TREE_OPERAND (top, 1);
2161       if (TREE_CODE (mby) != INTEGER_CST)
2162         return false;
2163
2164       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165         return false;
2166
2167       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168       return true;
2169
2170     case PLUS_EXPR:
2171     case MINUS_EXPR:
2172       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174         return false;
2175
2176       if (code == MINUS_EXPR)
2177         p1 = -p1;
2178       *mul = wi::sext (p0 + p1, precision);
2179       return true;
2180
2181     case INTEGER_CST:
2182       if (TREE_CODE (bot) != INTEGER_CST)
2183         return false;
2184
2185       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187       if (p1 == 0)
2188         return false;
2189       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190       return res == 0;
2191
2192     default:
2193       if (POLY_INT_CST_P (top)
2194           && POLY_INT_CST_P (bot)
2195           && constant_multiple_p (wi::to_poly_widest (top),
2196                                   wi::to_poly_widest (bot), mul))
2197         return true;
2198
2199       return false;
2200     }
2201 }
2202
2203 /* Return true if memory reference REF with step STEP may be unaligned.  */
2204
2205 static bool
2206 may_be_unaligned_p (tree ref, tree step)
2207 {
2208   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209      thus they are not misaligned.  */
2210   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211     return false;
2212
2213   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216
2217   unsigned HOST_WIDE_INT bitpos;
2218   unsigned int ref_align;
2219   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220   if (ref_align < align
2221       || (bitpos % align) != 0
2222       || (bitpos % BITS_PER_UNIT) != 0)
2223     return true;
2224
2225   unsigned int trailing_zeros = tree_ctz (step);
2226   if (trailing_zeros < HOST_BITS_PER_INT
2227       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228     return true;
2229
2230   return false;
2231 }
2232
2233 /* Return true if EXPR may be non-addressable.   */
2234
2235 bool
2236 may_be_nonaddressable_p (tree expr)
2237 {
2238   switch (TREE_CODE (expr))
2239     {
2240     case TARGET_MEM_REF:
2241       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2242          target, thus they are always addressable.  */
2243       return false;
2244
2245     case MEM_REF:
2246       /* Likewise for MEM_REFs, modulo the storage order.  */
2247       return REF_REVERSE_STORAGE_ORDER (expr);
2248
2249     case BIT_FIELD_REF:
2250       if (REF_REVERSE_STORAGE_ORDER (expr))
2251         return true;
2252       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2253
2254     case COMPONENT_REF:
2255       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2256         return true;
2257       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2258              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2259
2260     case ARRAY_REF:
2261     case ARRAY_RANGE_REF:
2262       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2263         return true;
2264       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2265
2266     case VIEW_CONVERT_EXPR:
2267       /* This kind of view-conversions may wrap non-addressable objects
2268          and make them look addressable.  After some processing the
2269          non-addressability may be uncovered again, causing ADDR_EXPRs
2270          of inappropriate objects to be built.  */
2271       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2272           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2273         return true;
2274       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275
2276     CASE_CONVERT:
2277       return true;
2278
2279     default:
2280       break;
2281     }
2282
2283   return false;
2284 }
2285
2286 /* Finds addresses in *OP_P inside STMT.  */
2287
2288 static void
2289 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2290                                tree *op_p)
2291 {
2292   tree base = *op_p, step = size_zero_node;
2293   struct iv *civ;
2294   struct ifs_ivopts_data ifs_ivopts_data;
2295
2296   /* Do not play with volatile memory references.  A bit too conservative,
2297      perhaps, but safe.  */
2298   if (gimple_has_volatile_ops (stmt))
2299     goto fail;
2300
2301   /* Ignore bitfields for now.  Not really something terribly complicated
2302      to handle.  TODO.  */
2303   if (TREE_CODE (base) == BIT_FIELD_REF)
2304     goto fail;
2305
2306   base = unshare_expr (base);
2307
2308   if (TREE_CODE (base) == TARGET_MEM_REF)
2309     {
2310       tree type = build_pointer_type (TREE_TYPE (base));
2311       tree astep;
2312
2313       if (TMR_BASE (base)
2314           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2315         {
2316           civ = get_iv (data, TMR_BASE (base));
2317           if (!civ)
2318             goto fail;
2319
2320           TMR_BASE (base) = civ->base;
2321           step = civ->step;
2322         }
2323       if (TMR_INDEX2 (base)
2324           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2325         {
2326           civ = get_iv (data, TMR_INDEX2 (base));
2327           if (!civ)
2328             goto fail;
2329
2330           TMR_INDEX2 (base) = civ->base;
2331           step = civ->step;
2332         }
2333       if (TMR_INDEX (base)
2334           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2335         {
2336           civ = get_iv (data, TMR_INDEX (base));
2337           if (!civ)
2338             goto fail;
2339
2340           TMR_INDEX (base) = civ->base;
2341           astep = civ->step;
2342
2343           if (astep)
2344             {
2345               if (TMR_STEP (base))
2346                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2347
2348               step = fold_build2 (PLUS_EXPR, type, step, astep);
2349             }
2350         }
2351
2352       if (integer_zerop (step))
2353         goto fail;
2354       base = tree_mem_ref_addr (type, base);
2355     }
2356   else
2357     {
2358       ifs_ivopts_data.ivopts_data = data;
2359       ifs_ivopts_data.stmt = stmt;
2360       ifs_ivopts_data.step = size_zero_node;
2361       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2362           || integer_zerop (ifs_ivopts_data.step))
2363         goto fail;
2364       step = ifs_ivopts_data.step;
2365
2366       /* Check that the base expression is addressable.  This needs
2367          to be done after substituting bases of IVs into it.  */
2368       if (may_be_nonaddressable_p (base))
2369         goto fail;
2370
2371       /* Moreover, on strict alignment platforms, check that it is
2372          sufficiently aligned.  */
2373       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2374         goto fail;
2375
2376       base = build_fold_addr_expr (base);
2377
2378       /* Substituting bases of IVs into the base expression might
2379          have caused folding opportunities.  */
2380       if (TREE_CODE (base) == ADDR_EXPR)
2381         {
2382           tree *ref = &TREE_OPERAND (base, 0);
2383           while (handled_component_p (*ref))
2384             ref = &TREE_OPERAND (*ref, 0);
2385           if (TREE_CODE (*ref) == MEM_REF)
2386             {
2387               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2388                                       TREE_OPERAND (*ref, 0),
2389                                       TREE_OPERAND (*ref, 1));
2390               if (tem)
2391                 *ref = tem;
2392             }
2393         }
2394     }
2395
2396   civ = alloc_iv (data, base, step);
2397   /* Fail if base object of this memory reference is unknown.  */
2398   if (civ->base_object == NULL_TREE)
2399     goto fail;
2400
2401   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2402   return;
2403
2404 fail:
2405   for_each_index (op_p, idx_record_use, data);
2406 }
2407
2408 /* Finds and records invariants used in STMT.  */
2409
2410 static void
2411 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2412 {
2413   ssa_op_iter iter;
2414   use_operand_p use_p;
2415   tree op;
2416
2417   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2418     {
2419       op = USE_FROM_PTR (use_p);
2420       record_invariant (data, op, false);
2421     }
2422 }
2423
2424 /* CALL calls an internal function.  If operand *OP_P will become an
2425    address when the call is expanded, return the type of the memory
2426    being addressed, otherwise return null.  */
2427
2428 static tree
2429 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2430 {
2431   switch (gimple_call_internal_fn (call))
2432     {
2433     case IFN_MASK_LOAD:
2434       if (op_p == gimple_call_arg_ptr (call, 0))
2435         return TREE_TYPE (gimple_call_lhs (call));
2436       return NULL_TREE;
2437
2438     case IFN_MASK_STORE:
2439       if (op_p == gimple_call_arg_ptr (call, 0))
2440         return TREE_TYPE (gimple_call_arg (call, 3));
2441       return NULL_TREE;
2442
2443     default:
2444       return NULL_TREE;
2445     }
2446 }
2447
2448 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2449    Return true if the operand will become an address when STMT
2450    is expanded and record the associated address use if so.  */
2451
2452 static bool
2453 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2454                        struct iv *iv)
2455 {
2456   /* Fail if base object of this memory reference is unknown.  */
2457   if (iv->base_object == NULL_TREE)
2458     return false;
2459
2460   tree mem_type = NULL_TREE;
2461   if (gcall *call = dyn_cast <gcall *> (stmt))
2462     if (gimple_call_internal_p (call))
2463       mem_type = get_mem_type_for_internal_fn (call, op_p);
2464   if (mem_type)
2465     {
2466       iv = alloc_iv (data, iv->base, iv->step);
2467       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2468       return true;
2469     }
2470   return false;
2471 }
2472
2473 /* Finds interesting uses of induction variables in the statement STMT.  */
2474
2475 static void
2476 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2477 {
2478   struct iv *iv;
2479   tree op, *lhs, *rhs;
2480   ssa_op_iter iter;
2481   use_operand_p use_p;
2482   enum tree_code code;
2483
2484   find_invariants_stmt (data, stmt);
2485
2486   if (gimple_code (stmt) == GIMPLE_COND)
2487     {
2488       find_interesting_uses_cond (data, stmt);
2489       return;
2490     }
2491
2492   if (is_gimple_assign (stmt))
2493     {
2494       lhs = gimple_assign_lhs_ptr (stmt);
2495       rhs = gimple_assign_rhs1_ptr (stmt);
2496
2497       if (TREE_CODE (*lhs) == SSA_NAME)
2498         {
2499           /* If the statement defines an induction variable, the uses are not
2500              interesting by themselves.  */
2501
2502           iv = get_iv (data, *lhs);
2503
2504           if (iv && !integer_zerop (iv->step))
2505             return;
2506         }
2507
2508       code = gimple_assign_rhs_code (stmt);
2509       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2510           && (REFERENCE_CLASS_P (*rhs)
2511               || is_gimple_val (*rhs)))
2512         {
2513           if (REFERENCE_CLASS_P (*rhs))
2514             find_interesting_uses_address (data, stmt, rhs);
2515           else
2516             find_interesting_uses_op (data, *rhs);
2517
2518           if (REFERENCE_CLASS_P (*lhs))
2519             find_interesting_uses_address (data, stmt, lhs);
2520           return;
2521         }
2522       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2523         {
2524           find_interesting_uses_cond (data, stmt);
2525           return;
2526         }
2527
2528       /* TODO -- we should also handle address uses of type
2529
2530          memory = call (whatever);
2531
2532          and
2533
2534          call (memory).  */
2535     }
2536
2537   if (gimple_code (stmt) == GIMPLE_PHI
2538       && gimple_bb (stmt) == data->current_loop->header)
2539     {
2540       iv = get_iv (data, PHI_RESULT (stmt));
2541
2542       if (iv && !integer_zerop (iv->step))
2543         return;
2544     }
2545
2546   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2547     {
2548       op = USE_FROM_PTR (use_p);
2549
2550       if (TREE_CODE (op) != SSA_NAME)
2551         continue;
2552
2553       iv = get_iv (data, op);
2554       if (!iv)
2555         continue;
2556
2557       if (!find_address_like_use (data, stmt, use_p->use, iv))
2558         find_interesting_uses_op (data, op);
2559     }
2560 }
2561
2562 /* Finds interesting uses of induction variables outside of loops
2563    on loop exit edge EXIT.  */
2564
2565 static void
2566 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2567 {
2568   gphi *phi;
2569   gphi_iterator psi;
2570   tree def;
2571
2572   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2573     {
2574       phi = psi.phi ();
2575       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2576       if (!virtual_operand_p (def))
2577         find_interesting_uses_op (data, def);
2578     }
2579 }
2580
2581 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2582    mode for memory reference represented by USE.  */
2583
2584 static GTY (()) vec<rtx, va_gc> *addr_list;
2585
2586 static bool
2587 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2588 {
2589   rtx reg, addr;
2590   unsigned list_index;
2591   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2592   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2593
2594   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2595   if (list_index >= vec_safe_length (addr_list))
2596     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2597
2598   addr = (*addr_list)[list_index];
2599   if (!addr)
2600     {
2601       addr_mode = targetm.addr_space.address_mode (as);
2602       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2603       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2604       (*addr_list)[list_index] = addr;
2605     }
2606   else
2607     addr_mode = GET_MODE (addr);
2608
2609   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2610   return (memory_address_addr_space_p (mem_mode, addr, as));
2611 }
2612
2613 /* Comparison function to sort group in ascending order of addr_offset.  */
2614
2615 static int
2616 group_compare_offset (const void *a, const void *b)
2617 {
2618   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2619   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2620
2621   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2622 }
2623
2624 /* Check if small groups should be split.  Return true if no group
2625    contains more than two uses with distinct addr_offsets.  Return
2626    false otherwise.  We want to split such groups because:
2627
2628      1) Small groups don't have much benefit and may interfer with
2629         general candidate selection.
2630      2) Size for problem with only small groups is usually small and
2631         general algorithm can handle it well.
2632
2633    TODO -- Above claim may not hold when we want to merge memory
2634    accesses with conseuctive addresses.  */
2635
2636 static bool
2637 split_small_address_groups_p (struct ivopts_data *data)
2638 {
2639   unsigned int i, j, distinct = 1;
2640   struct iv_use *pre;
2641   struct iv_group *group;
2642
2643   for (i = 0; i < data->vgroups.length (); i++)
2644     {
2645       group = data->vgroups[i];
2646       if (group->vuses.length () == 1)
2647         continue;
2648
2649       gcc_assert (address_p (group->type));
2650       if (group->vuses.length () == 2)
2651         {
2652           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2653                                       group->vuses[1]->addr_offset) > 0)
2654             std::swap (group->vuses[0], group->vuses[1]);
2655         }
2656       else
2657         group->vuses.qsort (group_compare_offset);
2658
2659       if (distinct > 2)
2660         continue;
2661
2662       distinct = 1;
2663       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2664         {
2665           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2666             {
2667               pre = group->vuses[j];
2668               distinct++;
2669             }
2670
2671           if (distinct > 2)
2672             break;
2673         }
2674     }
2675
2676   return (distinct <= 2);
2677 }
2678
2679 /* For each group of address type uses, this function further groups
2680    these uses according to the maximum offset supported by target's
2681    [base + offset] addressing mode.  */
2682
2683 static void
2684 split_address_groups (struct ivopts_data *data)
2685 {
2686   unsigned int i, j;
2687   /* Always split group.  */
2688   bool split_p = split_small_address_groups_p (data);
2689
2690   for (i = 0; i < data->vgroups.length (); i++)
2691     {
2692       struct iv_group *new_group = NULL;
2693       struct iv_group *group = data->vgroups[i];
2694       struct iv_use *use = group->vuses[0];
2695
2696       use->id = 0;
2697       use->group_id = group->id;
2698       if (group->vuses.length () == 1)
2699         continue;
2700
2701       gcc_assert (address_p (use->type));
2702
2703       for (j = 1; j < group->vuses.length ();)
2704         {
2705           struct iv_use *next = group->vuses[j];
2706           poly_int64 offset = next->addr_offset - use->addr_offset;
2707
2708           /* Split group if aksed to, or the offset against the first
2709              use can't fit in offset part of addressing mode.  IV uses
2710              having the same offset are still kept in one group.  */
2711           if (maybe_ne (offset, 0)
2712               && (split_p || !addr_offset_valid_p (use, offset)))
2713             {
2714               if (!new_group)
2715                 new_group = record_group (data, group->type);
2716               group->vuses.ordered_remove (j);
2717               new_group->vuses.safe_push (next);
2718               continue;
2719             }
2720
2721           next->id = j;
2722           next->group_id = group->id;
2723           j++;
2724         }
2725     }
2726 }
2727
2728 /* Finds uses of the induction variables that are interesting.  */
2729
2730 static void
2731 find_interesting_uses (struct ivopts_data *data)
2732 {
2733   basic_block bb;
2734   gimple_stmt_iterator bsi;
2735   basic_block *body = get_loop_body (data->current_loop);
2736   unsigned i;
2737   edge e;
2738
2739   for (i = 0; i < data->current_loop->num_nodes; i++)
2740     {
2741       edge_iterator ei;
2742       bb = body[i];
2743
2744       FOR_EACH_EDGE (e, ei, bb->succs)
2745         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2746             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2747           find_interesting_uses_outside (data, e);
2748
2749       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2750         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2751       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2752         if (!is_gimple_debug (gsi_stmt (bsi)))
2753           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2754     }
2755   free (body);
2756
2757   split_address_groups (data);
2758
2759   if (dump_file && (dump_flags & TDF_DETAILS))
2760     {
2761       fprintf (dump_file, "\n<IV Groups>:\n");
2762       dump_groups (dump_file, data);
2763       fprintf (dump_file, "\n");
2764     }
2765 }
2766
2767 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2768    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2769    we are at the top-level of the processed address.  */
2770
2771 static tree
2772 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2773                 poly_int64 *offset)
2774 {
2775   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2776   enum tree_code code;
2777   tree type, orig_type = TREE_TYPE (expr);
2778   poly_int64 off0, off1;
2779   HOST_WIDE_INT st;
2780   tree orig_expr = expr;
2781
2782   STRIP_NOPS (expr);
2783
2784   type = TREE_TYPE (expr);
2785   code = TREE_CODE (expr);
2786   *offset = 0;
2787
2788   switch (code)
2789     {
2790     case POINTER_PLUS_EXPR:
2791     case PLUS_EXPR:
2792     case MINUS_EXPR:
2793       op0 = TREE_OPERAND (expr, 0);
2794       op1 = TREE_OPERAND (expr, 1);
2795
2796       op0 = strip_offset_1 (op0, false, false, &off0);
2797       op1 = strip_offset_1 (op1, false, false, &off1);
2798
2799       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2800       if (op0 == TREE_OPERAND (expr, 0)
2801           && op1 == TREE_OPERAND (expr, 1))
2802         return orig_expr;
2803
2804       if (integer_zerop (op1))
2805         expr = op0;
2806       else if (integer_zerop (op0))
2807         {
2808           if (code == MINUS_EXPR)
2809             expr = fold_build1 (NEGATE_EXPR, type, op1);
2810           else
2811             expr = op1;
2812         }
2813       else
2814         expr = fold_build2 (code, type, op0, op1);
2815
2816       return fold_convert (orig_type, expr);
2817
2818     case MULT_EXPR:
2819       op1 = TREE_OPERAND (expr, 1);
2820       if (!cst_and_fits_in_hwi (op1))
2821         return orig_expr;
2822
2823       op0 = TREE_OPERAND (expr, 0);
2824       op0 = strip_offset_1 (op0, false, false, &off0);
2825       if (op0 == TREE_OPERAND (expr, 0))
2826         return orig_expr;
2827
2828       *offset = off0 * int_cst_value (op1);
2829       if (integer_zerop (op0))
2830         expr = op0;
2831       else
2832         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2833
2834       return fold_convert (orig_type, expr);
2835
2836     case ARRAY_REF:
2837     case ARRAY_RANGE_REF:
2838       if (!inside_addr)
2839         return orig_expr;
2840
2841       step = array_ref_element_size (expr);
2842       if (!cst_and_fits_in_hwi (step))
2843         break;
2844
2845       st = int_cst_value (step);
2846       op1 = TREE_OPERAND (expr, 1);
2847       op1 = strip_offset_1 (op1, false, false, &off1);
2848       *offset = off1 * st;
2849
2850       if (top_compref
2851           && integer_zerop (op1))
2852         {
2853           /* Strip the component reference completely.  */
2854           op0 = TREE_OPERAND (expr, 0);
2855           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2856           *offset += off0;
2857           return op0;
2858         }
2859       break;
2860
2861     case COMPONENT_REF:
2862       {
2863         tree field;
2864
2865         if (!inside_addr)
2866           return orig_expr;
2867
2868         tmp = component_ref_field_offset (expr);
2869         field = TREE_OPERAND (expr, 1);
2870         if (top_compref
2871             && cst_and_fits_in_hwi (tmp)
2872             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2873           {
2874             HOST_WIDE_INT boffset, abs_off;
2875
2876             /* Strip the component reference completely.  */
2877             op0 = TREE_OPERAND (expr, 0);
2878             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2879             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2880             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2881             if (boffset < 0)
2882               abs_off = -abs_off;
2883
2884             *offset = off0 + int_cst_value (tmp) + abs_off;
2885             return op0;
2886           }
2887       }
2888       break;
2889
2890     case ADDR_EXPR:
2891       op0 = TREE_OPERAND (expr, 0);
2892       op0 = strip_offset_1 (op0, true, true, &off0);
2893       *offset += off0;
2894
2895       if (op0 == TREE_OPERAND (expr, 0))
2896         return orig_expr;
2897
2898       expr = build_fold_addr_expr (op0);
2899       return fold_convert (orig_type, expr);
2900
2901     case MEM_REF:
2902       /* ???  Offset operand?  */
2903       inside_addr = false;
2904       break;
2905
2906     default:
2907       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2908         return build_int_cst (orig_type, 0);
2909       return orig_expr;
2910     }
2911
2912   /* Default handling of expressions for that we want to recurse into
2913      the first operand.  */
2914   op0 = TREE_OPERAND (expr, 0);
2915   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2916   *offset += off0;
2917
2918   if (op0 == TREE_OPERAND (expr, 0)
2919       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2920     return orig_expr;
2921
2922   expr = copy_node (expr);
2923   TREE_OPERAND (expr, 0) = op0;
2924   if (op1)
2925     TREE_OPERAND (expr, 1) = op1;
2926
2927   /* Inside address, we might strip the top level component references,
2928      thus changing type of the expression.  Handling of ADDR_EXPR
2929      will fix that.  */
2930   expr = fold_convert (orig_type, expr);
2931
2932   return expr;
2933 }
2934
2935 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2936
2937 tree
2938 strip_offset (tree expr, poly_uint64_pod *offset)
2939 {
2940   poly_int64 off;
2941   tree core = strip_offset_1 (expr, false, false, &off);
2942   *offset = off;
2943   return core;
2944 }
2945
2946 /* Returns variant of TYPE that can be used as base for different uses.
2947    We return unsigned type with the same precision, which avoids problems
2948    with overflows.  */
2949
2950 static tree
2951 generic_type_for (tree type)
2952 {
2953   if (POINTER_TYPE_P (type))
2954     return unsigned_type_for (type);
2955
2956   if (TYPE_UNSIGNED (type))
2957     return type;
2958
2959   return unsigned_type_for (type);
2960 }
2961
2962 /* Private data for walk_tree.  */
2963
2964 struct walk_tree_data
2965 {
2966   bitmap *inv_vars;
2967   struct ivopts_data *idata;
2968 };
2969
2970 /* Callback function for walk_tree, it records invariants and symbol
2971    reference in *EXPR_P.  DATA is the structure storing result info.  */
2972
2973 static tree
2974 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2975 {
2976   tree op = *expr_p;
2977   struct version_info *info;
2978   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2979
2980   if (TREE_CODE (op) != SSA_NAME)
2981     return NULL_TREE;
2982
2983   info = name_info (wdata->idata, op);
2984   /* Because we expand simple operations when finding IVs, loop invariant
2985      variable that isn't referred by the original loop could be used now.
2986      Record such invariant variables here.  */
2987   if (!info->iv)
2988     {
2989       struct ivopts_data *idata = wdata->idata;
2990       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2991
2992       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2993         {
2994           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2995           record_invariant (idata, op, false);
2996         }
2997     }
2998   if (!info->inv_id || info->has_nonlin_use)
2999     return NULL_TREE;
3000
3001   if (!*wdata->inv_vars)
3002     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3003   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3004
3005   return NULL_TREE;
3006 }
3007
3008 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3009    store it.  */
3010
3011 static inline void
3012 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3013 {
3014   struct walk_tree_data wdata;
3015
3016   if (!inv_vars)
3017     return;
3018
3019   wdata.idata = data;
3020   wdata.inv_vars = inv_vars;
3021   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3022 }
3023
3024 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3025    will be recorded if it doesn't exist yet.  Given below two exprs:
3026      inv_expr + cst1, inv_expr + cst2
3027    It's hard to make decision whether constant part should be stripped
3028    or not.  We choose to not strip based on below facts:
3029      1) We need to count ADD cost for constant part if it's stripped,
3030         which is't always trivial where this functions is called.
3031      2) Stripping constant away may be conflict with following loop
3032         invariant hoisting pass.
3033      3) Not stripping constant away results in more invariant exprs,
3034         which usually leads to decision preferring lower reg pressure.  */
3035
3036 static iv_inv_expr_ent *
3037 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3038 {
3039   STRIP_NOPS (inv_expr);
3040
3041   if (poly_int_tree_p (inv_expr)
3042       || TREE_CODE (inv_expr) == SSA_NAME)
3043     return NULL;
3044
3045   /* Don't strip constant part away as we used to.  */
3046
3047   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3048   struct iv_inv_expr_ent ent;
3049   ent.expr = inv_expr;
3050   ent.hash = iterative_hash_expr (inv_expr, 0);
3051   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3052
3053   if (!*slot)
3054     {
3055       *slot = XNEW (struct iv_inv_expr_ent);
3056       (*slot)->expr = inv_expr;
3057       (*slot)->hash = ent.hash;
3058       (*slot)->id = ++data->max_inv_expr_id;
3059     }
3060
3061   return *slot;
3062 }
3063
3064 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3065    position to POS.  If USE is not NULL, the candidate is set as related to
3066    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3067    replacement of the final value of the iv by a direct computation.  */
3068
3069 static struct iv_cand *
3070 add_candidate_1 (struct ivopts_data *data,
3071                  tree base, tree step, bool important, enum iv_position pos,
3072                  struct iv_use *use, gimple *incremented_at,
3073                  struct iv *orig_iv = NULL)
3074 {
3075   unsigned i;
3076   struct iv_cand *cand = NULL;
3077   tree type, orig_type;
3078
3079   gcc_assert (base && step);
3080
3081   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3082      live, but the ivopts code may replace a real pointer with one
3083      pointing before or after the memory block that is then adjusted
3084      into the memory block during the loop.  FIXME: It would likely be
3085      better to actually force the pointer live and still use ivopts;
3086      for example, it would be enough to write the pointer into memory
3087      and keep it there until after the loop.  */
3088   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3089     return NULL;
3090
3091   /* For non-original variables, make sure their values are computed in a type
3092      that does not invoke undefined behavior on overflows (since in general,
3093      we cannot prove that these induction variables are non-wrapping).  */
3094   if (pos != IP_ORIGINAL)
3095     {
3096       orig_type = TREE_TYPE (base);
3097       type = generic_type_for (orig_type);
3098       if (type != orig_type)
3099         {
3100           base = fold_convert (type, base);
3101           step = fold_convert (type, step);
3102         }
3103     }
3104
3105   for (i = 0; i < data->vcands.length (); i++)
3106     {
3107       cand = data->vcands[i];
3108
3109       if (cand->pos != pos)
3110         continue;
3111
3112       if (cand->incremented_at != incremented_at
3113           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3114               && cand->ainc_use != use))
3115         continue;
3116
3117       if (operand_equal_p (base, cand->iv->base, 0)
3118           && operand_equal_p (step, cand->iv->step, 0)
3119           && (TYPE_PRECISION (TREE_TYPE (base))
3120               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3121         break;
3122     }
3123
3124   if (i == data->vcands.length ())
3125     {
3126       cand = XCNEW (struct iv_cand);
3127       cand->id = i;
3128       cand->iv = alloc_iv (data, base, step);
3129       cand->pos = pos;
3130       if (pos != IP_ORIGINAL)
3131         {
3132           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3133           cand->var_after = cand->var_before;
3134         }
3135       cand->important = important;
3136       cand->incremented_at = incremented_at;
3137       data->vcands.safe_push (cand);
3138
3139       if (!poly_int_tree_p (step))
3140         {
3141           find_inv_vars (data, &step, &cand->inv_vars);
3142
3143           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3144           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3145           if (inv_expr != NULL)
3146             {
3147               cand->inv_exprs = cand->inv_vars;
3148               cand->inv_vars = NULL;
3149               if (cand->inv_exprs)
3150                 bitmap_clear (cand->inv_exprs);
3151               else
3152                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3153
3154               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3155             }
3156         }
3157
3158       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3159         cand->ainc_use = use;
3160       else
3161         cand->ainc_use = NULL;
3162
3163       cand->orig_iv = orig_iv;
3164       if (dump_file && (dump_flags & TDF_DETAILS))
3165         dump_cand (dump_file, cand);
3166     }
3167
3168   cand->important |= important;
3169
3170   /* Relate candidate to the group for which it is added.  */
3171   if (use)
3172     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3173
3174   return cand;
3175 }
3176
3177 /* Returns true if incrementing the induction variable at the end of the LOOP
3178    is allowed.
3179
3180    The purpose is to avoid splitting latch edge with a biv increment, thus
3181    creating a jump, possibly confusing other optimization passes and leaving
3182    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3183    available (so we do not have a better alternative), or if the latch edge
3184    is already nonempty.  */
3185
3186 static bool
3187 allow_ip_end_pos_p (struct loop *loop)
3188 {
3189   if (!ip_normal_pos (loop))
3190     return true;
3191
3192   if (!empty_block_p (ip_end_pos (loop)))
3193     return true;
3194
3195   return false;
3196 }
3197
3198 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3199    Important field is set to IMPORTANT.  */
3200
3201 static void
3202 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3203                         bool important, struct iv_use *use)
3204 {
3205   basic_block use_bb = gimple_bb (use->stmt);
3206   machine_mode mem_mode;
3207   unsigned HOST_WIDE_INT cstepi;
3208
3209   /* If we insert the increment in any position other than the standard
3210      ones, we must ensure that it is incremented once per iteration.
3211      It must not be in an inner nested loop, or one side of an if
3212      statement.  */
3213   if (use_bb->loop_father != data->current_loop
3214       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3215       || stmt_can_throw_internal (use->stmt)
3216       || !cst_and_fits_in_hwi (step))
3217     return;
3218
3219   cstepi = int_cst_value (step);
3220
3221   mem_mode = TYPE_MODE (use->mem_type);
3222   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3223         || USE_STORE_PRE_INCREMENT (mem_mode))
3224        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3225       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3226            || USE_STORE_PRE_DECREMENT (mem_mode))
3227           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3228     {
3229       enum tree_code code = MINUS_EXPR;
3230       tree new_base;
3231       tree new_step = step;
3232
3233       if (POINTER_TYPE_P (TREE_TYPE (base)))
3234         {
3235           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3236           code = POINTER_PLUS_EXPR;
3237         }
3238       else
3239         new_step = fold_convert (TREE_TYPE (base), new_step);
3240       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3241       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3242                        use->stmt);
3243     }
3244   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3245         || USE_STORE_POST_INCREMENT (mem_mode))
3246        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3247       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3248            || USE_STORE_POST_DECREMENT (mem_mode))
3249           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3250     {
3251       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3252                        use->stmt);
3253     }
3254 }
3255
3256 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3257    position to POS.  If USE is not NULL, the candidate is set as related to
3258    it.  The candidate computation is scheduled before exit condition and at
3259    the end of loop.  */
3260
3261 static void
3262 add_candidate (struct ivopts_data *data,
3263                tree base, tree step, bool important, struct iv_use *use,
3264                struct iv *orig_iv = NULL)
3265 {
3266   if (ip_normal_pos (data->current_loop))
3267     add_candidate_1 (data, base, step, important,
3268                      IP_NORMAL, use, NULL, orig_iv);
3269   if (ip_end_pos (data->current_loop)
3270       && allow_ip_end_pos_p (data->current_loop))
3271     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3272 }
3273
3274 /* Adds standard iv candidates.  */
3275
3276 static void
3277 add_standard_iv_candidates (struct ivopts_data *data)
3278 {
3279   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3280
3281   /* The same for a double-integer type if it is still fast enough.  */
3282   if (TYPE_PRECISION
3283         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3284       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3285     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3286                    build_int_cst (long_integer_type_node, 1), true, NULL);
3287
3288   /* The same for a double-integer type if it is still fast enough.  */
3289   if (TYPE_PRECISION
3290         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3291       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3292     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3293                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3294 }
3295
3296
3297 /* Adds candidates bases on the old induction variable IV.  */
3298
3299 static void
3300 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3301 {
3302   gimple *phi;
3303   tree def;
3304   struct iv_cand *cand;
3305
3306   /* Check if this biv is used in address type use.  */
3307   if (iv->no_overflow  && iv->have_address_use
3308       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3309       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3310     {
3311       tree base = fold_convert (sizetype, iv->base);
3312       tree step = fold_convert (sizetype, iv->step);
3313
3314       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3315       add_candidate (data, base, step, true, NULL, iv);
3316       /* Add iv cand of the original type only if it has nonlinear use.  */
3317       if (iv->nonlin_use)
3318         add_candidate (data, iv->base, iv->step, true, NULL);
3319     }
3320   else
3321     add_candidate (data, iv->base, iv->step, true, NULL);
3322
3323   /* The same, but with initial value zero.  */
3324   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3325     add_candidate (data, size_int (0), iv->step, true, NULL);
3326   else
3327     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3328                    iv->step, true, NULL);
3329
3330   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3331   if (gimple_code (phi) == GIMPLE_PHI)
3332     {
3333       /* Additionally record the possibility of leaving the original iv
3334          untouched.  */
3335       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3336       /* Don't add candidate if it's from another PHI node because
3337          it's an affine iv appearing in the form of PEELED_CHREC.  */
3338       phi = SSA_NAME_DEF_STMT (def);
3339       if (gimple_code (phi) != GIMPLE_PHI)
3340         {
3341           cand = add_candidate_1 (data,
3342                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3343                                   SSA_NAME_DEF_STMT (def));
3344           if (cand)
3345             {
3346               cand->var_before = iv->ssa_name;
3347               cand->var_after = def;
3348             }
3349         }
3350       else
3351         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3352     }
3353 }
3354
3355 /* Adds candidates based on the old induction variables.  */
3356
3357 static void
3358 add_iv_candidate_for_bivs (struct ivopts_data *data)
3359 {
3360   unsigned i;
3361   struct iv *iv;
3362   bitmap_iterator bi;
3363
3364   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3365     {
3366       iv = ver_info (data, i)->iv;
3367       if (iv && iv->biv_p && !integer_zerop (iv->step))
3368         add_iv_candidate_for_biv (data, iv);
3369     }
3370 }
3371
3372 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3373
3374 static void
3375 record_common_cand (struct ivopts_data *data, tree base,
3376                     tree step, struct iv_use *use)
3377 {
3378   struct iv_common_cand ent;
3379   struct iv_common_cand **slot;
3380
3381   ent.base = base;
3382   ent.step = step;
3383   ent.hash = iterative_hash_expr (base, 0);
3384   ent.hash = iterative_hash_expr (step, ent.hash);
3385
3386   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3387   if (*slot == NULL)
3388     {
3389       *slot = new iv_common_cand ();
3390       (*slot)->base = base;
3391       (*slot)->step = step;
3392       (*slot)->uses.create (8);
3393       (*slot)->hash = ent.hash;
3394       data->iv_common_cands.safe_push ((*slot));
3395     }
3396
3397   gcc_assert (use != NULL);
3398   (*slot)->uses.safe_push (use);
3399   return;
3400 }
3401
3402 /* Comparison function used to sort common candidates.  */
3403
3404 static int
3405 common_cand_cmp (const void *p1, const void *p2)
3406 {
3407   unsigned n1, n2;
3408   const struct iv_common_cand *const *const ccand1
3409     = (const struct iv_common_cand *const *)p1;
3410   const struct iv_common_cand *const *const ccand2
3411     = (const struct iv_common_cand *const *)p2;
3412
3413   n1 = (*ccand1)->uses.length ();
3414   n2 = (*ccand2)->uses.length ();
3415   return n2 - n1;
3416 }
3417
3418 /* Adds IV candidates based on common candidated recorded.  */
3419
3420 static void
3421 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3422 {
3423   unsigned i, j;
3424   struct iv_cand *cand_1, *cand_2;
3425
3426   data->iv_common_cands.qsort (common_cand_cmp);
3427   for (i = 0; i < data->iv_common_cands.length (); i++)
3428     {
3429       struct iv_common_cand *ptr = data->iv_common_cands[i];
3430
3431       /* Only add IV candidate if it's derived from multiple uses.  */
3432       if (ptr->uses.length () <= 1)
3433         break;
3434
3435       cand_1 = NULL;
3436       cand_2 = NULL;
3437       if (ip_normal_pos (data->current_loop))
3438         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3439                                   false, IP_NORMAL, NULL, NULL);
3440
3441       if (ip_end_pos (data->current_loop)
3442           && allow_ip_end_pos_p (data->current_loop))
3443         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3444                                   false, IP_END, NULL, NULL);
3445
3446       /* Bind deriving uses and the new candidates.  */
3447       for (j = 0; j < ptr->uses.length (); j++)
3448         {
3449           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3450           if (cand_1)
3451             bitmap_set_bit (group->related_cands, cand_1->id);
3452           if (cand_2)
3453             bitmap_set_bit (group->related_cands, cand_2->id);
3454         }
3455     }
3456
3457   /* Release data since it is useless from this point.  */
3458   data->iv_common_cand_tab->empty ();
3459   data->iv_common_cands.truncate (0);
3460 }
3461
3462 /* Adds candidates based on the value of USE's iv.  */
3463
3464 static void
3465 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3466 {
3467   poly_uint64 offset;
3468   tree base;
3469   tree basetype;
3470   struct iv *iv = use->iv;
3471
3472   add_candidate (data, iv->base, iv->step, false, use);
3473
3474   /* Record common candidate for use in case it can be shared by others.  */
3475   record_common_cand (data, iv->base, iv->step, use);
3476
3477   /* Record common candidate with initial value zero.  */
3478   basetype = TREE_TYPE (iv->base);
3479   if (POINTER_TYPE_P (basetype))
3480     basetype = sizetype;
3481   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3482
3483   /* Record common candidate with constant offset stripped in base.
3484      Like the use itself, we also add candidate directly for it.  */
3485   base = strip_offset (iv->base, &offset);
3486   if (maybe_ne (offset, 0U) || base != iv->base)
3487     {
3488       record_common_cand (data, base, iv->step, use);
3489       add_candidate (data, base, iv->step, false, use);
3490     }
3491
3492   /* Record common candidate with base_object removed in base.  */
3493   base = iv->base;
3494   STRIP_NOPS (base);
3495   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3496     {
3497       tree step = iv->step;
3498
3499       STRIP_NOPS (step);
3500       base = TREE_OPERAND (base, 1);
3501       step = fold_convert (sizetype, step);
3502       record_common_cand (data, base, step, use);
3503       /* Also record common candidate with offset stripped.  */
3504       base = strip_offset (base, &offset);
3505       if (maybe_ne (offset, 0U))
3506         record_common_cand (data, base, step, use);
3507     }
3508
3509   /* At last, add auto-incremental candidates.  Make such variables
3510      important since other iv uses with same base object may be based
3511      on it.  */
3512   if (use != NULL && address_p (use->type))
3513     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3514 }
3515
3516 /* Adds candidates based on the uses.  */
3517
3518 static void
3519 add_iv_candidate_for_groups (struct ivopts_data *data)
3520 {
3521   unsigned i;
3522
3523   /* Only add candidate for the first use in group.  */
3524   for (i = 0; i < data->vgroups.length (); i++)
3525     {
3526       struct iv_group *group = data->vgroups[i];
3527
3528       gcc_assert (group->vuses[0] != NULL);
3529       add_iv_candidate_for_use (data, group->vuses[0]);
3530     }
3531   add_iv_candidate_derived_from_uses (data);
3532 }
3533
3534 /* Record important candidates and add them to related_cands bitmaps.  */
3535
3536 static void
3537 record_important_candidates (struct ivopts_data *data)
3538 {
3539   unsigned i;
3540   struct iv_group *group;
3541
3542   for (i = 0; i < data->vcands.length (); i++)
3543     {
3544       struct iv_cand *cand = data->vcands[i];
3545
3546       if (cand->important)
3547         bitmap_set_bit (data->important_candidates, i);
3548     }
3549
3550   data->consider_all_candidates = (data->vcands.length ()
3551                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3552
3553   /* Add important candidates to groups' related_cands bitmaps.  */
3554   for (i = 0; i < data->vgroups.length (); i++)
3555     {
3556       group = data->vgroups[i];
3557       bitmap_ior_into (group->related_cands, data->important_candidates);
3558     }
3559 }
3560
3561 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3562    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3563    we allocate a simple list to every use.  */
3564
3565 static void
3566 alloc_use_cost_map (struct ivopts_data *data)
3567 {
3568   unsigned i, size, s;
3569
3570   for (i = 0; i < data->vgroups.length (); i++)
3571     {
3572       struct iv_group *group = data->vgroups[i];
3573
3574       if (data->consider_all_candidates)
3575         size = data->vcands.length ();
3576       else
3577         {
3578           s = bitmap_count_bits (group->related_cands);
3579
3580           /* Round up to the power of two, so that moduling by it is fast.  */
3581           size = s ? (1 << ceil_log2 (s)) : 1;
3582         }
3583
3584       group->n_map_members = size;
3585       group->cost_map = XCNEWVEC (struct cost_pair, size);
3586     }
3587 }
3588
3589 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3590    on invariants INV_VARS and that the value used in expressing it is
3591    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3592
3593 static void
3594 set_group_iv_cost (struct ivopts_data *data,
3595                    struct iv_group *group, struct iv_cand *cand,
3596                    comp_cost cost, bitmap inv_vars, tree value,
3597                    enum tree_code comp, bitmap inv_exprs)
3598 {
3599   unsigned i, s;
3600
3601   if (cost.infinite_cost_p ())
3602     {
3603       BITMAP_FREE (inv_vars);
3604       BITMAP_FREE (inv_exprs);
3605       return;
3606     }
3607
3608   if (data->consider_all_candidates)
3609     {
3610       group->cost_map[cand->id].cand = cand;
3611       group->cost_map[cand->id].cost = cost;
3612       group->cost_map[cand->id].inv_vars = inv_vars;
3613       group->cost_map[cand->id].inv_exprs = inv_exprs;
3614       group->cost_map[cand->id].value = value;
3615       group->cost_map[cand->id].comp = comp;
3616       return;
3617     }
3618
3619   /* n_map_members is a power of two, so this computes modulo.  */
3620   s = cand->id & (group->n_map_members - 1);
3621   for (i = s; i < group->n_map_members; i++)
3622     if (!group->cost_map[i].cand)
3623       goto found;
3624   for (i = 0; i < s; i++)
3625     if (!group->cost_map[i].cand)
3626       goto found;
3627
3628   gcc_unreachable ();
3629
3630 found:
3631   group->cost_map[i].cand = cand;
3632   group->cost_map[i].cost = cost;
3633   group->cost_map[i].inv_vars = inv_vars;
3634   group->cost_map[i].inv_exprs = inv_exprs;
3635   group->cost_map[i].value = value;
3636   group->cost_map[i].comp = comp;
3637 }
3638
3639 /* Gets cost of (GROUP, CAND) pair.  */
3640
3641 static struct cost_pair *
3642 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3643                    struct iv_cand *cand)
3644 {
3645   unsigned i, s;
3646   struct cost_pair *ret;
3647
3648   if (!cand)
3649     return NULL;
3650
3651   if (data->consider_all_candidates)
3652     {
3653       ret = group->cost_map + cand->id;
3654       if (!ret->cand)
3655         return NULL;
3656
3657       return ret;
3658     }
3659
3660   /* n_map_members is a power of two, so this computes modulo.  */
3661   s = cand->id & (group->n_map_members - 1);
3662   for (i = s; i < group->n_map_members; i++)
3663     if (group->cost_map[i].cand == cand)
3664       return group->cost_map + i;
3665     else if (group->cost_map[i].cand == NULL)
3666       return NULL;
3667   for (i = 0; i < s; i++)
3668     if (group->cost_map[i].cand == cand)
3669       return group->cost_map + i;
3670     else if (group->cost_map[i].cand == NULL)
3671       return NULL;
3672
3673   return NULL;
3674 }
3675
3676 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3677 static rtx
3678 produce_memory_decl_rtl (tree obj, int *regno)
3679 {
3680   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3681   machine_mode address_mode = targetm.addr_space.address_mode (as);
3682   rtx x;
3683
3684   gcc_assert (obj);
3685   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3686     {
3687       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3688       x = gen_rtx_SYMBOL_REF (address_mode, name);
3689       SET_SYMBOL_REF_DECL (x, obj);
3690       x = gen_rtx_MEM (DECL_MODE (obj), x);
3691       set_mem_addr_space (x, as);
3692       targetm.encode_section_info (obj, x, true);
3693     }
3694   else
3695     {
3696       x = gen_raw_REG (address_mode, (*regno)++);
3697       x = gen_rtx_MEM (DECL_MODE (obj), x);
3698       set_mem_addr_space (x, as);
3699     }
3700
3701   return x;
3702 }
3703
3704 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3705    walk_tree.  DATA contains the actual fake register number.  */
3706
3707 static tree
3708 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3709 {
3710   tree obj = NULL_TREE;
3711   rtx x = NULL_RTX;
3712   int *regno = (int *) data;
3713
3714   switch (TREE_CODE (*expr_p))
3715     {
3716     case ADDR_EXPR:
3717       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3718            handled_component_p (*expr_p);
3719            expr_p = &TREE_OPERAND (*expr_p, 0))
3720         continue;
3721       obj = *expr_p;
3722       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3723         x = produce_memory_decl_rtl (obj, regno);
3724       break;
3725
3726     case SSA_NAME:
3727       *ws = 0;
3728       obj = SSA_NAME_VAR (*expr_p);
3729       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3730       if (!obj)
3731         return NULL_TREE;
3732       if (!DECL_RTL_SET_P (obj))
3733         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3734       break;
3735
3736     case VAR_DECL:
3737     case PARM_DECL:
3738     case RESULT_DECL:
3739       *ws = 0;
3740       obj = *expr_p;
3741
3742       if (DECL_RTL_SET_P (obj))
3743         break;
3744
3745       if (DECL_MODE (obj) == BLKmode)
3746         x = produce_memory_decl_rtl (obj, regno);
3747       else
3748         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3749
3750       break;
3751
3752     default:
3753       break;
3754     }
3755
3756   if (x)
3757     {
3758       decl_rtl_to_reset.safe_push (obj);
3759       SET_DECL_RTL (obj, x);
3760     }
3761
3762   return NULL_TREE;
3763 }
3764
3765 /* Determines cost of the computation of EXPR.  */
3766
3767 static unsigned
3768 computation_cost (tree expr, bool speed)
3769 {
3770   rtx_insn *seq;
3771   rtx rslt;
3772   tree type = TREE_TYPE (expr);
3773   unsigned cost;
3774   /* Avoid using hard regs in ways which may be unsupported.  */
3775   int regno = LAST_VIRTUAL_REGISTER + 1;
3776   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3777   enum node_frequency real_frequency = node->frequency;
3778
3779   node->frequency = NODE_FREQUENCY_NORMAL;
3780   crtl->maybe_hot_insn_p = speed;
3781   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3782   start_sequence ();
3783   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3784   seq = get_insns ();
3785   end_sequence ();
3786   default_rtl_profile ();
3787   node->frequency = real_frequency;
3788
3789   cost = seq_cost (seq, speed);
3790   if (MEM_P (rslt))
3791     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3792                           TYPE_ADDR_SPACE (type), speed);
3793   else if (!REG_P (rslt))
3794     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3795
3796   return cost;
3797 }
3798
3799 /* Returns variable containing the value of candidate CAND at statement AT.  */
3800
3801 static tree
3802 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3803 {
3804   if (stmt_after_increment (loop, cand, stmt))
3805     return cand->var_after;
3806   else
3807     return cand->var_before;
3808 }
3809
3810 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3811    same precision that is at least as wide as the precision of TYPE, stores
3812    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3813    type of A and B.  */
3814
3815 static tree
3816 determine_common_wider_type (tree *a, tree *b)
3817 {
3818   tree wider_type = NULL;
3819   tree suba, subb;
3820   tree atype = TREE_TYPE (*a);
3821
3822   if (CONVERT_EXPR_P (*a))
3823     {
3824       suba = TREE_OPERAND (*a, 0);
3825       wider_type = TREE_TYPE (suba);
3826       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3827         return atype;
3828     }
3829   else
3830     return atype;
3831
3832   if (CONVERT_EXPR_P (*b))
3833     {
3834       subb = TREE_OPERAND (*b, 0);
3835       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3836         return atype;
3837     }
3838   else
3839     return atype;
3840
3841   *a = suba;
3842   *b = subb;
3843   return wider_type;
3844 }
3845
3846 /* Determines the expression by that USE is expressed from induction variable
3847    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3848    decomposed form.  The invariant part is stored in AFF_INV; while variant
3849    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3850    non-null.  Returns false if USE cannot be expressed using CAND.  */
3851
3852 static bool
3853 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3854                        struct iv_cand *cand, struct aff_tree *aff_inv,
3855                        struct aff_tree *aff_var, widest_int *prat = NULL)
3856 {
3857   tree ubase = use->iv->base, ustep = use->iv->step;
3858   tree cbase = cand->iv->base, cstep = cand->iv->step;
3859   tree common_type, uutype, var, cstep_common;
3860   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3861   aff_tree aff_cbase;
3862   widest_int rat;
3863
3864   /* We must have a precision to express the values of use.  */
3865   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3866     return false;
3867
3868   var = var_at_stmt (loop, cand, at);
3869   uutype = unsigned_type_for (utype);
3870
3871   /* If the conversion is not noop, perform it.  */
3872   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3873     {
3874       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3875           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3876         {
3877           tree inner_base, inner_step, inner_type;
3878           inner_base = TREE_OPERAND (cbase, 0);
3879           if (CONVERT_EXPR_P (cstep))
3880             inner_step = TREE_OPERAND (cstep, 0);
3881           else
3882             inner_step = cstep;
3883
3884           inner_type = TREE_TYPE (inner_base);
3885           /* If candidate is added from a biv whose type is smaller than
3886              ctype, we know both candidate and the biv won't overflow.
3887              In this case, it's safe to skip the convertion in candidate.
3888              As an example, (unsigned short)((unsigned long)A) equals to
3889              (unsigned short)A, if A has a type no larger than short.  */
3890           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3891             {
3892               cbase = inner_base;
3893               cstep = inner_step;
3894             }
3895         }
3896       cbase = fold_convert (uutype, cbase);
3897       cstep = fold_convert (uutype, cstep);
3898       var = fold_convert (uutype, var);
3899     }
3900
3901   /* Ratio is 1 when computing the value of biv cand by itself.
3902      We can't rely on constant_multiple_of in this case because the
3903      use is created after the original biv is selected.  The call
3904      could fail because of inconsistent fold behavior.  See PR68021
3905      for more information.  */
3906   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3907     {
3908       gcc_assert (is_gimple_assign (use->stmt));
3909       gcc_assert (use->iv->ssa_name == cand->var_after);
3910       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3911       rat = 1;
3912     }
3913   else if (!constant_multiple_of (ustep, cstep, &rat))
3914     return false;
3915
3916   if (prat)
3917     *prat = rat;
3918
3919   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3920      type, we achieve better folding by computing their difference in this
3921      wider type, and cast the result to UUTYPE.  We do not need to worry about
3922      overflows, as all the arithmetics will in the end be performed in UUTYPE
3923      anyway.  */
3924   common_type = determine_common_wider_type (&ubase, &cbase);
3925
3926   /* use = ubase - ratio * cbase + ratio * var.  */
3927   tree_to_aff_combination (ubase, common_type, aff_inv);
3928   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3929   tree_to_aff_combination (var, uutype, aff_var);
3930
3931   /* We need to shift the value if we are after the increment.  */
3932   if (stmt_after_increment (loop, cand, at))
3933     {
3934       aff_tree cstep_aff;
3935
3936       if (common_type != uutype)
3937         cstep_common = fold_convert (common_type, cstep);
3938       else
3939         cstep_common = cstep;
3940
3941       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3942       aff_combination_add (&aff_cbase, &cstep_aff);
3943     }
3944
3945   aff_combination_scale (&aff_cbase, -rat);
3946   aff_combination_add (aff_inv, &aff_cbase);
3947   if (common_type != uutype)
3948     aff_combination_convert (aff_inv, uutype);
3949
3950   aff_combination_scale (aff_var, rat);
3951   return true;
3952 }
3953
3954 /* Determines the expression by that USE is expressed from induction variable
3955    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3956    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3957
3958 static bool
3959 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3960                      struct iv_cand *cand, struct aff_tree *aff)
3961 {
3962   aff_tree aff_var;
3963
3964   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3965     return false;
3966
3967   aff_combination_add (aff, &aff_var);
3968   return true;
3969 }
3970
3971 /* Return the type of USE.  */
3972
3973 static tree
3974 get_use_type (struct iv_use *use)
3975 {
3976   tree base_type = TREE_TYPE (use->iv->base);
3977   tree type;
3978
3979   if (use->type == USE_REF_ADDRESS)
3980     {
3981       /* The base_type may be a void pointer.  Create a pointer type based on
3982          the mem_ref instead.  */
3983       type = build_pointer_type (TREE_TYPE (*use->op_p));
3984       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3985                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3986     }
3987   else
3988     type = base_type;
3989
3990   return type;
3991 }
3992
3993 /* Determines the expression by that USE is expressed from induction variable
3994    CAND at statement AT in LOOP.  The computation is unshared.  */
3995
3996 static tree
3997 get_computation_at (struct loop *loop, gimple *at,
3998                     struct iv_use *use, struct iv_cand *cand)
3999 {
4000   aff_tree aff;
4001   tree type = get_use_type (use);
4002
4003   if (!get_computation_aff (loop, at, use, cand, &aff))
4004     return NULL_TREE;
4005   unshare_aff_combination (&aff);
4006   return fold_convert (type, aff_combination_to_tree (&aff));
4007 }
4008
4009 /* Adjust the cost COST for being in loop setup rather than loop body.
4010    If we're optimizing for space, the loop setup overhead is constant;
4011    if we're optimizing for speed, amortize it over the per-iteration cost.
4012    If ROUND_UP_P is true, the result is round up rather than to zero when
4013    optimizing for speed.  */
4014 static unsigned
4015 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4016                    bool round_up_p = false)
4017 {
4018   if (cost == INFTY)
4019     return cost;
4020   else if (optimize_loop_for_speed_p (data->current_loop))
4021     {
4022       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4023       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4024     }
4025   else
4026     return cost;
4027 }
4028
4029 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4030    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4031    calculating the operands of EXPR.  Returns true if successful, and returns
4032    the cost in COST.  */
4033
4034 static bool
4035 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4036                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4037 {
4038   comp_cost res;
4039   tree op1 = TREE_OPERAND (expr, 1);
4040   tree cst = TREE_OPERAND (mult, 1);
4041   tree multop = TREE_OPERAND (mult, 0);
4042   int m = exact_log2 (int_cst_value (cst));
4043   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4044   int as_cost, sa_cost;
4045   bool mult_in_op1;
4046
4047   if (!(m >= 0 && m < maxm))
4048     return false;
4049
4050   STRIP_NOPS (op1);
4051   mult_in_op1 = operand_equal_p (op1, mult, 0);
4052
4053   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4054
4055   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4056      use that in preference to a shift insn followed by an add insn.  */
4057   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4058              ? shiftadd_cost (speed, mode, m)
4059              : (mult_in_op1
4060                 ? shiftsub1_cost (speed, mode, m)
4061                 : shiftsub0_cost (speed, mode, m)));
4062
4063   res = comp_cost (MIN (as_cost, sa_cost), 0);
4064   res += (mult_in_op1 ? cost0 : cost1);
4065
4066   STRIP_NOPS (multop);
4067   if (!is_gimple_val (multop))
4068     res += force_expr_to_var_cost (multop, speed);
4069
4070   *cost = res;
4071   return true;
4072 }
4073
4074 /* Estimates cost of forcing expression EXPR into a variable.  */
4075
4076 static comp_cost
4077 force_expr_to_var_cost (tree expr, bool speed)
4078 {
4079   static bool costs_initialized = false;
4080   static unsigned integer_cost [2];
4081   static unsigned symbol_cost [2];
4082   static unsigned address_cost [2];
4083   tree op0, op1;
4084   comp_cost cost0, cost1, cost;
4085   machine_mode mode;
4086   scalar_int_mode int_mode;
4087
4088   if (!costs_initialized)
4089     {
4090       tree type = build_pointer_type (integer_type_node);
4091       tree var, addr;
4092       rtx x;
4093       int i;
4094
4095       var = create_tmp_var_raw (integer_type_node, "test_var");
4096       TREE_STATIC (var) = 1;
4097       x = produce_memory_decl_rtl (var, NULL);
4098       SET_DECL_RTL (var, x);
4099
4100       addr = build1 (ADDR_EXPR, type, var);
4101
4102
4103       for (i = 0; i < 2; i++)
4104         {
4105           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4106                                                              2000), i);
4107
4108           symbol_cost[i] = computation_cost (addr, i) + 1;
4109
4110           address_cost[i]
4111             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4112           if (dump_file && (dump_flags & TDF_DETAILS))
4113             {
4114               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4115               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4116               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4117               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4118               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4119               fprintf (dump_file, "\n");
4120             }
4121         }
4122
4123       costs_initialized = true;
4124     }
4125
4126   STRIP_NOPS (expr);
4127
4128   if (SSA_VAR_P (expr))
4129     return no_cost;
4130
4131   if (is_gimple_min_invariant (expr))
4132     {
4133       if (poly_int_tree_p (expr))
4134         return comp_cost (integer_cost [speed], 0);
4135
4136       if (TREE_CODE (expr) == ADDR_EXPR)
4137         {
4138           tree obj = TREE_OPERAND (expr, 0);
4139
4140           if (VAR_P (obj)
4141               || TREE_CODE (obj) == PARM_DECL
4142               || TREE_CODE (obj) == RESULT_DECL)
4143             return comp_cost (symbol_cost [speed], 0);
4144         }
4145
4146       return comp_cost (address_cost [speed], 0);
4147     }
4148
4149   switch (TREE_CODE (expr))
4150     {
4151     case POINTER_PLUS_EXPR:
4152     case PLUS_EXPR:
4153     case MINUS_EXPR:
4154     case MULT_EXPR:
4155     case TRUNC_DIV_EXPR:
4156     case BIT_AND_EXPR:
4157     case BIT_IOR_EXPR:
4158     case LSHIFT_EXPR:
4159     case RSHIFT_EXPR:
4160       op0 = TREE_OPERAND (expr, 0);
4161       op1 = TREE_OPERAND (expr, 1);
4162       STRIP_NOPS (op0);
4163       STRIP_NOPS (op1);
4164       break;
4165
4166     CASE_CONVERT:
4167     case NEGATE_EXPR:
4168     case BIT_NOT_EXPR:
4169       op0 = TREE_OPERAND (expr, 0);
4170       STRIP_NOPS (op0);
4171       op1 = NULL_TREE;
4172       break;
4173
4174     default:
4175       /* Just an arbitrary value, FIXME.  */
4176       return comp_cost (target_spill_cost[speed], 0);
4177     }
4178
4179   if (op0 == NULL_TREE
4180       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4181     cost0 = no_cost;
4182   else
4183     cost0 = force_expr_to_var_cost (op0, speed);
4184
4185   if (op1 == NULL_TREE
4186       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4187     cost1 = no_cost;
4188   else
4189     cost1 = force_expr_to_var_cost (op1, speed);
4190
4191   mode = TYPE_MODE (TREE_TYPE (expr));
4192   switch (TREE_CODE (expr))
4193     {
4194     case POINTER_PLUS_EXPR:
4195     case PLUS_EXPR:
4196     case MINUS_EXPR:
4197     case NEGATE_EXPR:
4198       cost = comp_cost (add_cost (speed, mode), 0);
4199       if (TREE_CODE (expr) != NEGATE_EXPR)
4200         {
4201           tree mult = NULL_TREE;
4202           comp_cost sa_cost;
4203           if (TREE_CODE (op1) == MULT_EXPR)
4204             mult = op1;
4205           else if (TREE_CODE (op0) == MULT_EXPR)
4206             mult = op0;
4207
4208           if (mult != NULL_TREE
4209               && is_a <scalar_int_mode> (mode, &int_mode)
4210               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4211               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4212                                     speed, &sa_cost))
4213             return sa_cost;
4214         }
4215       break;
4216
4217     CASE_CONVERT:
4218       {
4219         tree inner_mode, outer_mode;
4220         outer_mode = TREE_TYPE (expr);
4221         inner_mode = TREE_TYPE (op0);
4222         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4223                                        TYPE_MODE (inner_mode), speed), 0);
4224       }
4225       break;
4226
4227     case MULT_EXPR:
4228       if (cst_and_fits_in_hwi (op0))
4229         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4230                                              mode, speed), 0);
4231       else if (cst_and_fits_in_hwi (op1))
4232         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4233                                              mode, speed), 0);
4234       else
4235         return comp_cost (target_spill_cost [speed], 0);
4236       break;
4237
4238     case TRUNC_DIV_EXPR:
4239       /* Division by power of two is usually cheap, so we allow it.  Forbid
4240          anything else.  */
4241       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4242         cost = comp_cost (add_cost (speed, mode), 0);
4243       else
4244         cost = comp_cost (target_spill_cost[speed], 0);
4245       break;
4246
4247     case BIT_AND_EXPR:
4248     case BIT_IOR_EXPR:
4249     case BIT_NOT_EXPR:
4250     case LSHIFT_EXPR:
4251     case RSHIFT_EXPR:
4252       cost = comp_cost (add_cost (speed, mode), 0);
4253       break;
4254
4255     default:
4256       gcc_unreachable ();
4257     }
4258
4259   cost += cost0;
4260   cost += cost1;
4261   return cost;
4262 }
4263
4264 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4265    invariants the computation depends on.  */
4266
4267 static comp_cost
4268 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4269 {
4270   if (!expr)
4271     return no_cost;
4272
4273   find_inv_vars (data, &expr, inv_vars);
4274   return force_expr_to_var_cost (expr, data->speed);
4275 }
4276
4277 /* Returns cost of auto-modifying address expression in shape base + offset.
4278    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4279    address expression.  The address expression has ADDR_MODE in addr space
4280    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4281    speed or size.  */
4282
4283 enum ainc_type
4284 {
4285   AINC_PRE_INC,         /* Pre increment.  */
4286   AINC_PRE_DEC,         /* Pre decrement.  */
4287   AINC_POST_INC,        /* Post increment.  */
4288   AINC_POST_DEC,        /* Post decrement.  */
4289   AINC_NONE             /* Also the number of auto increment types.  */
4290 };
4291
4292 struct ainc_cost_data
4293 {
4294   unsigned costs[AINC_NONE];
4295 };
4296
4297 static comp_cost
4298 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4299                        machine_mode addr_mode, machine_mode mem_mode,
4300                        addr_space_t as, bool speed)
4301 {
4302   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4303       && !USE_STORE_PRE_DECREMENT (mem_mode)
4304       && !USE_LOAD_POST_DECREMENT (mem_mode)
4305       && !USE_STORE_POST_DECREMENT (mem_mode)
4306       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4307       && !USE_STORE_PRE_INCREMENT (mem_mode)
4308       && !USE_LOAD_POST_INCREMENT (mem_mode)
4309       && !USE_STORE_POST_INCREMENT (mem_mode))
4310     return infinite_cost;
4311
4312   static vec<ainc_cost_data *> ainc_cost_data_list;
4313   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4314   if (idx >= ainc_cost_data_list.length ())
4315     {
4316       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4317
4318       gcc_assert (nsize > idx);
4319       ainc_cost_data_list.safe_grow_cleared (nsize);
4320     }
4321
4322   ainc_cost_data *data = ainc_cost_data_list[idx];
4323   if (data == NULL)
4324     {
4325       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4326
4327       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4328       data->costs[AINC_PRE_DEC] = INFTY;
4329       data->costs[AINC_POST_DEC] = INFTY;
4330       data->costs[AINC_PRE_INC] = INFTY;
4331       data->costs[AINC_POST_INC] = INFTY;
4332       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4333           || USE_STORE_PRE_DECREMENT (mem_mode))
4334         {
4335           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4336
4337           if (memory_address_addr_space_p (mem_mode, addr, as))
4338             data->costs[AINC_PRE_DEC]
4339               = address_cost (addr, mem_mode, as, speed);
4340         }
4341       if (USE_LOAD_POST_DECREMENT (mem_mode)
4342           || USE_STORE_POST_DECREMENT (mem_mode))
4343         {
4344           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4345
4346           if (memory_address_addr_space_p (mem_mode, addr, as))
4347             data->costs[AINC_POST_DEC]
4348               = address_cost (addr, mem_mode, as, speed);
4349         }
4350       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4351           || USE_STORE_PRE_INCREMENT (mem_mode))
4352         {
4353           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4354
4355           if (memory_address_addr_space_p (mem_mode, addr, as))
4356             data->costs[AINC_PRE_INC]
4357               = address_cost (addr, mem_mode, as, speed);
4358         }
4359       if (USE_LOAD_POST_INCREMENT (mem_mode)
4360           || USE_STORE_POST_INCREMENT (mem_mode))
4361         {
4362           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4363
4364           if (memory_address_addr_space_p (mem_mode, addr, as))
4365             data->costs[AINC_POST_INC]
4366               = address_cost (addr, mem_mode, as, speed);
4367         }
4368       ainc_cost_data_list[idx] = data;
4369     }
4370
4371   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4372   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4373     return comp_cost (data->costs[AINC_POST_INC], 0);
4374   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4375     return comp_cost (data->costs[AINC_POST_DEC], 0);
4376   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4377     return comp_cost (data->costs[AINC_PRE_INC], 0);
4378   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4379     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4380
4381   return infinite_cost;
4382 }
4383
4384 /* Return cost of computing USE's address expression by using CAND.
4385    AFF_INV and AFF_VAR represent invariant and variant parts of the
4386    address expression, respectively.  If AFF_INV is simple, store
4387    the loop invariant variables which are depended by it in INV_VARS;
4388    if AFF_INV is complicated, handle it as a new invariant expression
4389    and record it in INV_EXPR.  RATIO indicates multiple times between
4390    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4391    value to it indicating if this is an auto-increment address.  */
4392
4393 static comp_cost
4394 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4395                   struct iv_cand *cand, aff_tree *aff_inv,
4396                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4397                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4398                   bool *can_autoinc, bool speed)
4399 {
4400   rtx addr;
4401   bool simple_inv = true;
4402   tree comp_inv = NULL_TREE, type = aff_var->type;
4403   comp_cost var_cost = no_cost, cost = no_cost;
4404   struct mem_address parts = {NULL_TREE, integer_one_node,
4405                               NULL_TREE, NULL_TREE, NULL_TREE};
4406   machine_mode addr_mode = TYPE_MODE (type);
4407   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4408   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4409   /* Only true if ratio != 1.  */
4410   bool ok_with_ratio_p = false;
4411   bool ok_without_ratio_p = false;
4412
4413   if (!aff_combination_const_p (aff_inv))
4414     {
4415       parts.index = integer_one_node;
4416       /* Addressing mode "base + index".  */
4417       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4418       if (ratio != 1)
4419         {
4420           parts.step = wide_int_to_tree (type, ratio);
4421           /* Addressing mode "base + index << scale".  */
4422           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4423           if (!ok_with_ratio_p)
4424             parts.step = NULL_TREE;
4425         }
4426       if (ok_with_ratio_p || ok_without_ratio_p)
4427         {
4428           if (maybe_ne (aff_inv->offset, 0))
4429             {
4430               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4431               /* Addressing mode "base + index [<< scale] + offset".  */
4432               if (!valid_mem_ref_p (mem_mode, as, &parts))
4433                 parts.offset = NULL_TREE;
4434               else
4435                 aff_inv->offset = 0;
4436             }
4437
4438           move_fixed_address_to_symbol (&parts, aff_inv);
4439           /* Base is fixed address and is moved to symbol part.  */
4440           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4441             parts.base = NULL_TREE;
4442
4443           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4444           if (parts.symbol != NULL_TREE
4445               && !valid_mem_ref_p (mem_mode, as, &parts))
4446             {
4447               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4448               parts.symbol = NULL_TREE;
4449               /* Reset SIMPLE_INV since symbol address needs to be computed
4450                  outside of address expression in this case.  */
4451               simple_inv = false;
4452               /* Symbol part is moved back to base part, it can't be NULL.  */
4453               parts.base = integer_one_node;
4454             }
4455         }
4456       else
4457         parts.index = NULL_TREE;
4458     }
4459   else
4460     {
4461       poly_int64 ainc_step;
4462       if (can_autoinc
4463           && ratio == 1
4464           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4465         {
4466           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4467
4468           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4469             ainc_offset += ainc_step;
4470           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4471                                         addr_mode, mem_mode, as, speed);
4472           if (!cost.infinite_cost_p ())
4473             {
4474               *can_autoinc = true;
4475               return cost;
4476             }
4477           cost = no_cost;
4478         }
4479       if (!aff_combination_zero_p (aff_inv))
4480         {
4481           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4482           /* Addressing mode "base + offset".  */
4483           if (!valid_mem_ref_p (mem_mode, as, &parts))
4484             parts.offset = NULL_TREE;
4485           else
4486             aff_inv->offset = 0;
4487         }
4488     }
4489
4490   if (simple_inv)
4491     simple_inv = (aff_inv == NULL
4492                   || aff_combination_const_p (aff_inv)
4493                   || aff_combination_singleton_var_p (aff_inv));
4494   if (!aff_combination_zero_p (aff_inv))
4495     comp_inv = aff_combination_to_tree (aff_inv);
4496   if (comp_inv != NULL_TREE)
4497     cost = force_var_cost (data, comp_inv, inv_vars);
4498   if (ratio != 1 && parts.step == NULL_TREE)
4499     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4500   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4501     var_cost += add_cost (speed, addr_mode);
4502
4503   if (comp_inv && inv_expr && !simple_inv)
4504     {
4505       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4506       /* Clear depends on.  */
4507       if (*inv_expr != NULL && inv_vars && *inv_vars)
4508         bitmap_clear (*inv_vars);
4509
4510       /* Cost of small invariant expression adjusted against loop niters
4511          is usually zero, which makes it difficult to be differentiated
4512          from candidate based on loop invariant variables.  Secondly, the
4513          generated invariant expression may not be hoisted out of loop by
4514          following pass.  We penalize the cost by rounding up in order to
4515          neutralize such effects.  */
4516       cost.cost = adjust_setup_cost (data, cost.cost, true);
4517       cost.scratch = cost.cost;
4518     }
4519
4520   cost += var_cost;
4521   addr = addr_for_mem_ref (&parts, as, false);
4522   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4523   cost += address_cost (addr, mem_mode, as, speed);
4524
4525   if (parts.symbol != NULL_TREE)
4526     cost.complexity += 1;
4527   /* Don't increase the complexity of adding a scaled index if it's
4528      the only kind of index that the target allows.  */
4529   if (parts.step != NULL_TREE && ok_without_ratio_p)
4530     cost.complexity += 1;
4531   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4532     cost.complexity += 1;
4533   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4534     cost.complexity += 1;
4535
4536   return cost;
4537 }
4538
4539 /* Scale (multiply) the computed COST (except scratch part that should be
4540    hoisted out a loop) by header->frequency / AT->frequency, which makes
4541    expected cost more accurate.  */
4542
4543 static comp_cost
4544 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4545 {
4546    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4547    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4548    if (loop_freq != 0)
4549      {
4550        gcc_assert (cost.scratch <= cost.cost);
4551        int scaled_cost
4552          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4553
4554        if (dump_file && (dump_flags & TDF_DETAILS))
4555          fprintf (dump_file, "Scaling cost based on bb prob "
4556                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4557                   1.0f * bb_freq / loop_freq, cost.cost,
4558                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4559
4560        cost.cost = scaled_cost;
4561      }
4562
4563   return cost;
4564 }
4565
4566 /* Determines the cost of the computation by that USE is expressed
4567    from induction variable CAND.  If ADDRESS_P is true, we just need
4568    to create an address from it, otherwise we want to get it into
4569    register.  A set of invariants we depend on is stored in INV_VARS.
4570    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4571    addressing is likely.  If INV_EXPR is nonnull, record invariant
4572    expr entry in it.  */
4573
4574 static comp_cost
4575 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4576                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4577                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4578 {
4579   gimple *at = use->stmt;
4580   tree ubase = use->iv->base, cbase = cand->iv->base;
4581   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4582   tree comp_inv = NULL_TREE;
4583   HOST_WIDE_INT ratio, aratio;
4584   comp_cost cost;
4585   widest_int rat;
4586   aff_tree aff_inv, aff_var;
4587   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4588
4589   if (inv_vars)
4590     *inv_vars = NULL;
4591   if (can_autoinc)
4592     *can_autoinc = false;
4593   if (inv_expr)
4594     *inv_expr = NULL;
4595
4596   /* Check if we have enough precision to express the values of use.  */
4597   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4598     return infinite_cost;
4599
4600   if (address_p
4601       || (use->iv->base_object
4602           && cand->iv->base_object
4603           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4604           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4605     {
4606       /* Do not try to express address of an object with computation based
4607          on address of a different object.  This may cause problems in rtl
4608          level alias analysis (that does not expect this to be happening,
4609          as this is illegal in C), and would be unlikely to be useful
4610          anyway.  */
4611       if (use->iv->base_object
4612           && cand->iv->base_object
4613           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4614         return infinite_cost;
4615     }
4616
4617   if (!get_computation_aff_1 (data->current_loop, at, use,
4618                               cand, &aff_inv, &aff_var, &rat)
4619       || !wi::fits_shwi_p (rat))
4620     return infinite_cost;
4621
4622   ratio = rat.to_shwi ();
4623   if (address_p)
4624     {
4625       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4626                                inv_vars, inv_expr, can_autoinc, speed);
4627       return get_scaled_computation_cost_at (data, at, cost);
4628     }
4629
4630   bool simple_inv = (aff_combination_const_p (&aff_inv)
4631                      || aff_combination_singleton_var_p (&aff_inv));
4632   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4633   aff_combination_convert (&aff_inv, signed_type);
4634   if (!aff_combination_zero_p (&aff_inv))
4635     comp_inv = aff_combination_to_tree (&aff_inv);
4636
4637   cost = force_var_cost (data, comp_inv, inv_vars);
4638   if (comp_inv && inv_expr && !simple_inv)
4639     {
4640       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4641       /* Clear depends on.  */
4642       if (*inv_expr != NULL && inv_vars && *inv_vars)
4643         bitmap_clear (*inv_vars);
4644
4645       cost.cost = adjust_setup_cost (data, cost.cost);
4646       /* Record setup cost in scratch field.  */
4647       cost.scratch = cost.cost;
4648     }
4649   /* Cost of constant integer can be covered when adding invariant part to
4650      variant part.  */
4651   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4652     cost = no_cost;
4653
4654   /* Need type narrowing to represent use with cand.  */
4655   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656     {
4657       machine_mode outer_mode = TYPE_MODE (utype);
4658       machine_mode inner_mode = TYPE_MODE (ctype);
4659       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4660     }
4661
4662   /* Turn a + i * (-c) into a - i * c.  */
4663   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4664     aratio = -ratio;
4665   else
4666     aratio = ratio;
4667
4668   if (ratio != 1)
4669     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4670
4671   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4672      instruction.  */
4673   /* Need to add up the invariant and variant parts.  */
4674   if (comp_inv && !integer_zerop (comp_inv))
4675     cost += add_cost (speed, TYPE_MODE (utype));
4676
4677   return get_scaled_computation_cost_at (data, at, cost);
4678 }
4679
4680 /* Determines cost of computing the use in GROUP with CAND in a generic
4681    expression.  */
4682
4683 static bool
4684 determine_group_iv_cost_generic (struct ivopts_data *data,
4685                                  struct iv_group *group, struct iv_cand *cand)
4686 {
4687   comp_cost cost;
4688   iv_inv_expr_ent *inv_expr = NULL;
4689   bitmap inv_vars = NULL, inv_exprs = NULL;
4690   struct iv_use *use = group->vuses[0];
4691
4692   /* The simple case first -- if we need to express value of the preserved
4693      original biv, the cost is 0.  This also prevents us from counting the
4694      cost of increment twice -- once at this use and once in the cost of
4695      the candidate.  */
4696   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4697     cost = no_cost;
4698   else
4699     cost = get_computation_cost (data, use, cand, false,
4700                                  &inv_vars, NULL, &inv_expr);
4701
4702   if (inv_expr)
4703     {
4704       inv_exprs = BITMAP_ALLOC (NULL);
4705       bitmap_set_bit (inv_exprs, inv_expr->id);
4706     }
4707   set_group_iv_cost (data, group, cand, cost, inv_vars,
4708                      NULL_TREE, ERROR_MARK, inv_exprs);
4709   return !cost.infinite_cost_p ();
4710 }
4711
4712 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4713
4714 static bool
4715 determine_group_iv_cost_address (struct ivopts_data *data,
4716                                  struct iv_group *group, struct iv_cand *cand)
4717 {
4718   unsigned i;
4719   bitmap inv_vars = NULL, inv_exprs = NULL;
4720   bool can_autoinc;
4721   iv_inv_expr_ent *inv_expr = NULL;
4722   struct iv_use *use = group->vuses[0];
4723   comp_cost sum_cost = no_cost, cost;
4724
4725   cost = get_computation_cost (data, use, cand, true,
4726                                &inv_vars, &can_autoinc, &inv_expr);
4727
4728   if (inv_expr)
4729     {
4730       inv_exprs = BITMAP_ALLOC (NULL);
4731       bitmap_set_bit (inv_exprs, inv_expr->id);
4732     }
4733   sum_cost = cost;
4734   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4735     {
4736       if (can_autoinc)
4737         sum_cost -= cand->cost_step;
4738       /* If we generated the candidate solely for exploiting autoincrement
4739          opportunities, and it turns out it can't be used, set the cost to
4740          infinity to make sure we ignore it.  */
4741       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4742         sum_cost = infinite_cost;
4743     }
4744
4745   /* Uses in a group can share setup code, so only add setup cost once.  */
4746   cost -= cost.scratch;
4747   /* Compute and add costs for rest uses of this group.  */
4748   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4749     {
4750       struct iv_use *next = group->vuses[i];
4751
4752       /* TODO: We could skip computing cost for sub iv_use when it has the
4753          same cost as the first iv_use, but the cost really depends on the
4754          offset and where the iv_use is.  */
4755         cost = get_computation_cost (data, next, cand, true,
4756                                      NULL, &can_autoinc, &inv_expr);
4757         if (inv_expr)
4758           {
4759             if (!inv_exprs)
4760               inv_exprs = BITMAP_ALLOC (NULL);
4761
4762             bitmap_set_bit (inv_exprs, inv_expr->id);
4763           }
4764       sum_cost += cost;
4765     }
4766   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4767                      NULL_TREE, ERROR_MARK, inv_exprs);
4768
4769   return !sum_cost.infinite_cost_p ();
4770 }
4771
4772 /* Computes value of candidate CAND at position AT in iteration NITER, and
4773    stores it to VAL.  */
4774
4775 static void
4776 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4777                aff_tree *val)
4778 {
4779   aff_tree step, delta, nit;
4780   struct iv *iv = cand->iv;
4781   tree type = TREE_TYPE (iv->base);
4782   tree steptype;
4783   if (POINTER_TYPE_P (type))
4784     steptype = sizetype;
4785   else
4786     steptype = unsigned_type_for (type);
4787
4788   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4789   aff_combination_convert (&step, steptype);
4790   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4791   aff_combination_convert (&nit, steptype);
4792   aff_combination_mult (&nit, &step, &delta);
4793   if (stmt_after_increment (loop, cand, at))
4794     aff_combination_add (&delta, &step);
4795
4796   tree_to_aff_combination (iv->base, type, val);
4797   if (!POINTER_TYPE_P (type))
4798     aff_combination_convert (val, steptype);
4799   aff_combination_add (val, &delta);
4800 }
4801
4802 /* Returns period of induction variable iv.  */
4803
4804 static tree
4805 iv_period (struct iv *iv)
4806 {
4807   tree step = iv->step, period, type;
4808   tree pow2div;
4809
4810   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4811
4812   type = unsigned_type_for (TREE_TYPE (step));
4813   /* Period of the iv is lcm (step, type_range)/step -1,
4814      i.e., N*type_range/step - 1. Since type range is power
4815      of two, N == (step >> num_of_ending_zeros_binary (step),
4816      so the final result is
4817
4818        (type_range >> num_of_ending_zeros_binary (step)) - 1
4819
4820   */
4821   pow2div = num_ending_zeros (step);
4822
4823   period = build_low_bits_mask (type,
4824                                 (TYPE_PRECISION (type)
4825                                  - tree_to_uhwi (pow2div)));
4826
4827   return period;
4828 }
4829
4830 /* Returns the comparison operator used when eliminating the iv USE.  */
4831
4832 static enum tree_code
4833 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4834 {
4835   struct loop *loop = data->current_loop;
4836   basic_block ex_bb;
4837   edge exit;
4838
4839   ex_bb = gimple_bb (use->stmt);
4840   exit = EDGE_SUCC (ex_bb, 0);
4841   if (flow_bb_inside_loop_p (loop, exit->dest))
4842     exit = EDGE_SUCC (ex_bb, 1);
4843
4844   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4845 }
4846
4847 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4848    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4849    calculation is performed in non-wrapping type.
4850
4851    TODO: More generally, we could test for the situation that
4852          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4853          This would require knowing the sign of OFFSET.  */
4854
4855 static bool
4856 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4857 {
4858   enum tree_code code;
4859   tree e1, e2;
4860   aff_tree aff_e1, aff_e2, aff_offset;
4861
4862   if (!nowrap_type_p (TREE_TYPE (base)))
4863     return false;
4864
4865   base = expand_simple_operations (base);
4866
4867   if (TREE_CODE (base) == SSA_NAME)
4868     {
4869       gimple *stmt = SSA_NAME_DEF_STMT (base);
4870
4871       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4872         return false;
4873
4874       code = gimple_assign_rhs_code (stmt);
4875       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4876         return false;
4877
4878       e1 = gimple_assign_rhs1 (stmt);
4879       e2 = gimple_assign_rhs2 (stmt);
4880     }
4881   else
4882     {
4883       code = TREE_CODE (base);
4884       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4885         return false;
4886       e1 = TREE_OPERAND (base, 0);
4887       e2 = TREE_OPERAND (base, 1);
4888     }
4889
4890   /* Use affine expansion as deeper inspection to prove the equality.  */
4891   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4892                                   &aff_e2, &data->name_expansion_cache);
4893   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4894                                   &aff_offset, &data->name_expansion_cache);
4895   aff_combination_scale (&aff_offset, -1);
4896   switch (code)
4897     {
4898     case PLUS_EXPR:
4899       aff_combination_add (&aff_e2, &aff_offset);
4900       if (aff_combination_zero_p (&aff_e2))
4901         return true;
4902
4903       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4904                                       &aff_e1, &data->name_expansion_cache);
4905       aff_combination_add (&aff_e1, &aff_offset);
4906       return aff_combination_zero_p (&aff_e1);
4907
4908     case POINTER_PLUS_EXPR:
4909       aff_combination_add (&aff_e2, &aff_offset);
4910       return aff_combination_zero_p (&aff_e2);
4911
4912     default:
4913       return false;
4914     }
4915 }
4916
4917 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4918    comparison with CAND.  NITER describes the number of iterations of
4919    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4920
4921    We aim to handle the following situation:
4922
4923    sometype *base, *p;
4924    int a, b, i;
4925
4926    i = a;
4927    p = p_0 = base + a;
4928
4929    do
4930      {
4931        bla (*p);
4932        p++;
4933        i++;
4934      }
4935    while (i < b);
4936
4937    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4938    We aim to optimize this to
4939
4940    p = p_0 = base + a;
4941    do
4942      {
4943        bla (*p);
4944        p++;
4945      }
4946    while (p < p_0 - a + b);
4947
4948    This preserves the correctness, since the pointer arithmetics does not
4949    overflow.  More precisely:
4950
4951    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4952       overflow in computing it or the values of p.
4953    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4954       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4955
4956 static bool
4957 iv_elimination_compare_lt (struct ivopts_data *data,
4958                            struct iv_cand *cand, enum tree_code *comp_p,
4959                            struct tree_niter_desc *niter)
4960 {
4961   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4962   struct aff_tree nit, tmpa, tmpb;
4963   enum tree_code comp;
4964   HOST_WIDE_INT step;
4965
4966   /* We need to know that the candidate induction variable does not overflow.
4967      While more complex analysis may be used to prove this, for now just
4968      check that the variable appears in the original program and that it
4969      is computed in a type that guarantees no overflows.  */
4970   cand_type = TREE_TYPE (cand->iv->base);
4971   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4972     return false;
4973
4974   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4975      the calculation of the BOUND could overflow, making the comparison
4976      invalid.  */
4977   if (!data->loop_single_exit_p)
4978     return false;
4979
4980   /* We need to be able to decide whether candidate is increasing or decreasing
4981      in order to choose the right comparison operator.  */
4982   if (!cst_and_fits_in_hwi (cand->iv->step))
4983     return false;
4984   step = int_cst_value (cand->iv->step);
4985
4986   /* Check that the number of iterations matches the expected pattern:
4987      a + 1 > b ? 0 : b - a - 1.  */
4988   mbz = niter->may_be_zero;
4989   if (TREE_CODE (mbz) == GT_EXPR)
4990     {
4991       /* Handle a + 1 > b.  */
4992       tree op0 = TREE_OPERAND (mbz, 0);
4993       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4994         {
4995           a = TREE_OPERAND (op0, 0);
4996           b = TREE_OPERAND (mbz, 1);
4997         }
4998       else
4999         return false;
5000     }
5001   else if (TREE_CODE (mbz) == LT_EXPR)
5002     {
5003       tree op1 = TREE_OPERAND (mbz, 1);
5004
5005       /* Handle b < a + 1.  */
5006       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5007         {
5008           a = TREE_OPERAND (op1, 0);
5009           b = TREE_OPERAND (mbz, 0);
5010         }
5011       else
5012         return false;
5013     }
5014   else
5015     return false;
5016
5017   /* Expected number of iterations is B - A - 1.  Check that it matches
5018      the actual number, i.e., that B - A - NITER = 1.  */
5019   tree_to_aff_combination (niter->niter, nit_type, &nit);
5020   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5021   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5022   aff_combination_scale (&nit, -1);
5023   aff_combination_scale (&tmpa, -1);
5024   aff_combination_add (&tmpb, &tmpa);
5025   aff_combination_add (&tmpb, &nit);
5026   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5027     return false;
5028
5029   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5030      overflow.  */
5031   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5032                         cand->iv->step,
5033                         fold_convert (TREE_TYPE (cand->iv->step), a));
5034   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5035     return false;
5036
5037   /* Determine the new comparison operator.  */
5038   comp = step < 0 ? GT_EXPR : LT_EXPR;
5039   if (*comp_p == NE_EXPR)
5040     *comp_p = comp;
5041   else if (*comp_p == EQ_EXPR)
5042     *comp_p = invert_tree_comparison (comp, false);
5043   else
5044     gcc_unreachable ();
5045
5046   return true;
5047 }
5048
5049 /* Check whether it is possible to express the condition in USE by comparison
5050    of candidate CAND.  If so, store the value compared with to BOUND, and the
5051    comparison operator to COMP.  */
5052
5053 static bool
5054 may_eliminate_iv (struct ivopts_data *data,
5055                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5056                   enum tree_code *comp)
5057 {
5058   basic_block ex_bb;
5059   edge exit;
5060   tree period;
5061   struct loop *loop = data->current_loop;
5062   aff_tree bnd;
5063   struct tree_niter_desc *desc = NULL;
5064
5065   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5066     return false;
5067
5068   /* For now works only for exits that dominate the loop latch.
5069      TODO: extend to other conditions inside loop body.  */
5070   ex_bb = gimple_bb (use->stmt);
5071   if (use->stmt != last_stmt (ex_bb)
5072       || gimple_code (use->stmt) != GIMPLE_COND
5073       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5074     return false;
5075
5076   exit = EDGE_SUCC (ex_bb, 0);
5077   if (flow_bb_inside_loop_p (loop, exit->dest))
5078     exit = EDGE_SUCC (ex_bb, 1);
5079   if (flow_bb_inside_loop_p (loop, exit->dest))
5080     return false;
5081
5082   desc = niter_for_exit (data, exit);
5083   if (!desc)
5084     return false;
5085
5086   /* Determine whether we can use the variable to test the exit condition.
5087      This is the case iff the period of the induction variable is greater
5088      than the number of iterations for which the exit condition is true.  */
5089   period = iv_period (cand->iv);
5090
5091   /* If the number of iterations is constant, compare against it directly.  */
5092   if (TREE_CODE (desc->niter) == INTEGER_CST)
5093     {
5094       /* See cand_value_at.  */
5095       if (stmt_after_increment (loop, cand, use->stmt))
5096         {
5097           if (!tree_int_cst_lt (desc->niter, period))
5098             return false;
5099         }
5100       else
5101         {
5102           if (tree_int_cst_lt (period, desc->niter))
5103             return false;
5104         }
5105     }
5106
5107   /* If not, and if this is the only possible exit of the loop, see whether
5108      we can get a conservative estimate on the number of iterations of the
5109      entire loop and compare against that instead.  */
5110   else
5111     {
5112       widest_int period_value, max_niter;
5113
5114       max_niter = desc->max;
5115       if (stmt_after_increment (loop, cand, use->stmt))
5116         max_niter += 1;
5117       period_value = wi::to_widest (period);
5118       if (wi::gtu_p (max_niter, period_value))
5119         {
5120           /* See if we can take advantage of inferred loop bound
5121              information.  */
5122           if (data->loop_single_exit_p)
5123             {
5124               if (!max_loop_iterations (loop, &max_niter))
5125                 return false;
5126               /* The loop bound is already adjusted by adding 1.  */
5127               if (wi::gtu_p (max_niter, period_value))
5128                 return false;
5129             }
5130           else
5131             return false;
5132         }
5133     }
5134
5135   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5136
5137   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5138                          aff_combination_to_tree (&bnd));
5139   *comp = iv_elimination_compare (data, use);
5140
5141   /* It is unlikely that computing the number of iterations using division
5142      would be more profitable than keeping the original induction variable.  */
5143   if (expression_expensive_p (*bound))
5144     return false;
5145
5146   /* Sometimes, it is possible to handle the situation that the number of
5147      iterations may be zero unless additional assumptions by using <
5148      instead of != in the exit condition.
5149
5150      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5151            base the exit condition on it.  However, that is often too
5152            expensive.  */
5153   if (!integer_zerop (desc->may_be_zero))
5154     return iv_elimination_compare_lt (data, cand, comp, desc);
5155
5156   return true;
5157 }
5158
5159  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5160     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5161
5162 static int
5163 parm_decl_cost (struct ivopts_data *data, tree bound)
5164 {
5165   tree sbound = bound;
5166   STRIP_NOPS (sbound);
5167
5168   if (TREE_CODE (sbound) == SSA_NAME
5169       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5170       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5171       && data->body_includes_call)
5172     return COSTS_N_INSNS (1);
5173
5174   return 0;
5175 }
5176
5177 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5178
5179 static bool
5180 determine_group_iv_cost_cond (struct ivopts_data *data,
5181                               struct iv_group *group, struct iv_cand *cand)
5182 {
5183   tree bound = NULL_TREE;
5184   struct iv *cmp_iv;
5185   bitmap inv_exprs = NULL;
5186   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5187   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5188   enum comp_iv_rewrite rewrite_type;
5189   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5190   tree *control_var, *bound_cst;
5191   enum tree_code comp = ERROR_MARK;
5192   struct iv_use *use = group->vuses[0];
5193
5194   /* Extract condition operands.  */
5195   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5196                                         &bound_cst, NULL, &cmp_iv);
5197   gcc_assert (rewrite_type != COMP_IV_NA);
5198
5199   /* Try iv elimination.  */
5200   if (rewrite_type == COMP_IV_ELIM
5201       && may_eliminate_iv (data, use, cand, &bound, &comp))
5202     {
5203       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5204       if (elim_cost.cost == 0)
5205         elim_cost.cost = parm_decl_cost (data, bound);
5206       else if (TREE_CODE (bound) == INTEGER_CST)
5207         elim_cost.cost = 0;
5208       /* If we replace a loop condition 'i < n' with 'p < base + n',
5209          inv_vars_elim will have 'base' and 'n' set, which implies that both
5210          'base' and 'n' will be live during the loop.    More likely,
5211          'base + n' will be loop invariant, resulting in only one live value
5212          during the loop.  So in that case we clear inv_vars_elim and set
5213          inv_expr_elim instead.  */
5214       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5215         {
5216           inv_expr_elim = get_loop_invariant_expr (data, bound);
5217           bitmap_clear (inv_vars_elim);
5218         }
5219       /* The bound is a loop invariant, so it will be only computed
5220          once.  */
5221       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5222     }
5223
5224   /* When the condition is a comparison of the candidate IV against
5225      zero, prefer this IV.
5226
5227      TODO: The constant that we're subtracting from the cost should
5228      be target-dependent.  This information should be added to the
5229      target costs for each backend.  */
5230   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5231       && integer_zerop (*bound_cst)
5232       && (operand_equal_p (*control_var, cand->var_after, 0)
5233           || operand_equal_p (*control_var, cand->var_before, 0)))
5234     elim_cost -= 1;
5235
5236   express_cost = get_computation_cost (data, use, cand, false,
5237                                        &inv_vars_express, NULL,
5238                                        &inv_expr_express);
5239   if (cmp_iv != NULL)
5240     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5241
5242   /* Count the cost of the original bound as well.  */
5243   bound_cost = force_var_cost (data, *bound_cst, NULL);
5244   if (bound_cost.cost == 0)
5245     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5246   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5247     bound_cost.cost = 0;
5248   express_cost += bound_cost;
5249
5250   /* Choose the better approach, preferring the eliminated IV. */
5251   if (elim_cost <= express_cost)
5252     {
5253       cost = elim_cost;
5254       inv_vars = inv_vars_elim;
5255       inv_vars_elim = NULL;
5256       inv_expr = inv_expr_elim;
5257     }
5258   else
5259     {
5260       cost = express_cost;
5261       inv_vars = inv_vars_express;
5262       inv_vars_express = NULL;
5263       bound = NULL_TREE;
5264       comp = ERROR_MARK;
5265       inv_expr = inv_expr_express;
5266     }
5267
5268   if (inv_expr)
5269     {
5270       inv_exprs = BITMAP_ALLOC (NULL);
5271       bitmap_set_bit (inv_exprs, inv_expr->id);
5272     }
5273   set_group_iv_cost (data, group, cand, cost,
5274                      inv_vars, bound, comp, inv_exprs);
5275
5276   if (inv_vars_elim)
5277     BITMAP_FREE (inv_vars_elim);
5278   if (inv_vars_express)
5279     BITMAP_FREE (inv_vars_express);
5280
5281   return !cost.infinite_cost_p ();
5282 }
5283
5284 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5285    if USE cannot be represented with CAND.  */
5286
5287 static bool
5288 determine_group_iv_cost (struct ivopts_data *data,
5289                          struct iv_group *group, struct iv_cand *cand)
5290 {
5291   switch (group->type)
5292     {
5293     case USE_NONLINEAR_EXPR:
5294       return determine_group_iv_cost_generic (data, group, cand);
5295
5296     case USE_REF_ADDRESS:
5297     case USE_PTR_ADDRESS:
5298       return determine_group_iv_cost_address (data, group, cand);
5299
5300     case USE_COMPARE:
5301       return determine_group_iv_cost_cond (data, group, cand);
5302
5303     default:
5304       gcc_unreachable ();
5305     }
5306 }
5307
5308 /* Return true if get_computation_cost indicates that autoincrement is
5309    a possibility for the pair of USE and CAND, false otherwise.  */
5310
5311 static bool
5312 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5313                            struct iv_cand *cand)
5314 {
5315   if (!address_p (use->type))
5316     return false;
5317
5318   bool can_autoinc = false;
5319   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5320   return can_autoinc;
5321 }
5322
5323 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5324    use that allows autoincrement, and set their AINC_USE if possible.  */
5325
5326 static void
5327 set_autoinc_for_original_candidates (struct ivopts_data *data)
5328 {
5329   unsigned i, j;
5330
5331   for (i = 0; i < data->vcands.length (); i++)
5332     {
5333       struct iv_cand *cand = data->vcands[i];
5334       struct iv_use *closest_before = NULL;
5335       struct iv_use *closest_after = NULL;
5336       if (cand->pos != IP_ORIGINAL)
5337         continue;
5338
5339       for (j = 0; j < data->vgroups.length (); j++)
5340         {
5341           struct iv_group *group = data->vgroups[j];
5342           struct iv_use *use = group->vuses[0];
5343           unsigned uid = gimple_uid (use->stmt);
5344
5345           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5346             continue;
5347
5348           if (uid < gimple_uid (cand->incremented_at)
5349               && (closest_before == NULL
5350                   || uid > gimple_uid (closest_before->stmt)))
5351             closest_before = use;
5352
5353           if (uid > gimple_uid (cand->incremented_at)
5354               && (closest_after == NULL
5355                   || uid < gimple_uid (closest_after->stmt)))
5356             closest_after = use;
5357         }
5358
5359       if (closest_before != NULL
5360           && autoinc_possible_for_pair (data, closest_before, cand))
5361         cand->ainc_use = closest_before;
5362       else if (closest_after != NULL
5363                && autoinc_possible_for_pair (data, closest_after, cand))
5364         cand->ainc_use = closest_after;
5365     }
5366 }
5367
5368 /* Relate compare use with all candidates.  */
5369
5370 static void
5371 relate_compare_use_with_all_cands (struct ivopts_data *data)
5372 {
5373   unsigned i, count = data->vcands.length ();
5374   for (i = 0; i < data->vgroups.length (); i++)
5375     {
5376       struct iv_group *group = data->vgroups[i];
5377
5378       if (group->type == USE_COMPARE)
5379         bitmap_set_range (group->related_cands, 0, count);
5380     }
5381 }
5382
5383 /* Finds the candidates for the induction variables.  */
5384
5385 static void
5386 find_iv_candidates (struct ivopts_data *data)
5387 {
5388   /* Add commonly used ivs.  */
5389   add_standard_iv_candidates (data);
5390
5391   /* Add old induction variables.  */
5392   add_iv_candidate_for_bivs (data);
5393
5394   /* Add induction variables derived from uses.  */
5395   add_iv_candidate_for_groups (data);
5396
5397   set_autoinc_for_original_candidates (data);
5398
5399   /* Record the important candidates.  */
5400   record_important_candidates (data);
5401
5402   /* Relate compare iv_use with all candidates.  */
5403   if (!data->consider_all_candidates)
5404     relate_compare_use_with_all_cands (data);
5405
5406   if (dump_file && (dump_flags & TDF_DETAILS))
5407     {
5408       unsigned i;
5409
5410       fprintf (dump_file, "\n<Important Candidates>:\t");
5411       for (i = 0; i < data->vcands.length (); i++)
5412         if (data->vcands[i]->important)
5413           fprintf (dump_file, " %d,", data->vcands[i]->id);
5414       fprintf (dump_file, "\n");
5415
5416       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5417       for (i = 0; i < data->vgroups.length (); i++)
5418         {
5419           struct iv_group *group = data->vgroups[i];
5420
5421           if (group->related_cands)
5422             {
5423               fprintf (dump_file, "  Group %d:\t", group->id);
5424               dump_bitmap (dump_file, group->related_cands);
5425             }
5426         }
5427       fprintf (dump_file, "\n");
5428     }
5429 }
5430
5431 /* Determines costs of computing use of iv with an iv candidate.  */
5432
5433 static void
5434 determine_group_iv_costs (struct ivopts_data *data)
5435 {
5436   unsigned i, j;
5437   struct iv_cand *cand;
5438   struct iv_group *group;
5439   bitmap to_clear = BITMAP_ALLOC (NULL);
5440
5441   alloc_use_cost_map (data);
5442
5443   for (i = 0; i < data->vgroups.length (); i++)
5444     {
5445       group = data->vgroups[i];
5446
5447       if (data->consider_all_candidates)
5448         {
5449           for (j = 0; j < data->vcands.length (); j++)
5450             {
5451               cand = data->vcands[j];
5452               determine_group_iv_cost (data, group, cand);
5453             }
5454         }
5455       else
5456         {
5457           bitmap_iterator bi;
5458
5459           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5460             {
5461               cand = data->vcands[j];
5462               if (!determine_group_iv_cost (data, group, cand))
5463                 bitmap_set_bit (to_clear, j);
5464             }
5465
5466           /* Remove the candidates for that the cost is infinite from
5467              the list of related candidates.  */
5468           bitmap_and_compl_into (group->related_cands, to_clear);
5469           bitmap_clear (to_clear);
5470         }
5471     }
5472
5473   BITMAP_FREE (to_clear);
5474
5475   if (dump_file && (dump_flags & TDF_DETAILS))
5476     {
5477       bitmap_iterator bi;
5478
5479       /* Dump invariant variables.  */
5480       fprintf (dump_file, "\n<Invariant Vars>:\n");
5481       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5482         {
5483           struct version_info *info = ver_info (data, i);
5484           if (info->inv_id)
5485             {
5486               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5487               print_generic_expr (dump_file, info->name, TDF_SLIM);
5488               fprintf (dump_file, "%s\n",
5489                        info->has_nonlin_use ? "" : "\t(eliminable)");
5490             }
5491         }
5492
5493       /* Dump invariant expressions.  */
5494       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5495       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5496
5497       for (hash_table<iv_inv_expr_hasher>::iterator it
5498            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5499            ++it)
5500         list.safe_push (*it);
5501
5502       list.qsort (sort_iv_inv_expr_ent);
5503
5504       for (i = 0; i < list.length (); ++i)
5505         {
5506           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5507           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5508           fprintf (dump_file, "\n");
5509         }
5510
5511       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5512
5513       for (i = 0; i < data->vgroups.length (); i++)
5514         {
5515           group = data->vgroups[i];
5516
5517           fprintf (dump_file, "Group %d:\n", i);
5518           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5519           for (j = 0; j < group->n_map_members; j++)
5520             {
5521               if (!group->cost_map[j].cand
5522                   || group->cost_map[j].cost.infinite_cost_p ())
5523                 continue;
5524
5525               fprintf (dump_file, "  %d\t%d\t%d\t",
5526                        group->cost_map[j].cand->id,
5527                        group->cost_map[j].cost.cost,
5528                        group->cost_map[j].cost.complexity);
5529               if (!group->cost_map[j].inv_exprs
5530                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5531                 fprintf (dump_file, "NIL;\t");
5532               else
5533                 bitmap_print (dump_file,
5534                               group->cost_map[j].inv_exprs, "", ";\t");
5535               if (!group->cost_map[j].inv_vars
5536                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5537                 fprintf (dump_file, "NIL;\n");
5538               else
5539                 bitmap_print (dump_file,
5540                               group->cost_map[j].inv_vars, "", "\n");
5541             }
5542
5543           fprintf (dump_file, "\n");
5544         }
5545       fprintf (dump_file, "\n");
5546     }
5547 }
5548
5549 /* Determines cost of the candidate CAND.  */
5550
5551 static void
5552 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5553 {
5554   comp_cost cost_base;
5555   unsigned cost, cost_step;
5556   tree base;
5557
5558   gcc_assert (cand->iv != NULL);
5559
5560   /* There are two costs associated with the candidate -- its increment
5561      and its initialization.  The second is almost negligible for any loop
5562      that rolls enough, so we take it just very little into account.  */
5563
5564   base = cand->iv->base;
5565   cost_base = force_var_cost (data, base, NULL);
5566   /* It will be exceptional that the iv register happens to be initialized with
5567      the proper value at no cost.  In general, there will at least be a regcopy
5568      or a const set.  */
5569   if (cost_base.cost == 0)
5570     cost_base.cost = COSTS_N_INSNS (1);
5571   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5572
5573   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5574
5575   /* Prefer the original ivs unless we may gain something by replacing it.
5576      The reason is to make debugging simpler; so this is not relevant for
5577      artificial ivs created by other optimization passes.  */
5578   if (cand->pos != IP_ORIGINAL
5579       || !SSA_NAME_VAR (cand->var_before)
5580       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5581     cost++;
5582
5583   /* Prefer not to insert statements into latch unless there are some
5584      already (so that we do not create unnecessary jumps).  */
5585   if (cand->pos == IP_END
5586       && empty_block_p (ip_end_pos (data->current_loop)))
5587     cost++;
5588
5589   cand->cost = cost;
5590   cand->cost_step = cost_step;
5591 }
5592
5593 /* Determines costs of computation of the candidates.  */
5594
5595 static void
5596 determine_iv_costs (struct ivopts_data *data)
5597 {
5598   unsigned i;
5599
5600   if (dump_file && (dump_flags & TDF_DETAILS))
5601     {
5602       fprintf (dump_file, "<Candidate Costs>:\n");
5603       fprintf (dump_file, "  cand\tcost\n");
5604     }
5605
5606   for (i = 0; i < data->vcands.length (); i++)
5607     {
5608       struct iv_cand *cand = data->vcands[i];
5609
5610       determine_iv_cost (data, cand);
5611
5612       if (dump_file && (dump_flags & TDF_DETAILS))
5613         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5614     }
5615
5616   if (dump_file && (dump_flags & TDF_DETAILS))
5617     fprintf (dump_file, "\n");
5618 }
5619
5620 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5621    induction variables.  Note N_INVS includes both invariant variables and
5622    invariant expressions.  */
5623
5624 static unsigned
5625 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5626                               unsigned n_cands)
5627 {
5628   unsigned cost;
5629   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5630   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5631   bool speed = data->speed;
5632
5633   /* If there is a call in the loop body, the call-clobbered registers
5634      are not available for loop invariants.  */
5635   if (data->body_includes_call)
5636     available_regs = available_regs - target_clobbered_regs;
5637
5638   /* If we have enough registers.  */
5639   if (regs_needed + target_res_regs < available_regs)
5640     cost = n_new;
5641   /* If close to running out of registers, try to preserve them.  */
5642   else if (regs_needed <= available_regs)
5643     cost = target_reg_cost [speed] * regs_needed;
5644   /* If we run out of available registers but the number of candidates
5645      does not, we penalize extra registers using target_spill_cost.  */
5646   else if (n_cands <= available_regs)
5647     cost = target_reg_cost [speed] * available_regs
5648            + target_spill_cost [speed] * (regs_needed - available_regs);
5649   /* If the number of candidates runs out available registers, we penalize
5650      extra candidate registers using target_spill_cost * 2.  Because it is
5651      more expensive to spill induction variable than invariant.  */
5652   else
5653     cost = target_reg_cost [speed] * available_regs
5654            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5655            + target_spill_cost [speed] * (regs_needed - n_cands);
5656
5657   /* Finally, add the number of candidates, so that we prefer eliminating
5658      induction variables if possible.  */
5659   return cost + n_cands;
5660 }
5661
5662 /* For each size of the induction variable set determine the penalty.  */
5663
5664 static void
5665 determine_set_costs (struct ivopts_data *data)
5666 {
5667   unsigned j, n;
5668   gphi *phi;
5669   gphi_iterator psi;
5670   tree op;
5671   struct loop *loop = data->current_loop;
5672   bitmap_iterator bi;
5673
5674   if (dump_file && (dump_flags & TDF_DETAILS))
5675     {
5676       fprintf (dump_file, "<Global Costs>:\n");
5677       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5678       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5679       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5680       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5681     }
5682
5683   n = 0;
5684   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5685     {
5686       phi = psi.phi ();
5687       op = PHI_RESULT (phi);
5688
5689       if (virtual_operand_p (op))
5690         continue;
5691
5692       if (get_iv (data, op))
5693         continue;
5694
5695       if (!POINTER_TYPE_P (TREE_TYPE (op))
5696           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5697         continue;
5698
5699       n++;
5700     }
5701
5702   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703     {
5704       struct version_info *info = ver_info (data, j);
5705
5706       if (info->inv_id && info->has_nonlin_use)
5707         n++;
5708     }
5709
5710   data->regs_used = n;
5711   if (dump_file && (dump_flags & TDF_DETAILS))
5712     fprintf (dump_file, "  regs_used %d\n", n);
5713
5714   if (dump_file && (dump_flags & TDF_DETAILS))
5715     {
5716       fprintf (dump_file, "  cost for size:\n");
5717       fprintf (dump_file, "  ivs\tcost\n");
5718       for (j = 0; j <= 2 * target_avail_regs; j++)
5719         fprintf (dump_file, "  %d\t%d\n", j,
5720                  ivopts_estimate_reg_pressure (data, 0, j));
5721       fprintf (dump_file, "\n");
5722     }
5723 }
5724
5725 /* Returns true if A is a cheaper cost pair than B.  */
5726
5727 static bool
5728 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5729 {
5730   if (!a)
5731     return false;
5732
5733   if (!b)
5734     return true;
5735
5736   if (a->cost < b->cost)
5737     return true;
5738
5739   if (b->cost < a->cost)
5740     return false;
5741
5742   /* In case the costs are the same, prefer the cheaper candidate.  */
5743   if (a->cand->cost < b->cand->cost)
5744     return true;
5745
5746   return false;
5747 }
5748
5749 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5750    for more expensive, equal and cheaper respectively.  */
5751
5752 static int
5753 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5754 {
5755   if (cheaper_cost_pair (a, b))
5756     return -1;
5757   if (cheaper_cost_pair (b, a))
5758     return 1;
5759
5760   return 0;
5761 }
5762
5763 /* Returns candidate by that USE is expressed in IVS.  */
5764
5765 static struct cost_pair *
5766 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5767 {
5768   return ivs->cand_for_group[group->id];
5769 }
5770
5771 /* Computes the cost field of IVS structure.  */
5772
5773 static void
5774 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5775 {
5776   comp_cost cost = ivs->cand_use_cost;
5777
5778   cost += ivs->cand_cost;
5779   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5780   ivs->cost = cost;
5781 }
5782
5783 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5784    and IVS.  */
5785
5786 static void
5787 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5788 {
5789   bitmap_iterator bi;
5790   unsigned iid;
5791
5792   if (!invs)
5793     return;
5794
5795   gcc_assert (n_inv_uses != NULL);
5796   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5797     {
5798       n_inv_uses[iid]--;
5799       if (n_inv_uses[iid] == 0)
5800         ivs->n_invs--;
5801     }
5802 }
5803
5804 /* Set USE not to be expressed by any candidate in IVS.  */
5805
5806 static void
5807 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5808                  struct iv_group *group)
5809 {
5810   unsigned gid = group->id, cid;
5811   struct cost_pair *cp;
5812
5813   cp = ivs->cand_for_group[gid];
5814   if (!cp)
5815     return;
5816   cid = cp->cand->id;
5817
5818   ivs->bad_groups++;
5819   ivs->cand_for_group[gid] = NULL;
5820   ivs->n_cand_uses[cid]--;
5821
5822   if (ivs->n_cand_uses[cid] == 0)
5823     {
5824       bitmap_clear_bit (ivs->cands, cid);
5825       ivs->n_cands--;
5826       ivs->cand_cost -= cp->cand->cost;
5827       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5828       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5829     }
5830
5831   ivs->cand_use_cost -= cp->cost;
5832   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5833   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5834   iv_ca_recount_cost (data, ivs);
5835 }
5836
5837 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5838    IVS.  */
5839
5840 static void
5841 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5842 {
5843   bitmap_iterator bi;
5844   unsigned iid;
5845
5846   if (!invs)
5847     return;
5848
5849   gcc_assert (n_inv_uses != NULL);
5850   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5851     {
5852       n_inv_uses[iid]++;
5853       if (n_inv_uses[iid] == 1)
5854         ivs->n_invs++;
5855     }
5856 }
5857
5858 /* Set cost pair for GROUP in set IVS to CP.  */
5859
5860 static void
5861 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5862               struct iv_group *group, struct cost_pair *cp)
5863 {
5864   unsigned gid = group->id, cid;
5865
5866   if (ivs->cand_for_group[gid] == cp)
5867     return;
5868
5869   if (ivs->cand_for_group[gid])
5870     iv_ca_set_no_cp (data, ivs, group);
5871
5872   if (cp)
5873     {
5874       cid = cp->cand->id;
5875
5876       ivs->bad_groups--;
5877       ivs->cand_for_group[gid] = cp;
5878       ivs->n_cand_uses[cid]++;
5879       if (ivs->n_cand_uses[cid] == 1)
5880         {
5881           bitmap_set_bit (ivs->cands, cid);
5882           ivs->n_cands++;
5883           ivs->cand_cost += cp->cand->cost;
5884           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5885           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5886         }
5887
5888       ivs->cand_use_cost += cp->cost;
5889       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5890       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5891       iv_ca_recount_cost (data, ivs);
5892     }
5893 }
5894
5895 /* Extend set IVS by expressing USE by some of the candidates in it
5896    if possible.  Consider all important candidates if candidates in
5897    set IVS don't give any result.  */
5898
5899 static void
5900 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5901                struct iv_group *group)
5902 {
5903   struct cost_pair *best_cp = NULL, *cp;
5904   bitmap_iterator bi;
5905   unsigned i;
5906   struct iv_cand *cand;
5907
5908   gcc_assert (ivs->upto >= group->id);
5909   ivs->upto++;
5910   ivs->bad_groups++;
5911
5912   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5913     {
5914       cand = data->vcands[i];
5915       cp = get_group_iv_cost (data, group, cand);
5916       if (cheaper_cost_pair (cp, best_cp))
5917         best_cp = cp;
5918     }
5919
5920   if (best_cp == NULL)
5921     {
5922       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5923         {
5924           cand = data->vcands[i];
5925           cp = get_group_iv_cost (data, group, cand);
5926           if (cheaper_cost_pair (cp, best_cp))
5927             best_cp = cp;
5928         }
5929     }
5930
5931   iv_ca_set_cp (data, ivs, group, best_cp);
5932 }
5933
5934 /* Get cost for assignment IVS.  */
5935
5936 static comp_cost
5937 iv_ca_cost (struct iv_ca *ivs)
5938 {
5939   /* This was a conditional expression but it triggered a bug in
5940      Sun C 5.5.  */
5941   if (ivs->bad_groups)
5942     return infinite_cost;
5943   else
5944     return ivs->cost;
5945 }
5946
5947 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5948    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5949    respectively.  */
5950
5951 static int
5952 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5953                     struct iv_group *group, struct cost_pair *old_cp,
5954                     struct cost_pair *new_cp)
5955 {
5956   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5957   unsigned old_n_invs = ivs->n_invs;
5958   iv_ca_set_cp (data, ivs, group, new_cp);
5959   unsigned new_n_invs = ivs->n_invs;
5960   iv_ca_set_cp (data, ivs, group, old_cp);
5961
5962   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5963 }
5964
5965 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5966    it before NEXT.  */
5967
5968 static struct iv_ca_delta *
5969 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5970                  struct cost_pair *new_cp, struct iv_ca_delta *next)
5971 {
5972   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5973
5974   change->group = group;
5975   change->old_cp = old_cp;
5976   change->new_cp = new_cp;
5977   change->next = next;
5978
5979   return change;
5980 }
5981
5982 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5983    are rewritten.  */
5984
5985 static struct iv_ca_delta *
5986 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5987 {
5988   struct iv_ca_delta *last;
5989
5990   if (!l2)
5991     return l1;
5992
5993   if (!l1)
5994     return l2;
5995
5996   for (last = l1; last->next; last = last->next)
5997     continue;
5998   last->next = l2;
5999
6000   return l1;
6001 }
6002
6003 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6004
6005 static struct iv_ca_delta *
6006 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6007 {
6008   struct iv_ca_delta *act, *next, *prev = NULL;
6009
6010   for (act = delta; act; act = next)
6011     {
6012       next = act->next;
6013       act->next = prev;
6014       prev = act;
6015
6016       std::swap (act->old_cp, act->new_cp);
6017     }
6018
6019   return prev;
6020 }
6021
6022 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6023    reverted instead.  */
6024
6025 static void
6026 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6027                     struct iv_ca_delta *delta, bool forward)
6028 {
6029   struct cost_pair *from, *to;
6030   struct iv_ca_delta *act;
6031
6032   if (!forward)
6033     delta = iv_ca_delta_reverse (delta);
6034
6035   for (act = delta; act; act = act->next)
6036     {
6037       from = act->old_cp;
6038       to = act->new_cp;
6039       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6040       iv_ca_set_cp (data, ivs, act->group, to);
6041     }
6042
6043   if (!forward)
6044     iv_ca_delta_reverse (delta);
6045 }
6046
6047 /* Returns true if CAND is used in IVS.  */
6048
6049 static bool
6050 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6051 {
6052   return ivs->n_cand_uses[cand->id] > 0;
6053 }
6054
6055 /* Returns number of induction variable candidates in the set IVS.  */
6056
6057 static unsigned
6058 iv_ca_n_cands (struct iv_ca *ivs)
6059 {
6060   return ivs->n_cands;
6061 }
6062
6063 /* Free the list of changes DELTA.  */
6064
6065 static void
6066 iv_ca_delta_free (struct iv_ca_delta **delta)
6067 {
6068   struct iv_ca_delta *act, *next;
6069
6070   for (act = *delta; act; act = next)
6071     {
6072       next = act->next;
6073       free (act);
6074     }
6075
6076   *delta = NULL;
6077 }
6078
6079 /* Allocates new iv candidates assignment.  */
6080
6081 static struct iv_ca *
6082 iv_ca_new (struct ivopts_data *data)
6083 {
6084   struct iv_ca *nw = XNEW (struct iv_ca);
6085
6086   nw->upto = 0;
6087   nw->bad_groups = 0;
6088   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6089                                  data->vgroups.length ());
6090   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6091   nw->cands = BITMAP_ALLOC (NULL);
6092   nw->n_cands = 0;
6093   nw->n_invs = 0;
6094   nw->cand_use_cost = no_cost;
6095   nw->cand_cost = 0;
6096   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6097   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6098   nw->cost = no_cost;
6099
6100   return nw;
6101 }
6102
6103 /* Free memory occupied by the set IVS.  */
6104
6105 static void
6106 iv_ca_free (struct iv_ca **ivs)
6107 {
6108   free ((*ivs)->cand_for_group);
6109   free ((*ivs)->n_cand_uses);
6110   BITMAP_FREE ((*ivs)->cands);
6111   free ((*ivs)->n_inv_var_uses);
6112   free ((*ivs)->n_inv_expr_uses);
6113   free (*ivs);
6114   *ivs = NULL;
6115 }
6116
6117 /* Dumps IVS to FILE.  */
6118
6119 static void
6120 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6121 {
6122   unsigned i;
6123   comp_cost cost = iv_ca_cost (ivs);
6124
6125   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6126            cost.complexity);
6127   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6128            ivs->cand_cost, ivs->cand_use_cost.cost,
6129            ivs->cand_use_cost.complexity);
6130   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6131
6132   for (i = 0; i < ivs->upto; i++)
6133     {
6134       struct iv_group *group = data->vgroups[i];
6135       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6136       if (cp)
6137         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6138                  group->id, cp->cand->id, cp->cost.cost,
6139                  cp->cost.complexity);
6140       else
6141         fprintf (file, "   group:%d --> ??\n", group->id);
6142     }
6143
6144   const char *pref = "";
6145   fprintf (file, "  invariant variables: ");
6146   for (i = 1; i <= data->max_inv_var_id; i++)
6147     if (ivs->n_inv_var_uses[i])
6148       {
6149         fprintf (file, "%s%d", pref, i);
6150         pref = ", ";
6151       }
6152
6153   pref = "";
6154   fprintf (file, "\n  invariant expressions: ");
6155   for (i = 1; i <= data->max_inv_expr_id; i++)
6156     if (ivs->n_inv_expr_uses[i])
6157       {
6158         fprintf (file, "%s%d", pref, i);
6159         pref = ", ";
6160       }
6161
6162   fprintf (file, "\n\n");
6163 }
6164
6165 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6166    new set, and store differences in DELTA.  Number of induction variables
6167    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6168    the function will try to find a solution with mimimal iv candidates.  */
6169
6170 static comp_cost
6171 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6172               struct iv_cand *cand, struct iv_ca_delta **delta,
6173               unsigned *n_ivs, bool min_ncand)
6174 {
6175   unsigned i;
6176   comp_cost cost;
6177   struct iv_group *group;
6178   struct cost_pair *old_cp, *new_cp;
6179
6180   *delta = NULL;
6181   for (i = 0; i < ivs->upto; i++)
6182     {
6183       group = data->vgroups[i];
6184       old_cp = iv_ca_cand_for_group (ivs, group);
6185
6186       if (old_cp
6187           && old_cp->cand == cand)
6188         continue;
6189
6190       new_cp = get_group_iv_cost (data, group, cand);
6191       if (!new_cp)
6192         continue;
6193
6194       if (!min_ncand)
6195         {
6196           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6197           /* Skip if new_cp depends on more invariants.  */
6198           if (cmp_invs > 0)
6199             continue;
6200
6201           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6202           /* Skip if new_cp is not cheaper.  */
6203           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6204             continue;
6205         }
6206
6207       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6208     }
6209
6210   iv_ca_delta_commit (data, ivs, *delta, true);
6211   cost = iv_ca_cost (ivs);
6212   if (n_ivs)
6213     *n_ivs = iv_ca_n_cands (ivs);
6214   iv_ca_delta_commit (data, ivs, *delta, false);
6215
6216   return cost;
6217 }
6218
6219 /* Try narrowing set IVS by removing CAND.  Return the cost of
6220    the new set and store the differences in DELTA.  START is
6221    the candidate with which we start narrowing.  */
6222
6223 static comp_cost
6224 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6225               struct iv_cand *cand, struct iv_cand *start,
6226               struct iv_ca_delta **delta)
6227 {
6228   unsigned i, ci;
6229   struct iv_group *group;
6230   struct cost_pair *old_cp, *new_cp, *cp;
6231   bitmap_iterator bi;
6232   struct iv_cand *cnd;
6233   comp_cost cost, best_cost, acost;
6234
6235   *delta = NULL;
6236   for (i = 0; i < data->vgroups.length (); i++)
6237     {
6238       group = data->vgroups[i];
6239
6240       old_cp = iv_ca_cand_for_group (ivs, group);
6241       if (old_cp->cand != cand)
6242         continue;
6243
6244       best_cost = iv_ca_cost (ivs);
6245       /* Start narrowing with START.  */
6246       new_cp = get_group_iv_cost (data, group, start);
6247
6248       if (data->consider_all_candidates)
6249         {
6250           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6251             {
6252               if (ci == cand->id || (start && ci == start->id))
6253                 continue;
6254
6255               cnd = data->vcands[ci];
6256
6257               cp = get_group_iv_cost (data, group, cnd);
6258               if (!cp)
6259                 continue;
6260
6261               iv_ca_set_cp (data, ivs, group, cp);
6262               acost = iv_ca_cost (ivs);
6263
6264               if (acost < best_cost)
6265                 {
6266                   best_cost = acost;
6267                   new_cp = cp;
6268                 }
6269             }
6270         }
6271       else
6272         {
6273           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6274             {
6275               if (ci == cand->id || (start && ci == start->id))
6276                 continue;
6277
6278               cnd = data->vcands[ci];
6279
6280               cp = get_group_iv_cost (data, group, cnd);
6281               if (!cp)
6282                 continue;
6283
6284               iv_ca_set_cp (data, ivs, group, cp);
6285               acost = iv_ca_cost (ivs);
6286
6287               if (acost < best_cost)
6288                 {
6289                   best_cost = acost;
6290                   new_cp = cp;
6291                 }
6292             }
6293         }
6294       /* Restore to old cp for use.  */
6295       iv_ca_set_cp (data, ivs, group, old_cp);
6296
6297       if (!new_cp)
6298         {
6299           iv_ca_delta_free (delta);
6300           return infinite_cost;
6301         }
6302
6303       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6304     }
6305
6306   iv_ca_delta_commit (data, ivs, *delta, true);
6307   cost = iv_ca_cost (ivs);
6308   iv_ca_delta_commit (data, ivs, *delta, false);
6309
6310   return cost;
6311 }
6312
6313 /* Try optimizing the set of candidates IVS by removing candidates different
6314    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6315    differences in DELTA.  */
6316
6317 static comp_cost
6318 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6319              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6320 {
6321   bitmap_iterator bi;
6322   struct iv_ca_delta *act_delta, *best_delta;
6323   unsigned i;
6324   comp_cost best_cost, acost;
6325   struct iv_cand *cand;
6326
6327   best_delta = NULL;
6328   best_cost = iv_ca_cost (ivs);
6329
6330   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6331     {
6332       cand = data->vcands[i];
6333
6334       if (cand == except_cand)
6335         continue;
6336
6337       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6338
6339       if (acost < best_cost)
6340         {
6341           best_cost = acost;
6342           iv_ca_delta_free (&best_delta);
6343           best_delta = act_delta;
6344         }
6345       else
6346         iv_ca_delta_free (&act_delta);
6347     }
6348
6349   if (!best_delta)
6350     {
6351       *delta = NULL;
6352       return best_cost;
6353     }
6354
6355   /* Recurse to possibly remove other unnecessary ivs.  */
6356   iv_ca_delta_commit (data, ivs, best_delta, true);
6357   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6358   iv_ca_delta_commit (data, ivs, best_delta, false);
6359   *delta = iv_ca_delta_join (best_delta, *delta);
6360   return best_cost;
6361 }
6362
6363 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6364    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6365    the corresponding cost_pair, otherwise just return BEST_CP.  */
6366
6367 static struct cost_pair*
6368 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6369                         unsigned int cand_idx, struct iv_cand *old_cand,
6370                         struct cost_pair *best_cp)
6371 {
6372   struct iv_cand *cand;
6373   struct cost_pair *cp;
6374
6375   gcc_assert (old_cand != NULL && best_cp != NULL);
6376   if (cand_idx == old_cand->id)
6377     return best_cp;
6378
6379   cand = data->vcands[cand_idx];
6380   cp = get_group_iv_cost (data, group, cand);
6381   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6382     return cp;
6383
6384   return best_cp;
6385 }
6386
6387 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6388    which are used by more than one iv uses.  For each of those candidates,
6389    this function tries to represent iv uses under that candidate using
6390    other ones with lower local cost, then tries to prune the new set.
6391    If the new set has lower cost, It returns the new cost after recording
6392    candidate replacement in list DELTA.  */
6393
6394 static comp_cost
6395 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6396                struct iv_ca_delta **delta)
6397 {
6398   bitmap_iterator bi, bj;
6399   unsigned int i, j, k;
6400   struct iv_cand *cand;
6401   comp_cost orig_cost, acost;
6402   struct iv_ca_delta *act_delta, *tmp_delta;
6403   struct cost_pair *old_cp, *best_cp = NULL;
6404
6405   *delta = NULL;
6406   orig_cost = iv_ca_cost (ivs);
6407
6408   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6409     {
6410       if (ivs->n_cand_uses[i] == 1
6411           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6412         continue;
6413
6414       cand = data->vcands[i];
6415
6416       act_delta = NULL;
6417       /*  Represent uses under current candidate using other ones with
6418           lower local cost.  */
6419       for (j = 0; j < ivs->upto; j++)
6420         {
6421           struct iv_group *group = data->vgroups[j];
6422           old_cp = iv_ca_cand_for_group (ivs, group);
6423
6424           if (old_cp->cand != cand)
6425             continue;
6426
6427           best_cp = old_cp;
6428           if (data->consider_all_candidates)
6429             for (k = 0; k < data->vcands.length (); k++)
6430               best_cp = cheaper_cost_with_cand (data, group, k,
6431                                                 old_cp->cand, best_cp);
6432           else
6433             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6434               best_cp = cheaper_cost_with_cand (data, group, k,
6435                                                 old_cp->cand, best_cp);
6436
6437           if (best_cp == old_cp)
6438             continue;
6439
6440           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6441         }
6442       /* No need for further prune.  */
6443       if (!act_delta)
6444         continue;
6445
6446       /* Prune the new candidate set.  */
6447       iv_ca_delta_commit (data, ivs, act_delta, true);
6448       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6449       iv_ca_delta_commit (data, ivs, act_delta, false);
6450       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6451
6452       if (acost < orig_cost)
6453         {
6454           *delta = act_delta;
6455           return acost;
6456         }
6457       else
6458         iv_ca_delta_free (&act_delta);
6459     }
6460
6461   return orig_cost;
6462 }
6463
6464 /* Tries to extend the sets IVS in the best possible way in order to
6465    express the GROUP.  If ORIGINALP is true, prefer candidates from
6466    the original set of IVs, otherwise favor important candidates not
6467    based on any memory object.  */
6468
6469 static bool
6470 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6471                   struct iv_group *group, bool originalp)
6472 {
6473   comp_cost best_cost, act_cost;
6474   unsigned i;
6475   bitmap_iterator bi;
6476   struct iv_cand *cand;
6477   struct iv_ca_delta *best_delta = NULL, *act_delta;
6478   struct cost_pair *cp;
6479
6480   iv_ca_add_group (data, ivs, group);
6481   best_cost = iv_ca_cost (ivs);
6482   cp = iv_ca_cand_for_group (ivs, group);
6483   if (cp)
6484     {
6485       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6486       iv_ca_set_no_cp (data, ivs, group);
6487     }
6488
6489   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6490      first try important candidates not based on any memory object.  Only if
6491      this fails, try the specific ones.  Rationale -- in loops with many
6492      variables the best choice often is to use just one generic biv.  If we
6493      added here many ivs specific to the uses, the optimization algorithm later
6494      would be likely to get stuck in a local minimum, thus causing us to create
6495      too many ivs.  The approach from few ivs to more seems more likely to be
6496      successful -- starting from few ivs, replacing an expensive use by a
6497      specific iv should always be a win.  */
6498   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6499     {
6500       cand = data->vcands[i];
6501
6502       if (originalp && cand->pos !=IP_ORIGINAL)
6503         continue;
6504
6505       if (!originalp && cand->iv->base_object != NULL_TREE)
6506         continue;
6507
6508       if (iv_ca_cand_used_p (ivs, cand))
6509         continue;
6510
6511       cp = get_group_iv_cost (data, group, cand);
6512       if (!cp)
6513         continue;
6514
6515       iv_ca_set_cp (data, ivs, group, cp);
6516       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6517                                true);
6518       iv_ca_set_no_cp (data, ivs, group);
6519       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6520
6521       if (act_cost < best_cost)
6522         {
6523           best_cost = act_cost;
6524
6525           iv_ca_delta_free (&best_delta);
6526           best_delta = act_delta;
6527         }
6528       else
6529         iv_ca_delta_free (&act_delta);
6530     }
6531
6532   if (best_cost.infinite_cost_p ())
6533     {
6534       for (i = 0; i < group->n_map_members; i++)
6535         {
6536           cp = group->cost_map + i;
6537           cand = cp->cand;
6538           if (!cand)
6539             continue;
6540
6541           /* Already tried this.  */
6542           if (cand->important)
6543             {
6544               if (originalp && cand->pos == IP_ORIGINAL)
6545                 continue;
6546               if (!originalp && cand->iv->base_object == NULL_TREE)
6547                 continue;
6548             }
6549
6550           if (iv_ca_cand_used_p (ivs, cand))
6551             continue;
6552
6553           act_delta = NULL;
6554           iv_ca_set_cp (data, ivs, group, cp);
6555           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6556           iv_ca_set_no_cp (data, ivs, group);
6557           act_delta = iv_ca_delta_add (group,
6558                                        iv_ca_cand_for_group (ivs, group),
6559                                        cp, act_delta);
6560
6561           if (act_cost < best_cost)
6562             {
6563               best_cost = act_cost;
6564
6565               if (best_delta)
6566                 iv_ca_delta_free (&best_delta);
6567               best_delta = act_delta;
6568             }
6569           else
6570             iv_ca_delta_free (&act_delta);
6571         }
6572     }
6573
6574   iv_ca_delta_commit (data, ivs, best_delta, true);
6575   iv_ca_delta_free (&best_delta);
6576
6577   return !best_cost.infinite_cost_p ();
6578 }
6579
6580 /* Finds an initial assignment of candidates to uses.  */
6581
6582 static struct iv_ca *
6583 get_initial_solution (struct ivopts_data *data, bool originalp)
6584 {
6585   unsigned i;
6586   struct iv_ca *ivs = iv_ca_new (data);
6587
6588   for (i = 0; i < data->vgroups.length (); i++)
6589     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6590       {
6591         iv_ca_free (&ivs);
6592         return NULL;
6593       }
6594
6595   return ivs;
6596 }
6597
6598 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6599    points to a bool variable, this function tries to break local
6600    optimal fixed-point by replacing candidates in IVS if it's true.  */
6601
6602 static bool
6603 try_improve_iv_set (struct ivopts_data *data,
6604                     struct iv_ca *ivs, bool *try_replace_p)
6605 {
6606   unsigned i, n_ivs;
6607   comp_cost acost, best_cost = iv_ca_cost (ivs);
6608   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6609   struct iv_cand *cand;
6610
6611   /* Try extending the set of induction variables by one.  */
6612   for (i = 0; i < data->vcands.length (); i++)
6613     {
6614       cand = data->vcands[i];
6615
6616       if (iv_ca_cand_used_p (ivs, cand))
6617         continue;
6618
6619       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6620       if (!act_delta)
6621         continue;
6622
6623       /* If we successfully added the candidate and the set is small enough,
6624          try optimizing it by removing other candidates.  */
6625       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6626         {
6627           iv_ca_delta_commit (data, ivs, act_delta, true);
6628           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6629           iv_ca_delta_commit (data, ivs, act_delta, false);
6630           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6631         }
6632
6633       if (acost < best_cost)
6634         {
6635           best_cost = acost;
6636           iv_ca_delta_free (&best_delta);
6637           best_delta = act_delta;
6638         }
6639       else
6640         iv_ca_delta_free (&act_delta);
6641     }
6642
6643   if (!best_delta)
6644     {
6645       /* Try removing the candidates from the set instead.  */
6646       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6647
6648       if (!best_delta && *try_replace_p)
6649         {
6650           *try_replace_p = false;
6651           /* So far candidate selecting algorithm tends to choose fewer IVs
6652              so that it can handle cases in which loops have many variables
6653              but the best choice is often to use only one general biv.  One
6654              weakness is it can't handle opposite cases, in which different
6655              candidates should be chosen with respect to each use.  To solve
6656              the problem, we replace candidates in a manner described by the
6657              comments of iv_ca_replace, thus give general algorithm a chance
6658              to break local optimal fixed-point in these cases.  */
6659           best_cost = iv_ca_replace (data, ivs, &best_delta);
6660         }
6661
6662       if (!best_delta)
6663         return false;
6664     }
6665
6666   iv_ca_delta_commit (data, ivs, best_delta, true);
6667   gcc_assert (best_cost == iv_ca_cost (ivs));
6668   iv_ca_delta_free (&best_delta);
6669   return true;
6670 }
6671
6672 /* Attempts to find the optimal set of induction variables.  We do simple
6673    greedy heuristic -- we try to replace at most one candidate in the selected
6674    solution and remove the unused ivs while this improves the cost.  */
6675
6676 static struct iv_ca *
6677 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6678 {
6679   struct iv_ca *set;
6680   bool try_replace_p = true;
6681
6682   /* Get the initial solution.  */
6683   set = get_initial_solution (data, originalp);
6684   if (!set)
6685     {
6686       if (dump_file && (dump_flags & TDF_DETAILS))
6687         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6688       return NULL;
6689     }
6690
6691   if (dump_file && (dump_flags & TDF_DETAILS))
6692     {
6693       fprintf (dump_file, "Initial set of candidates:\n");
6694       iv_ca_dump (data, dump_file, set);
6695     }
6696
6697   while (try_improve_iv_set (data, set, &try_replace_p))
6698     {
6699       if (dump_file && (dump_flags & TDF_DETAILS))
6700         {
6701           fprintf (dump_file, "Improved to:\n");
6702           iv_ca_dump (data, dump_file, set);
6703         }
6704     }
6705
6706   return set;
6707 }
6708
6709 static struct iv_ca *
6710 find_optimal_iv_set (struct ivopts_data *data)
6711 {
6712   unsigned i;
6713   comp_cost cost, origcost;
6714   struct iv_ca *set, *origset;
6715
6716   /* Determine the cost based on a strategy that starts with original IVs,
6717      and try again using a strategy that prefers candidates not based
6718      on any IVs.  */
6719   origset = find_optimal_iv_set_1 (data, true);
6720   set = find_optimal_iv_set_1 (data, false);
6721
6722   if (!origset && !set)
6723     return NULL;
6724
6725   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6726   cost = set ? iv_ca_cost (set) : infinite_cost;
6727
6728   if (dump_file && (dump_flags & TDF_DETAILS))
6729     {
6730       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6731                origcost.cost, origcost.complexity);
6732       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6733                cost.cost, cost.complexity);
6734     }
6735
6736   /* Choose the one with the best cost.  */
6737   if (origcost <= cost)
6738     {
6739       if (set)
6740         iv_ca_free (&set);
6741       set = origset;
6742     }
6743   else if (origset)
6744     iv_ca_free (&origset);
6745
6746   for (i = 0; i < data->vgroups.length (); i++)
6747     {
6748       struct iv_group *group = data->vgroups[i];
6749       group->selected = iv_ca_cand_for_group (set, group)->cand;
6750     }
6751
6752   return set;
6753 }
6754
6755 /* Creates a new induction variable corresponding to CAND.  */
6756
6757 static void
6758 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6759 {
6760   gimple_stmt_iterator incr_pos;
6761   tree base;
6762   struct iv_use *use;
6763   struct iv_group *group;
6764   bool after = false;
6765
6766   gcc_assert (cand->iv != NULL);
6767
6768   switch (cand->pos)
6769     {
6770     case IP_NORMAL:
6771       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6772       break;
6773
6774     case IP_END:
6775       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6776       after = true;
6777       break;
6778
6779     case IP_AFTER_USE:
6780       after = true;
6781       /* fall through */
6782     case IP_BEFORE_USE:
6783       incr_pos = gsi_for_stmt (cand->incremented_at);
6784       break;
6785
6786     case IP_ORIGINAL:
6787       /* Mark that the iv is preserved.  */
6788       name_info (data, cand->var_before)->preserve_biv = true;
6789       name_info (data, cand->var_after)->preserve_biv = true;
6790
6791       /* Rewrite the increment so that it uses var_before directly.  */
6792       use = find_interesting_uses_op (data, cand->var_after);
6793       group = data->vgroups[use->group_id];
6794       group->selected = cand;
6795       return;
6796     }
6797
6798   gimple_add_tmp_var (cand->var_before);
6799
6800   base = unshare_expr (cand->iv->base);
6801
6802   create_iv (base, unshare_expr (cand->iv->step),
6803              cand->var_before, data->current_loop,
6804              &incr_pos, after, &cand->var_before, &cand->var_after);
6805 }
6806
6807 /* Creates new induction variables described in SET.  */
6808
6809 static void
6810 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6811 {
6812   unsigned i;
6813   struct iv_cand *cand;
6814   bitmap_iterator bi;
6815
6816   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6817     {
6818       cand = data->vcands[i];
6819       create_new_iv (data, cand);
6820     }
6821
6822   if (dump_file && (dump_flags & TDF_DETAILS))
6823     {
6824       fprintf (dump_file, "Selected IV set for loop %d",
6825                data->current_loop->num);
6826       if (data->loop_loc != UNKNOWN_LOCATION)
6827         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6828                  LOCATION_LINE (data->loop_loc));
6829       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6830                avg_loop_niter (data->current_loop));
6831       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6832       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6833         {
6834           cand = data->vcands[i];
6835           dump_cand (dump_file, cand);
6836         }
6837       fprintf (dump_file, "\n");
6838     }
6839 }
6840
6841 /* Rewrites USE (definition of iv used in a nonlinear expression)
6842    using candidate CAND.  */
6843
6844 static void
6845 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6846                             struct iv_use *use, struct iv_cand *cand)
6847 {
6848   gassign *ass;
6849   gimple_stmt_iterator bsi;
6850   tree comp, type = get_use_type (use), tgt;
6851
6852   /* An important special case -- if we are asked to express value of
6853      the original iv by itself, just exit; there is no need to
6854      introduce a new computation (that might also need casting the
6855      variable to unsigned and back).  */
6856   if (cand->pos == IP_ORIGINAL
6857       && cand->incremented_at == use->stmt)
6858     {
6859       tree op = NULL_TREE;
6860       enum tree_code stmt_code;
6861
6862       gcc_assert (is_gimple_assign (use->stmt));
6863       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6864
6865       /* Check whether we may leave the computation unchanged.
6866          This is the case only if it does not rely on other
6867          computations in the loop -- otherwise, the computation
6868          we rely upon may be removed in remove_unused_ivs,
6869          thus leading to ICE.  */
6870       stmt_code = gimple_assign_rhs_code (use->stmt);
6871       if (stmt_code == PLUS_EXPR
6872           || stmt_code == MINUS_EXPR
6873           || stmt_code == POINTER_PLUS_EXPR)
6874         {
6875           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6876             op = gimple_assign_rhs2 (use->stmt);
6877           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6878             op = gimple_assign_rhs1 (use->stmt);
6879         }
6880
6881       if (op != NULL_TREE)
6882         {
6883           if (expr_invariant_in_loop_p (data->current_loop, op))
6884             return;
6885           if (TREE_CODE (op) == SSA_NAME)
6886             {
6887               struct iv *iv = get_iv (data, op);
6888               if (iv != NULL && integer_zerop (iv->step))
6889                 return;
6890             }
6891         }
6892     }
6893
6894   switch (gimple_code (use->stmt))
6895     {
6896     case GIMPLE_PHI:
6897       tgt = PHI_RESULT (use->stmt);
6898
6899       /* If we should keep the biv, do not replace it.  */
6900       if (name_info (data, tgt)->preserve_biv)
6901         return;
6902
6903       bsi = gsi_after_labels (gimple_bb (use->stmt));
6904       break;
6905
6906     case GIMPLE_ASSIGN:
6907       tgt = gimple_assign_lhs (use->stmt);
6908       bsi = gsi_for_stmt (use->stmt);
6909       break;
6910
6911     default:
6912       gcc_unreachable ();
6913     }
6914
6915   aff_tree aff_inv, aff_var;
6916   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6917                               use, cand, &aff_inv, &aff_var))
6918     gcc_unreachable ();
6919
6920   unshare_aff_combination (&aff_inv);
6921   unshare_aff_combination (&aff_var);
6922   /* Prefer CSE opportunity than loop invariant by adding offset at last
6923      so that iv_uses have different offsets can be CSEed.  */
6924   poly_widest_int offset = aff_inv.offset;
6925   aff_inv.offset = 0;
6926
6927   gimple_seq stmt_list = NULL, seq = NULL;
6928   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6929   tree comp_op2 = aff_combination_to_tree (&aff_var);
6930   gcc_assert (comp_op1 && comp_op2);
6931
6932   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6933   gimple_seq_add_seq (&stmt_list, seq);
6934   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6935   gimple_seq_add_seq (&stmt_list, seq);
6936
6937   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6938     std::swap (comp_op1, comp_op2);
6939
6940   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6941     {
6942       comp = fold_build_pointer_plus (comp_op1,
6943                                       fold_convert (sizetype, comp_op2));
6944       comp = fold_build_pointer_plus (comp,
6945                                       wide_int_to_tree (sizetype, offset));
6946     }
6947   else
6948     {
6949       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6950                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6951       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6952                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6953     }
6954
6955   comp = fold_convert (type, comp);
6956   if (!valid_gimple_rhs_p (comp)
6957       || (gimple_code (use->stmt) != GIMPLE_PHI
6958           /* We can't allow re-allocating the stmt as it might be pointed
6959              to still.  */
6960           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6961               >= gimple_num_ops (gsi_stmt (bsi)))))
6962     {
6963       comp = force_gimple_operand (comp, &seq, true, NULL);
6964       gimple_seq_add_seq (&stmt_list, seq);
6965       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6966         {
6967           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6968           /* As this isn't a plain copy we have to reset alignment
6969              information.  */
6970           if (SSA_NAME_PTR_INFO (comp))
6971             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6972         }
6973     }
6974
6975   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6976   if (gimple_code (use->stmt) == GIMPLE_PHI)
6977     {
6978       ass = gimple_build_assign (tgt, comp);
6979       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6980
6981       bsi = gsi_for_stmt (use->stmt);
6982       remove_phi_node (&bsi, false);
6983     }
6984   else
6985     {
6986       gimple_assign_set_rhs_from_tree (&bsi, comp);
6987       use->stmt = gsi_stmt (bsi);
6988     }
6989 }
6990
6991 /* Performs a peephole optimization to reorder the iv update statement with
6992    a mem ref to enable instruction combining in later phases. The mem ref uses
6993    the iv value before the update, so the reordering transformation requires
6994    adjustment of the offset. CAND is the selected IV_CAND.
6995
6996    Example:
6997
6998    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6999    iv2 = iv1 + 1;
7000
7001    if (t < val)      (1)
7002      goto L;
7003    goto Head;
7004
7005
7006    directly propagating t over to (1) will introduce overlapping live range
7007    thus increase register pressure. This peephole transform it into:
7008
7009
7010    iv2 = iv1 + 1;
7011    t = MEM_REF (base, iv2, 8, 8);
7012    if (t < val)
7013      goto L;
7014    goto Head;
7015 */
7016
7017 static void
7018 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7019 {
7020   tree var_after;
7021   gimple *iv_update, *stmt;
7022   basic_block bb;
7023   gimple_stmt_iterator gsi, gsi_iv;
7024
7025   if (cand->pos != IP_NORMAL)
7026     return;
7027
7028   var_after = cand->var_after;
7029   iv_update = SSA_NAME_DEF_STMT (var_after);
7030
7031   bb = gimple_bb (iv_update);
7032   gsi = gsi_last_nondebug_bb (bb);
7033   stmt = gsi_stmt (gsi);
7034
7035   /* Only handle conditional statement for now.  */
7036   if (gimple_code (stmt) != GIMPLE_COND)
7037     return;
7038
7039   gsi_prev_nondebug (&gsi);
7040   stmt = gsi_stmt (gsi);
7041   if (stmt != iv_update)
7042     return;
7043
7044   gsi_prev_nondebug (&gsi);
7045   if (gsi_end_p (gsi))
7046     return;
7047
7048   stmt = gsi_stmt (gsi);
7049   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7050     return;
7051
7052   if (stmt != use->stmt)
7053     return;
7054
7055   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7056     return;
7057
7058   if (dump_file && (dump_flags & TDF_DETAILS))
7059     {
7060       fprintf (dump_file, "Reordering \n");
7061       print_gimple_stmt (dump_file, iv_update, 0);
7062       print_gimple_stmt (dump_file, use->stmt, 0);
7063       fprintf (dump_file, "\n");
7064     }
7065
7066   gsi = gsi_for_stmt (use->stmt);
7067   gsi_iv = gsi_for_stmt (iv_update);
7068   gsi_move_before (&gsi_iv, &gsi);
7069
7070   cand->pos = IP_BEFORE_USE;
7071   cand->incremented_at = use->stmt;
7072 }
7073
7074 /* Return the alias pointer type that should be used for a MEM_REF
7075    associated with USE, which has type USE_PTR_ADDRESS.  */
7076
7077 static tree
7078 get_alias_ptr_type_for_ptr_address (iv_use *use)
7079 {
7080   gcall *call = as_a <gcall *> (use->stmt);
7081   switch (gimple_call_internal_fn (call))
7082     {
7083     case IFN_MASK_LOAD:
7084     case IFN_MASK_STORE:
7085       /* The second argument contains the correct alias type.  */
7086       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7087       return TREE_TYPE (gimple_call_arg (call, 1));
7088
7089     default:
7090       gcc_unreachable ();
7091     }
7092 }
7093
7094
7095 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7096
7097 static void
7098 rewrite_use_address (struct ivopts_data *data,
7099                      struct iv_use *use, struct iv_cand *cand)
7100 {
7101   aff_tree aff;
7102   bool ok;
7103
7104   adjust_iv_update_pos (cand, use);
7105   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7106   gcc_assert (ok);
7107   unshare_aff_combination (&aff);
7108
7109   /* To avoid undefined overflow problems, all IV candidates use unsigned
7110      integer types.  The drawback is that this makes it impossible for
7111      create_mem_ref to distinguish an IV that is based on a memory object
7112      from one that represents simply an offset.
7113
7114      To work around this problem, we pass a hint to create_mem_ref that
7115      indicates which variable (if any) in aff is an IV based on a memory
7116      object.  Note that we only consider the candidate.  If this is not
7117      based on an object, the base of the reference is in some subexpression
7118      of the use -- but these will use pointer types, so they are recognized
7119      by the create_mem_ref heuristics anyway.  */
7120   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7121   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7122   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7123   tree type = use->mem_type;
7124   tree alias_ptr_type;
7125   if (use->type == USE_PTR_ADDRESS)
7126     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7127   else
7128     {
7129       gcc_assert (type == TREE_TYPE (*use->op_p));
7130       unsigned int align = get_object_alignment (*use->op_p);
7131       if (align != TYPE_ALIGN (type))
7132         type = build_aligned_type (type, align);
7133       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7134     }
7135   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7136                              iv, base_hint, data->speed);
7137
7138   if (use->type == USE_PTR_ADDRESS)
7139     {
7140       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7141       ref = fold_convert (get_use_type (use), ref);
7142       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7143                                       true, GSI_SAME_STMT);
7144     }
7145   else
7146     copy_ref_info (ref, *use->op_p);
7147
7148   *use->op_p = ref;
7149 }
7150
7151 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7152    candidate CAND.  */
7153
7154 static void
7155 rewrite_use_compare (struct ivopts_data *data,
7156                      struct iv_use *use, struct iv_cand *cand)
7157 {
7158   tree comp, op, bound;
7159   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7160   enum tree_code compare;
7161   struct iv_group *group = data->vgroups[use->group_id];
7162   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7163
7164   bound = cp->value;
7165   if (bound)
7166     {
7167       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7168       tree var_type = TREE_TYPE (var);
7169       gimple_seq stmts;
7170
7171       if (dump_file && (dump_flags & TDF_DETAILS))
7172         {
7173           fprintf (dump_file, "Replacing exit test: ");
7174           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7175         }
7176       compare = cp->comp;
7177       bound = unshare_expr (fold_convert (var_type, bound));
7178       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7179       if (stmts)
7180         gsi_insert_seq_on_edge_immediate (
7181                 loop_preheader_edge (data->current_loop),
7182                 stmts);
7183
7184       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7185       gimple_cond_set_lhs (cond_stmt, var);
7186       gimple_cond_set_code (cond_stmt, compare);
7187       gimple_cond_set_rhs (cond_stmt, op);
7188       return;
7189     }
7190
7191   /* The induction variable elimination failed; just express the original
7192      giv.  */
7193   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7194   gcc_assert (comp != NULL_TREE);
7195   gcc_assert (use->op_p != NULL);
7196   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7197                                          SSA_NAME_VAR (*use->op_p),
7198                                          true, GSI_SAME_STMT);
7199 }
7200
7201 /* Rewrite the groups using the selected induction variables.  */
7202
7203 static void
7204 rewrite_groups (struct ivopts_data *data)
7205 {
7206   unsigned i, j;
7207
7208   for (i = 0; i < data->vgroups.length (); i++)
7209     {
7210       struct iv_group *group = data->vgroups[i];
7211       struct iv_cand *cand = group->selected;
7212
7213       gcc_assert (cand);
7214
7215       if (group->type == USE_NONLINEAR_EXPR)
7216         {
7217           for (j = 0; j < group->vuses.length (); j++)
7218             {
7219               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7220               update_stmt (group->vuses[j]->stmt);
7221             }
7222         }
7223       else if (address_p (group->type))
7224         {
7225           for (j = 0; j < group->vuses.length (); j++)
7226             {
7227               rewrite_use_address (data, group->vuses[j], cand);
7228               update_stmt (group->vuses[j]->stmt);
7229             }
7230         }
7231       else
7232         {
7233           gcc_assert (group->type == USE_COMPARE);
7234
7235           for (j = 0; j < group->vuses.length (); j++)
7236             {
7237               rewrite_use_compare (data, group->vuses[j], cand);
7238               update_stmt (group->vuses[j]->stmt);
7239             }
7240         }
7241     }
7242 }
7243
7244 /* Removes the ivs that are not used after rewriting.  */
7245
7246 static void
7247 remove_unused_ivs (struct ivopts_data *data)
7248 {
7249   unsigned j;
7250   bitmap_iterator bi;
7251   bitmap toremove = BITMAP_ALLOC (NULL);
7252
7253   /* Figure out an order in which to release SSA DEFs so that we don't
7254      release something that we'd have to propagate into a debug stmt
7255      afterwards.  */
7256   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7257     {
7258       struct version_info *info;
7259
7260       info = ver_info (data, j);
7261       if (info->iv
7262           && !integer_zerop (info->iv->step)
7263           && !info->inv_id
7264           && !info->iv->nonlin_use
7265           && !info->preserve_biv)
7266         {
7267           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7268
7269           tree def = info->iv->ssa_name;
7270
7271           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7272             {
7273               imm_use_iterator imm_iter;
7274               use_operand_p use_p;
7275               gimple *stmt;
7276               int count = 0;
7277
7278               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7279                 {
7280                   if (!gimple_debug_bind_p (stmt))
7281                     continue;
7282
7283                   /* We just want to determine whether to do nothing
7284                      (count == 0), to substitute the computed
7285                      expression into a single use of the SSA DEF by
7286                      itself (count == 1), or to use a debug temp
7287                      because the SSA DEF is used multiple times or as
7288                      part of a larger expression (count > 1). */
7289                   count++;
7290                   if (gimple_debug_bind_get_value (stmt) != def)
7291                     count++;
7292
7293                   if (count > 1)
7294                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7295                 }
7296
7297               if (!count)
7298                 continue;
7299
7300               struct iv_use dummy_use;
7301               struct iv_cand *best_cand = NULL, *cand;
7302               unsigned i, best_pref = 0, cand_pref;
7303
7304               memset (&dummy_use, 0, sizeof (dummy_use));
7305               dummy_use.iv = info->iv;
7306               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7307                 {
7308                   cand = data->vgroups[i]->selected;
7309                   if (cand == best_cand)
7310                     continue;
7311                   cand_pref = operand_equal_p (cand->iv->step,
7312                                                info->iv->step, 0)
7313                     ? 4 : 0;
7314                   cand_pref
7315                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7316                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7317                     ? 2 : 0;
7318                   cand_pref
7319                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7320                     ? 1 : 0;
7321                   if (best_cand == NULL || best_pref < cand_pref)
7322                     {
7323                       best_cand = cand;
7324                       best_pref = cand_pref;
7325                     }
7326                 }
7327
7328               if (!best_cand)
7329                 continue;
7330
7331               tree comp = get_computation_at (data->current_loop,
7332                                               SSA_NAME_DEF_STMT (def),
7333                                               &dummy_use, best_cand);
7334               if (!comp)
7335                 continue;
7336
7337               if (count > 1)
7338                 {
7339                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7340                   DECL_ARTIFICIAL (vexpr) = 1;
7341                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7342                   if (SSA_NAME_VAR (def))
7343                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7344                   else
7345                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7346                   gdebug *def_temp
7347                     = gimple_build_debug_bind (vexpr, comp, NULL);
7348                   gimple_stmt_iterator gsi;
7349
7350                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7351                     gsi = gsi_after_labels (gimple_bb
7352                                             (SSA_NAME_DEF_STMT (def)));
7353                   else
7354                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7355
7356                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7357                   comp = vexpr;
7358                 }
7359
7360               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7361                 {
7362                   if (!gimple_debug_bind_p (stmt))
7363                     continue;
7364
7365                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7366                     SET_USE (use_p, comp);
7367
7368                   update_stmt (stmt);
7369                 }
7370             }
7371         }
7372     }
7373
7374   release_defs_bitset (toremove);
7375
7376   BITMAP_FREE (toremove);
7377 }
7378
7379 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7380    for hash_map::traverse.  */
7381
7382 bool
7383 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7384 {
7385   free (value);
7386   return true;
7387 }
7388
7389 /* Frees data allocated by the optimization of a single loop.  */
7390
7391 static void
7392 free_loop_data (struct ivopts_data *data)
7393 {
7394   unsigned i, j;
7395   bitmap_iterator bi;
7396   tree obj;
7397
7398   if (data->niters)
7399     {
7400       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7401       delete data->niters;
7402       data->niters = NULL;
7403     }
7404
7405   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7406     {
7407       struct version_info *info;
7408
7409       info = ver_info (data, i);
7410       info->iv = NULL;
7411       info->has_nonlin_use = false;
7412       info->preserve_biv = false;
7413       info->inv_id = 0;
7414     }
7415   bitmap_clear (data->relevant);
7416   bitmap_clear (data->important_candidates);
7417
7418   for (i = 0; i < data->vgroups.length (); i++)
7419     {
7420       struct iv_group *group = data->vgroups[i];
7421
7422       for (j = 0; j < group->vuses.length (); j++)
7423         free (group->vuses[j]);
7424       group->vuses.release ();
7425
7426       BITMAP_FREE (group->related_cands);
7427       for (j = 0; j < group->n_map_members; j++)
7428         {
7429           if (group->cost_map[j].inv_vars)
7430             BITMAP_FREE (group->cost_map[j].inv_vars);
7431           if (group->cost_map[j].inv_exprs)
7432             BITMAP_FREE (group->cost_map[j].inv_exprs);
7433         }
7434
7435       free (group->cost_map);
7436       free (group);
7437     }
7438   data->vgroups.truncate (0);
7439
7440   for (i = 0; i < data->vcands.length (); i++)
7441     {
7442       struct iv_cand *cand = data->vcands[i];
7443
7444       if (cand->inv_vars)
7445         BITMAP_FREE (cand->inv_vars);
7446       if (cand->inv_exprs)
7447         BITMAP_FREE (cand->inv_exprs);
7448       free (cand);
7449     }
7450   data->vcands.truncate (0);
7451
7452   if (data->version_info_size < num_ssa_names)
7453     {
7454       data->version_info_size = 2 * num_ssa_names;
7455       free (data->version_info);
7456       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7457     }
7458
7459   data->max_inv_var_id = 0;
7460   data->max_inv_expr_id = 0;
7461
7462   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7463     SET_DECL_RTL (obj, NULL_RTX);
7464
7465   decl_rtl_to_reset.truncate (0);
7466
7467   data->inv_expr_tab->empty ();
7468
7469   data->iv_common_cand_tab->empty ();
7470   data->iv_common_cands.truncate (0);
7471 }
7472
7473 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7474    loop tree.  */
7475
7476 static void
7477 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7478 {
7479   free_loop_data (data);
7480   free (data->version_info);
7481   BITMAP_FREE (data->relevant);
7482   BITMAP_FREE (data->important_candidates);
7483
7484   decl_rtl_to_reset.release ();
7485   data->vgroups.release ();
7486   data->vcands.release ();
7487   delete data->inv_expr_tab;
7488   data->inv_expr_tab = NULL;
7489   free_affine_expand_cache (&data->name_expansion_cache);
7490   delete data->iv_common_cand_tab;
7491   data->iv_common_cand_tab = NULL;
7492   data->iv_common_cands.release ();
7493   obstack_free (&data->iv_obstack, NULL);
7494 }
7495
7496 /* Returns true if the loop body BODY includes any function calls.  */
7497
7498 static bool
7499 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7500 {
7501   gimple_stmt_iterator gsi;
7502   unsigned i;
7503
7504   for (i = 0; i < num_nodes; i++)
7505     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7506       {
7507         gimple *stmt = gsi_stmt (gsi);
7508         if (is_gimple_call (stmt)
7509             && !gimple_call_internal_p (stmt)
7510             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7511           return true;
7512       }
7513   return false;
7514 }
7515
7516 /* Optimizes the LOOP.  Returns true if anything changed.  */
7517
7518 static bool
7519 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7520 {
7521   bool changed = false;
7522   struct iv_ca *iv_ca;
7523   edge exit = single_dom_exit (loop);
7524   basic_block *body;
7525
7526   gcc_assert (!data->niters);
7527   data->current_loop = loop;
7528   data->loop_loc = find_loop_location (loop);
7529   data->speed = optimize_loop_for_speed_p (loop);
7530
7531   if (dump_file && (dump_flags & TDF_DETAILS))
7532     {
7533       fprintf (dump_file, "Processing loop %d", loop->num);
7534       if (data->loop_loc != UNKNOWN_LOCATION)
7535         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7536                  LOCATION_LINE (data->loop_loc));
7537       fprintf (dump_file, "\n");
7538
7539       if (exit)
7540         {
7541           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7542                    exit->src->index, exit->dest->index);
7543           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7544           fprintf (dump_file, "\n");
7545         }
7546
7547       fprintf (dump_file, "\n");
7548     }
7549
7550   body = get_loop_body (loop);
7551   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7552   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7553   free (body);
7554
7555   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7556
7557   /* For each ssa name determines whether it behaves as an induction variable
7558      in some loop.  */
7559   if (!find_induction_variables (data))
7560     goto finish;
7561
7562   /* Finds interesting uses (item 1).  */
7563   find_interesting_uses (data);
7564   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7565     goto finish;
7566
7567   /* Finds candidates for the induction variables (item 2).  */
7568   find_iv_candidates (data);
7569
7570   /* Calculates the costs (item 3, part 1).  */
7571   determine_iv_costs (data);
7572   determine_group_iv_costs (data);
7573   determine_set_costs (data);
7574
7575   /* Find the optimal set of induction variables (item 3, part 2).  */
7576   iv_ca = find_optimal_iv_set (data);
7577   if (!iv_ca)
7578     goto finish;
7579   changed = true;
7580
7581   /* Create the new induction variables (item 4, part 1).  */
7582   create_new_ivs (data, iv_ca);
7583   iv_ca_free (&iv_ca);
7584
7585   /* Rewrite the uses (item 4, part 2).  */
7586   rewrite_groups (data);
7587
7588   /* Remove the ivs that are unused after rewriting.  */
7589   remove_unused_ivs (data);
7590
7591   /* We have changed the structure of induction variables; it might happen
7592      that definitions in the scev database refer to some of them that were
7593      eliminated.  */
7594   scev_reset ();
7595
7596 finish:
7597   free_loop_data (data);
7598
7599   return changed;
7600 }
7601
7602 /* Main entry point.  Optimizes induction variables in loops.  */
7603
7604 void
7605 tree_ssa_iv_optimize (void)
7606 {
7607   struct loop *loop;
7608   struct ivopts_data data;
7609
7610   tree_ssa_iv_optimize_init (&data);
7611
7612   /* Optimize the loops starting with the innermost ones.  */
7613   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7614     {
7615       if (dump_file && (dump_flags & TDF_DETAILS))
7616         flow_loop_dump (loop, dump_file, NULL, 1);
7617
7618       tree_ssa_iv_optimize_loop (&data, loop);
7619     }
7620
7621   tree_ssa_iv_optimize_finalize (&data);
7622 }
7623
7624 #include "gt-tree-ssa-loop-ivopts.h"