/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
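
/* For example (an illustrative sketch, not a call made at this point in
   the file): during analysis the body cost of NCOPIES vector stores can
   be accumulated into a cost vector with

     unsigned cost = record_stmt_cost (body_cost_vec, ncopies, vector_store,
                                       stmt_info, 0, vect_body);

   whereas passing a NULL cost vector routes the cost directly to the
   target's cost-model hook via add_stmt_cost.  */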

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
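
/* E.g., a call like read_vector_array (stmt, gsi, x, vect_array, 2)
   emits, roughly,

     vect_x_1 = vect_array[2];

   and returns the SSA name vect_x_1 (the names shown are illustrative).  */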

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This is an out-of-pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vect_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
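
/* For instance, in a store "a[i_1] = x_2" the use of i_1 only indexes the
   array, so exist_non_indexing_operands_for_use_p returns false for i_1,
   while for the stored value x_2 it returns true.  (The SSA names here
   are illustrative.)  */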


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
     return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
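
/* A worked example of the loop above (assuming vect_pow2 (x) computes 2**x,
   as its name suggests): for a two-step promotion, PWR == 1 and i runs over
   {0, 1}, so the body cost is vect_pow2 (1) + vect_pow2 (2) == 2 + 4 == 6
   vec_promote_demote stmts; the matching two-step demotion costs
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2 == 3.  */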

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
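
/* For example, for an interleaved group of four stores, the call on the
   group's first stmt returns 4 and the calls on the remaining three stmts
   return 1, so the whole-group overhead is attributed exactly once.  */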


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
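      /* For instance, ncopies == 1 and group_size == 4 give
         1 * ceil_log2 (4) * 4 == 8 vec_perm stmts.  */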
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
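      /* E.g., for a V4SI vectype and ncopies == 1 this accounts four
         scalar_load costs plus one vec_construct cost.  */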
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
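
/* As an illustration: vectorizing a use of the scalar constant 5 with a
   four-element integer vector type would emit, in the loop preheader
   (GSI == NULL), something like

     cst__1 = { 5, 5, 5, 5 };

   and return cst__1 (the SSA name shown is illustrative).  */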
1366
1367
1368 /* Function vect_get_vec_def_for_operand.
1369
1370    OP is an operand in STMT.  This function returns a (vector) def that will be
1371    used in the vectorized stmt for STMT.
1372
1373    In the case that OP is an SSA_NAME which is defined in the loop, then
1374    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375
1376    In case OP is an invariant or constant, a new stmt that creates a vector def
1377    needs to be introduced.  */
1378
1379 tree
1380 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381 {
1382   tree vec_oprnd;
1383   gimple vec_stmt;
1384   gimple def_stmt;
1385   stmt_vec_info def_stmt_info = NULL;
1386   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1387   unsigned int nunits;
1388   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1389   tree def;
1390   enum vect_def_type dt;
1391   bool is_simple_use;
1392   tree vector_type;
1393
1394   if (dump_enabled_p ())
1395     {
1396       dump_printf_loc (MSG_NOTE, vect_location,
1397                        "vect_get_vec_def_for_operand: ");
1398       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1399       dump_printf (MSG_NOTE, "\n");
1400     }
1401
1402   is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1403                                       &def_stmt, &def, &dt);
1404   gcc_assert (is_simple_use);
1405   if (dump_enabled_p ())
1406     {
1407       int loc_printed = 0;
1408       if (def)
1409         {
1410           dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
1411           loc_printed = 1;
1412           dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1413           dump_printf (MSG_NOTE, "\n");
1414         }
1415       if (def_stmt)
1416         {
1417           if (loc_printed)
1418             dump_printf (MSG_NOTE, "  def_stmt =  ");
1419           else
1420             dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1421           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1422         }
1423     }
1424
1425   switch (dt)
1426     {
1427     /* Case 1: operand is a constant.  */
1428     case vect_constant_def:
1429       {
1430         vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431         gcc_assert (vector_type);
1432         nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1433
1434         if (scalar_def)
1435           *scalar_def = op;
1436
1437         /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1438         if (dump_enabled_p ())
1439           dump_printf_loc (MSG_NOTE, vect_location,
1440                            "Create vector_cst. nunits = %d\n", nunits);
1441
1442         return vect_init_vector (stmt, op, vector_type, NULL);
1443       }
1444
1445     /* Case 2: operand is defined outside the loop - loop invariant.  */
1446     case vect_external_def:
1447       {
1448         vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1449         gcc_assert (vector_type);
1450
1451         if (scalar_def)
1452           *scalar_def = def;
1453
1454         /* Create 'vec_inv = {inv,inv,..,inv}'  */
1455         if (dump_enabled_p ())
1456           dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1457
1458         return vect_init_vector (stmt, def, vector_type, NULL);
1459       }
1460
1461     /* Case 3: operand is defined inside the loop.  */
1462     case vect_internal_def:
1463       {
1464         if (scalar_def)
1465           *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466
1467         /* Get the def from the vectorized stmt.  */
1468         def_stmt_info = vinfo_for_stmt (def_stmt);
1469
1470         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1471         /* Get vectorized pattern statement.  */
1472         if (!vec_stmt
1473             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1474             && !STMT_VINFO_RELEVANT (def_stmt_info))
1475           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1476                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1477         gcc_assert (vec_stmt);
1478         if (gimple_code (vec_stmt) == GIMPLE_PHI)
1479           vec_oprnd = PHI_RESULT (vec_stmt);
1480         else if (is_gimple_call (vec_stmt))
1481           vec_oprnd = gimple_call_lhs (vec_stmt);
1482         else
1483           vec_oprnd = gimple_assign_lhs (vec_stmt);
1484         return vec_oprnd;
1485       }
1486
1487     /* Case 4: operand is defined by a loop header phi - reduction  */
1488     case vect_reduction_def:
1489     case vect_double_reduction_def:
1490     case vect_nested_cycle:
1491       {
1492         struct loop *loop;
1493
1494         gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1495         loop = (gimple_bb (def_stmt))->loop_father;
1496
1497         /* Get the def before the loop  */
1498         op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1499         return get_initial_def_for_reduction (stmt, op, scalar_def);
1500      }
1501
1502     /* Case 5: operand is defined by loop-header phi - induction.  */
1503     case vect_induction_def:
1504       {
1505         gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506
1507         /* Get the def from the vectorized stmt.  */
1508         def_stmt_info = vinfo_for_stmt (def_stmt);
1509         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1510         if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511           vec_oprnd = PHI_RESULT (vec_stmt);
1512         else
1513           vec_oprnd = gimple_get_lhs (vec_stmt);
1514         return vec_oprnd;
1515       }
1516
1517     default:
1518       gcc_unreachable ();
1519     }
1520 }
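
/* Editor's illustration (not part of the original source): for a loop

     for (i = 0; i < n; i++)
       a[i] = b[i] + 7;

   the operand '7' is a vect_constant_def, so case 1 above returns a
   splat {7,7,...,7} built by vect_init_vector, while the SSA name
   holding the loaded value of 'b[i]' is a vect_internal_def whose
   vector def is taken from the lhs of the vectorized load (case 3).  */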
1521
1522
1523 /* Function vect_get_vec_def_for_stmt_copy
1524
1525    Return a vector-def for an operand.  This function is used when the
1526    vectorized stmt to be created (by the caller to this function) is a "copy"
1527    created in case the vectorized result cannot fit in one vector, and several
1528    copies of the vector-stmt are required.  In this case the vector-def is
1529    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1530    of the stmt that defines VEC_OPRND.
1531    DT is the type of the vector def VEC_OPRND.
1532
1533    Context:
1534         In case the vectorization factor (VF) is bigger than the number
1535    of elements that can fit in a vectype (nunits), we have to generate
1536    more than one vector stmt to vectorize the scalar stmt.  This situation
1537    arises when there are multiple data-types operated upon in the loop; the
1538    smallest data-type determines the VF, and as a result, when vectorizing
1539    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1540    vector stmt (each computing a vector of 'nunits' results, and together
1541    computing 'VF' results in each iteration).  This function is called when
1542    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1543    which VF=16 and nunits=4, so the number of copies required is 4):
1544
1545    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1546
1547    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1548                         VS1.1:  vx.1 = memref1      VS1.2
1549                         VS1.2:  vx.2 = memref2      VS1.3
1550                         VS1.3:  vx.3 = memref3
1551
1552    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1553                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1554                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1555                         VSnew.3:  vz3 = vx.3 + ...
1556
1557    The vectorization of S1 is explained in vectorizable_load.
1558    The vectorization of S2:
1559         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1560    the function 'vect_get_vec_def_for_operand' is called to
1561    get the relevant vector-def for each operand of S2.  For operand x it
1562    returns  the vector-def 'vx.0'.
1563
1564         To create the remaining copies of the vector-stmt (VSnew.j), this
1565    function is called to get the relevant vector-def for each operand.  It is
1566    obtained from the respective VS1.j stmt, which is recorded in the
1567    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568
1569         For example, to obtain the vector-def 'vx.1' in order to create the
1570    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1571    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1572    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1573    and return its def ('vx.1').
1574    Overall, to create the above sequence this function will be called 3 times:
1575         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1576         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1577         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1578
1579 tree
1580 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581 {
1582   gimple vec_stmt_for_operand;
1583   stmt_vec_info def_stmt_info;
1584
1585   /* Do nothing; can reuse same def.  */
1586   if (dt == vect_external_def || dt == vect_constant_def)
1587     return vec_oprnd;
1588
1589   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1590   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1591   gcc_assert (def_stmt_info);
1592   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1593   gcc_assert (vec_stmt_for_operand);
1594   /* The def is the lhs (or PHI result) of the related copy stmt.  */
1595   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1596     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1597   else
1598     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1599   return vec_oprnd;
1600 }
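
/* Editor's sketch (assumed usage, mirroring the patterns used later in
   this file): a caller that needs NCOPIES vector defs for operand OP of
   STMT chains the two functions above, with DT obtained beforehand from
   vect_is_simple_use:

     vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
     vec_defs.quick_push (vec_oprnd);
     for (k = 1; k < ncopies; k++)
       {
         vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
         vec_defs.quick_push (vec_oprnd);
       }  */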
1601
1602
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1605
1606 static void
1607 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1608                                  vec<tree> *vec_oprnds0,
1609                                  vec<tree> *vec_oprnds1)
1610 {
1611   tree vec_oprnd = vec_oprnds0->pop ();
1612
1613   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1614   vec_oprnds0->quick_push (vec_oprnd);
1615
1616   if (vec_oprnds1 && vec_oprnds1->length ())
1617     {
1618       vec_oprnd = vec_oprnds1->pop ();
1619       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1620       vec_oprnds1->quick_push (vec_oprnd);
1621     }
1622 }
1623
1624
1625 /* Get vectorized definitions for OP0 and OP1.
1626    REDUC_INDEX is the index of the reduction operand in case of reduction,
1627    and -1 otherwise.  */
1628
1629 void
1630 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1631                    vec<tree> *vec_oprnds0,
1632                    vec<tree> *vec_oprnds1,
1633                    slp_tree slp_node, int reduc_index)
1634 {
1635   if (slp_node)
1636     {
1637       int nops = (op1 == NULL_TREE) ? 1 : 2;
1638       auto_vec<tree> ops (nops);
1639       auto_vec<vec<tree> > vec_defs (nops);
1640
1641       ops.quick_push (op0);
1642       if (op1)
1643         ops.quick_push (op1);
1644
1645       vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646
1647       *vec_oprnds0 = vec_defs[0];
1648       if (op1)
1649         *vec_oprnds1 = vec_defs[1];
1650     }
1651   else
1652     {
1653       tree vec_oprnd;
1654
1655       vec_oprnds0->create (1);
1656       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1657       vec_oprnds0->quick_push (vec_oprnd);
1658
1659       if (op1)
1660         {
1661           vec_oprnds1->create (1);
1662           vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1663           vec_oprnds1->quick_push (vec_oprnd);
1664         }
1665     }
1666 }
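
/* Editor's note: for a two-operand stmt such as 'z = x + y', the first
   copy of the vector stmt typically gets its defs via

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        slp_node, -1);

   and each further copy via vect_get_vec_defs_for_stmt_copy above,
   which replaces the last element of each operand vector in place.  */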
1667
1668
1669 /* Function vect_finish_stmt_generation.
1670
1671    Insert a new stmt.  */
1672
1673 void
1674 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1675                              gimple_stmt_iterator *gsi)
1676 {
1677   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1680
1681   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682
1683   if (!gsi_end_p (*gsi)
1684       && gimple_has_mem_ops (vec_stmt))
1685     {
1686       gimple at_stmt = gsi_stmt (*gsi);
1687       tree vuse = gimple_vuse (at_stmt);
1688       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689         {
1690           tree vdef = gimple_vdef (at_stmt);
1691           gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1692           /* If we have an SSA vuse and insert a store, update virtual
1693              SSA form to avoid triggering the renamer.  Do so only
1694              if we can easily see all uses - which is what almost always
1695              happens with the way vectorized stmts are inserted.  */
1696           if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1697               && ((is_gimple_assign (vec_stmt)
1698                    && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1699                   || (is_gimple_call (vec_stmt)
1700                       && !(gimple_call_flags (vec_stmt)
1701                            & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702             {
1703               tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1704               gimple_set_vdef (vec_stmt, new_vdef);
1705               SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706             }
1707         }
1708     }
1709   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710
1711   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1712                                                    bb_vinfo));
1713
1714   if (dump_enabled_p ())
1715     {
1716       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1717       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1718     }
1719
1720   gimple_set_location (vec_stmt, gimple_location (stmt));
1721
1722   /* While EH edges will generally prevent vectorization, stmt might
1723      e.g. be in a must-not-throw region.  Ensure newly created stmts
1724      that could throw are part of the same region.  */
1725   int lp_nr = lookup_stmt_eh_lp (stmt);
1726   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1727     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1728 }
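
/* Editor's sketch: the usual emission pattern in this file pairs a
   gimple_build_* call with vect_finish_stmt_generation, e.g.

     new_stmt = gimple_build_assign (make_ssa_name (vec_dest), code,
                                     vec_oprnd0, vec_oprnd1);
     vect_finish_stmt_generation (stmt, new_stmt, gsi);

   which gives the new stmt its stmt_vec_info, location, virtual
   operands and EH region, as handled above.  */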
1729
1730 /* Checks if CALL can be vectorized in type VECTYPE.  Returns
1731    a function declaration if the target has a vectorized version
1732    of the function, or NULL_TREE if the function cannot be vectorized.  */
1733
1734 tree
1735 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1736 {
1737   tree fndecl = gimple_call_fndecl (call);
1738
1739   /* We only handle functions that do not read or clobber memory -- i.e.
1740      const or novops ones.  */
1741   if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1742     return NULL_TREE;
1743
1744   if (!fndecl
1745       || TREE_CODE (fndecl) != FUNCTION_DECL
1746       || !DECL_BUILT_IN (fndecl))
1747     return NULL_TREE;
1748
1749   return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1750                                                         vectype_in);
1751 }
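
/* Editor's note: for example, a call to BUILT_IN_SQRT with
   VECTYPE_OUT == VECTYPE_IN == V2DF may map to a target vector-sqrt
   builtin; whether the hook returns one is entirely target-specific.  */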
1752
1753
1754 static tree permute_vec_elements (tree, tree, tree, gimple,
1755                                   gimple_stmt_iterator *);
1756
1757
1758 /* Function vectorizable_mask_load_store.
1759
1760    Check if STMT performs a conditional load or store that can be vectorized.
1761    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1762    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1763    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1764
1765 static bool
1766 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1767                               gimple *vec_stmt, slp_tree slp_node)
1768 {
1769   tree vec_dest = NULL;
1770   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1771   stmt_vec_info prev_stmt_info;
1772   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1775   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1776   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1777   tree elem_type;
1778   gimple new_stmt;
1779   tree dummy;
1780   tree dataref_ptr = NULL_TREE;
1781   gimple ptr_incr;
1782   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1783   int ncopies;
1784   int i, j;
1785   bool inv_p;
1786   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1787   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1788   int gather_scale = 1;
1789   enum vect_def_type gather_dt = vect_unknown_def_type;
1790   bool is_store;
1791   tree mask;
1792   gimple def_stmt;
1793   tree def;
1794   enum vect_def_type dt;
1795
1796   if (slp_node != NULL)
1797     return false;
1798
1799   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1800   gcc_assert (ncopies >= 1);
1801
1802   is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1803   mask = gimple_call_arg (stmt, 2);
1804   if (TYPE_PRECISION (TREE_TYPE (mask))
1805       != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1806     return false;
1807
1808   /* FORNOW. This restriction should be relaxed.  */
1809   if (nested_in_vect_loop && ncopies > 1)
1810     {
1811       if (dump_enabled_p ())
1812         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813                          "multiple types in nested loop.\n");
1814       return false;
1815     }
1816
1817   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1818     return false;
1819
1820   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1821     return false;
1822
1823   if (!STMT_VINFO_DATA_REF (stmt_info))
1824     return false;
1825
1826   elem_type = TREE_TYPE (vectype);
1827
1828   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1829     return false;
1830
1831   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1832     return false;
1833
1834   if (STMT_VINFO_GATHER_P (stmt_info))
1835     {
1836       gimple def_stmt;
1837       tree def;
1838       gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1839                                        &gather_off, &gather_scale);
1840       gcc_assert (gather_decl);
1841       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1842                                  &def_stmt, &def, &gather_dt,
1843                                  &gather_off_vectype))
1844         {
1845           if (dump_enabled_p ())
1846             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847                              "gather index use not simple.\n");
1848           return false;
1849         }
1850
1851       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852       tree masktype
1853         = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1854       if (TREE_CODE (masktype) == INTEGER_TYPE)
1855         {
1856           if (dump_enabled_p ())
1857             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858                              "masked gather with integer mask not supported.\n");
1859           return false;
1860         }
1861     }
1862   else if (tree_int_cst_compare (nested_in_vect_loop
1863                                  ? STMT_VINFO_DR_STEP (stmt_info)
1864                                  : DR_STEP (dr), size_zero_node) <= 0)
1865     return false;
1866   else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1867            || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1868     return false;
1869
1870   if (TREE_CODE (mask) != SSA_NAME)
1871     return false;
1872
1873   if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1874                            &def_stmt, &def, &dt))
1875     return false;
1876
1877   if (is_store)
1878     {
1879       tree rhs = gimple_call_arg (stmt, 3);
1880       if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1881                                &def_stmt, &def, &dt))
1882         return false;
1883     }
1884
1885   if (!vec_stmt) /* transformation not required.  */
1886     {
1887       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1888       if (is_store)
1889         vect_model_store_cost (stmt_info, ncopies, false, dt,
1890                                NULL, NULL, NULL);
1891       else
1892         vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1893       return true;
1894     }
1895
1896   /** Transform.  **/
1897
1898   if (STMT_VINFO_GATHER_P (stmt_info))
1899     {
1900       tree vec_oprnd0 = NULL_TREE, op;
1901       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1902       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1903       tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1904       tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1905       tree mask_perm_mask = NULL_TREE;
1906       edge pe = loop_preheader_edge (loop);
1907       gimple_seq seq;
1908       basic_block new_bb;
1909       enum { NARROW, NONE, WIDEN } modifier;
1910       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911
1912       rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1913       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917       scaletype = TREE_VALUE (arglist);
1918       gcc_checking_assert (types_compatible_p (srctype, rettype)
1919                            && types_compatible_p (srctype, masktype));
1920
1921       if (nunits == gather_off_nunits)
1922         modifier = NONE;
1923       else if (nunits == gather_off_nunits / 2)
1924         {
1925           unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1926           modifier = WIDEN;
1927
1928           for (i = 0; i < gather_off_nunits; ++i)
1929             sel[i] = i | nunits;
1930
1931           perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1932         }
1933       else if (nunits == gather_off_nunits * 2)
1934         {
1935           unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1936           modifier = NARROW;
1937
1938           for (i = 0; i < nunits; ++i)
1939             sel[i] = i < gather_off_nunits
1940                      ? i : i + nunits - gather_off_nunits;
1941
1942           perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1943           ncopies *= 2;
1944           for (i = 0; i < nunits; ++i)
1945             sel[i] = i | gather_off_nunits;
1946           mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1947         }
1948       else
1949         gcc_unreachable ();
1950
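      /* Editor's worked example (illustrative, not in the original
	 source): with nunits == 4 and gather_off_nunits == 8 (WIDEN),
	 SEL above is {4,5,6,7,4,5,6,7}, so the odd copies read the high
	 half of the offset vector (the upper lanes are don't-care).
	 With nunits == 8 and gather_off_nunits == 4 (NARROW), PERM_MASK
	 is {0,1,2,3,8,9,10,11}, concatenating the valid low halves of
	 two successive gather results, and MASK_PERM_MASK is
	 {4,5,6,7,4,5,6,7}, moving the high half of the mask into the
	 low lanes for the odd copies.  */
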
1951       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952
1953       ptr = fold_convert (ptrtype, gather_base);
1954       if (!is_gimple_min_invariant (ptr))
1955         {
1956           ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1957           new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1958           gcc_assert (!new_bb);
1959         }
1960
1961       scale = build_int_cst (scaletype, gather_scale);
1962
1963       prev_stmt_info = NULL;
1964       for (j = 0; j < ncopies; ++j)
1965         {
1966           if (modifier == WIDEN && (j & 1))
1967             op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1968                                        perm_mask, stmt, gsi);
1969           else if (j == 0)
1970             op = vec_oprnd0
1971               = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1972           else
1973             op = vec_oprnd0
1974               = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975
1976           if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977             {
1978               gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1979                           == TYPE_VECTOR_SUBPARTS (idxtype));
1980               var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1981               var = make_ssa_name (var);
1982               op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1983               new_stmt
1984                 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1985               vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986               op = var;
1987             }
1988
1989           if (mask_perm_mask && (j & 1))
1990             mask_op = permute_vec_elements (mask_op, mask_op,
1991                                             mask_perm_mask, stmt, gsi);
1992           else
1993             {
1994               if (j == 0)
1995                 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1996               else
1997                 {
1998                   vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1999                                       &def_stmt, &def, &dt);
2000                   vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001                 }
2002
2003               mask_op = vec_mask;
2004               if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005                 {
2006                   gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007                               == TYPE_VECTOR_SUBPARTS (masktype));
2008                   var = vect_get_new_vect_var (masktype, vect_simple_var,
2009                                                NULL);
2010                   var = make_ssa_name (var);
2011                   mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012                   new_stmt
2013                     = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2014                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015                   mask_op = var;
2016                 }
2017             }
2018
2019           new_stmt
2020             = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021                                  scale);
2022
2023           if (!useless_type_conversion_p (vectype, rettype))
2024             {
2025               gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026                           == TYPE_VECTOR_SUBPARTS (rettype));
2027               var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028               op = make_ssa_name (var, new_stmt);
2029               gimple_call_set_lhs (new_stmt, op);
2030               vect_finish_stmt_generation (stmt, new_stmt, gsi);
2031               var = make_ssa_name (vec_dest);
2032               op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2033               new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034             }
2035           else
2036             {
2037               var = make_ssa_name (vec_dest, new_stmt);
2038               gimple_call_set_lhs (new_stmt, var);
2039             }
2040
2041           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042
2043           if (modifier == NARROW)
2044             {
2045               if ((j & 1) == 0)
2046                 {
2047                   prev_res = var;
2048                   continue;
2049                 }
2050               var = permute_vec_elements (prev_res, var,
2051                                           perm_mask, stmt, gsi);
2052               new_stmt = SSA_NAME_DEF_STMT (var);
2053             }
2054
2055           if (prev_stmt_info == NULL)
2056             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057           else
2058             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059           prev_stmt_info = vinfo_for_stmt (new_stmt);
2060         }
2061
2062       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063          from the IL.  */
2064       tree lhs = gimple_call_lhs (stmt);
2065       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066       set_vinfo_for_stmt (new_stmt, stmt_info);
2067       set_vinfo_for_stmt (stmt, NULL);
2068       STMT_VINFO_STMT (stmt_info) = new_stmt;
2069       gsi_replace (gsi, new_stmt, true);
2070       return true;
2071     }
2072   else if (is_store)
2073     {
2074       tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075       prev_stmt_info = NULL;
2076       for (i = 0; i < ncopies; i++)
2077         {
2078           unsigned align, misalign;
2079
2080           if (i == 0)
2081             {
2082               tree rhs = gimple_call_arg (stmt, 3);
2083               vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084               vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085               /* We should have caught mismatched types earlier.  */
2086               gcc_assert (useless_type_conversion_p (vectype,
2087                                                      TREE_TYPE (vec_rhs)));
2088               dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2089                                                       NULL_TREE, &dummy, gsi,
2090                                                       &ptr_incr, false, &inv_p);
2091               gcc_assert (!inv_p);
2092             }
2093           else
2094             {
2095               vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2096                                   &def, &dt);
2097               vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2098               vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2099                                   &def, &dt);
2100               vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2101               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2102                                              TYPE_SIZE_UNIT (vectype));
2103             }
2104
2105           align = TYPE_ALIGN_UNIT (vectype);
2106           if (aligned_access_p (dr))
2107             misalign = 0;
2108           else if (DR_MISALIGNMENT (dr) == -1)
2109             {
2110               align = TYPE_ALIGN_UNIT (elem_type);
2111               misalign = 0;
2112             }
2113           else
2114             misalign = DR_MISALIGNMENT (dr);
2115           set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2116                                   misalign);
2117           new_stmt
2118             = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2119                                           gimple_call_arg (stmt, 1),
2120                                           vec_mask, vec_rhs);
2121           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122           if (i == 0)
2123             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124           else
2125             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126           prev_stmt_info = vinfo_for_stmt (new_stmt);
2127         }
2128     }
2129   else
2130     {
2131       tree vec_mask = NULL_TREE;
2132       prev_stmt_info = NULL;
2133       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2134       for (i = 0; i < ncopies; i++)
2135         {
2136           unsigned align, misalign;
2137
2138           if (i == 0)
2139             {
2140               vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2141               dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2142                                                       NULL_TREE, &dummy, gsi,
2143                                                       &ptr_incr, false, &inv_p);
2144               gcc_assert (!inv_p);
2145             }
2146           else
2147             {
2148               vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2149                                   &def, &dt);
2150               vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2151               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2152                                              TYPE_SIZE_UNIT (vectype));
2153             }
2154
2155           align = TYPE_ALIGN_UNIT (vectype);
2156           if (aligned_access_p (dr))
2157             misalign = 0;
2158           else if (DR_MISALIGNMENT (dr) == -1)
2159             {
2160               align = TYPE_ALIGN_UNIT (elem_type);
2161               misalign = 0;
2162             }
2163           else
2164             misalign = DR_MISALIGNMENT (dr);
2165           set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2166                                   misalign);
2167           new_stmt
2168             = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2169                                           gimple_call_arg (stmt, 1),
2170                                           vec_mask);
2171           gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2172           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2173           if (i == 0)
2174             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2175           else
2176             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177           prev_stmt_info = vinfo_for_stmt (new_stmt);
2178         }
2179     }
2180
2181   if (!is_store)
2182     {
2183       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184          from the IL.  */
2185       tree lhs = gimple_call_lhs (stmt);
2186       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2187       set_vinfo_for_stmt (new_stmt, stmt_info);
2188       set_vinfo_for_stmt (stmt, NULL);
2189       STMT_VINFO_STMT (stmt_info) = new_stmt;
2190       gsi_replace (gsi, new_stmt, true);
2191     }
2192
2193   return true;
2194 }
2195
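/* Editor's illustration (assumed IL shapes): a conditional load
   recognized by if-conversion as

     lhs_5 = MASK_LOAD (ptr, align, mask_3);

   is vectorized above into IFN_MASK_LOAD calls on VEC_DEST, and the
   scalar call itself is then rewritten to 'lhs = 0' so that it
   disappears from the IL even with -fno-tree-dce.  */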
2196
2197 /* Function vectorizable_call.
2198
2199    Check if GS performs a function call that can be vectorized.
2200    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2201    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2202    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2203
2204 static bool
2205 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2206                    slp_tree slp_node)
2207 {
2208   gcall *stmt;
2209   tree vec_dest;
2210   tree scalar_dest;
2211   tree op, type;
2212   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2213   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2214   tree vectype_out, vectype_in;
2215   int nunits_in;
2216   int nunits_out;
2217   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2218   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2219   tree fndecl, new_temp, def, rhs_type;
2220   gimple def_stmt;
2221   enum vect_def_type dt[3]
2222     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2223   gimple new_stmt = NULL;
2224   int ncopies, j;
2225   vec<tree> vargs = vNULL;
2226   enum { NARROW, NONE, WIDEN } modifier;
2227   size_t i, nargs;
2228   tree lhs;
2229
2230   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2231     return false;
2232
2233   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2234     return false;
2235
2236   /* Is GS a vectorizable call?   */
2237   stmt = dyn_cast <gcall *> (gs);
2238   if (!stmt)
2239     return false;
2240
2241   if (gimple_call_internal_p (stmt)
2242       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244     return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245                                          slp_node);
2246
2247   if (gimple_call_lhs (stmt) == NULL_TREE
2248       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2249     return false;
2250
2251   gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252
2253   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254
2255   /* Process function arguments.  */
2256   rhs_type = NULL_TREE;
2257   vectype_in = NULL_TREE;
2258   nargs = gimple_call_num_args (stmt);
2259
2260   /* Bail out if the function has more than three arguments; we do not have
2261      interesting builtin functions to vectorize with more than two arguments
2262      except for fma.  A call with no arguments is not handled either.  */
2263   if (nargs == 0 || nargs > 3)
2264     return false;
2265
2266   /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2267   if (gimple_call_internal_p (stmt)
2268       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269     {
2270       nargs = 0;
2271       rhs_type = unsigned_type_node;
2272     }
2273
2274   for (i = 0; i < nargs; i++)
2275     {
2276       tree opvectype;
2277
2278       op = gimple_call_arg (stmt, i);
2279
2280       /* We can only handle calls with arguments of the same type.  */
2281       if (rhs_type
2282           && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2283         {
2284           if (dump_enabled_p ())
2285             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286                              "argument types differ.\n");
2287           return false;
2288         }
2289       if (!rhs_type)
2290         rhs_type = TREE_TYPE (op);
2291
2292       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2293                                  &def_stmt, &def, &dt[i], &opvectype))
2294         {
2295           if (dump_enabled_p ())
2296             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297                              "use not simple.\n");
2298           return false;
2299         }
2300
2301       if (!vectype_in)
2302         vectype_in = opvectype;
2303       else if (opvectype
2304                && opvectype != vectype_in)
2305         {
2306           if (dump_enabled_p ())
2307             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308                              "argument vector types differ.\n");
2309           return false;
2310         }
2311     }
2312   /* If all arguments are external or constant defs use a vector type with
2313      the same size as the output vector type.  */
2314   if (!vectype_in)
2315     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2316   if (vec_stmt)
2317     gcc_assert (vectype_in);
2318   if (!vectype_in)
2319     {
2320       if (dump_enabled_p ())
2321         {
2322           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2323                            "no vectype for scalar type ");
2324           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2325           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2326         }
2327
2328       return false;
2329     }
2330
2331   /* FORNOW */
2332   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2334   if (nunits_in == nunits_out / 2)
2335     modifier = NARROW;
2336   else if (nunits_out == nunits_in)
2337     modifier = NONE;
2338   else if (nunits_out == nunits_in / 2)
2339     modifier = WIDEN;
2340   else
2341     return false;
2342
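  /* Editor's example: V4SI inputs with a V8HI output (nunits_in == 4,
     nunits_out == 8) select NARROW; equal subparts select NONE; V8HI
     inputs with a V4SI output select WIDEN.  */
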
2343   /* For now, we only vectorize functions if a target specific builtin
2344      is available.  TODO -- in some cases, it might be profitable to
2345      insert the calls for pieces of the vector, in order to be able
2346      to vectorize other operations in the loop.  */
2347   fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2348   if (fndecl == NULL_TREE)
2349     {
2350       if (gimple_call_internal_p (stmt)
2351           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2352           && !slp_node
2353           && loop_vinfo
2354           && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2355           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2356           && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2357              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358         {
2359           /* We can handle IFN_GOMP_SIMD_LANE by returning a
2360              { 0, 1, 2, ... vf - 1 } vector.  */
2361           gcc_assert (nargs == 0);
2362         }
2363       else
2364         {
2365           if (dump_enabled_p ())
2366             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367                              "function is not vectorizable.\n");
2368           return false;
2369         }
2370     }
2371
2372   gcc_assert (!gimple_vuse (stmt));
2373
2374   if (slp_node || PURE_SLP_STMT (stmt_info))
2375     ncopies = 1;
2376   else if (modifier == NARROW)
2377     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2378   else
2379     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380
2381   /* Sanity check: make sure that at least one copy of the vectorized stmt
2382      needs to be generated.  */
2383   gcc_assert (ncopies >= 1);
2384
2385   if (!vec_stmt) /* transformation not required.  */
2386     {
2387       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2388       if (dump_enabled_p ())
2389         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2390                          "\n");
2391       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2392       return true;
2393     }
2394
2395   /** Transform.  **/
2396
2397   if (dump_enabled_p ())
2398     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2399
2400   /* Handle def.  */
2401   scalar_dest = gimple_call_lhs (stmt);
2402   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403
2404   prev_stmt_info = NULL;
2405   switch (modifier)
2406     {
2407     case NONE:
2408       for (j = 0; j < ncopies; ++j)
2409         {
2410           /* Build argument list for the vectorized call.  */
2411           if (j == 0)
2412             vargs.create (nargs);
2413           else
2414             vargs.truncate (0);
2415
2416           if (slp_node)
2417             {
2418               auto_vec<vec<tree> > vec_defs (nargs);
2419               vec<tree> vec_oprnds0;
2420
2421               for (i = 0; i < nargs; i++)
2422                 vargs.quick_push (gimple_call_arg (stmt, i));
2423               vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2424               vec_oprnds0 = vec_defs[0];
2425
2426               /* Arguments are ready.  Create the new vector stmt.  */
2427               FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2428                 {
2429                   size_t k;
2430                   for (k = 0; k < nargs; k++)
2431                     {
2432                       vec<tree> vec_oprndsk = vec_defs[k];
2433                       vargs[k] = vec_oprndsk[i];
2434                     }
2435                   new_stmt = gimple_build_call_vec (fndecl, vargs);
2436                   new_temp = make_ssa_name (vec_dest, new_stmt);
2437                   gimple_call_set_lhs (new_stmt, new_temp);
2438                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
2439                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2440                 }
2441
2442               for (i = 0; i < nargs; i++)
2443                 {
2444                   vec<tree> vec_oprndsi = vec_defs[i];
2445                   vec_oprndsi.release ();
2446                 }
2447               continue;
2448             }
2449
2450           for (i = 0; i < nargs; i++)
2451             {
2452               op = gimple_call_arg (stmt, i);
2453               if (j == 0)
2454                 vec_oprnd0
2455                   = vect_get_vec_def_for_operand (op, stmt, NULL);
2456               else
2457                 {
2458                   vec_oprnd0 = gimple_call_arg (new_stmt, i);
2459                   vec_oprnd0
2460                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2461                 }
2462
2463               vargs.quick_push (vec_oprnd0);
2464             }
2465
2466           if (gimple_call_internal_p (stmt)
2467               && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468             {
2469               tree *v = XALLOCAVEC (tree, nunits_out);
2470               int k;
2471               for (k = 0; k < nunits_out; ++k)
2472                 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2473               tree cst = build_vector (vectype_out, v);
2474               tree new_var
2475                 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2476               gimple init_stmt = gimple_build_assign (new_var, cst);
2477               new_temp = make_ssa_name (new_var, init_stmt);
2478               gimple_assign_set_lhs (init_stmt, new_temp);
2479               vect_init_vector_1 (stmt, init_stmt, NULL);
2480               new_temp = make_ssa_name (vec_dest);
2481               new_stmt = gimple_build_assign (new_temp,
2482                                               gimple_assign_lhs (init_stmt));
2483             }
2484           else
2485             {
2486               new_stmt = gimple_build_call_vec (fndecl, vargs);
2487               new_temp = make_ssa_name (vec_dest, new_stmt);
2488               gimple_call_set_lhs (new_stmt, new_temp);
2489             }
2490           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491
2492           if (j == 0)
2493             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2494           else
2495             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496
2497           prev_stmt_info = vinfo_for_stmt (new_stmt);
2498         }
2499
2500       break;
2501
2502     case NARROW:
2503       for (j = 0; j < ncopies; ++j)
2504         {
2505           /* Build argument list for the vectorized call.  */
2506           if (j == 0)
2507             vargs.create (nargs * 2);
2508           else
2509             vargs.truncate (0);
2510
2511           if (slp_node)
2512             {
2513               auto_vec<vec<tree> > vec_defs (nargs);
2514               vec<tree> vec_oprnds0;
2515
2516               for (i = 0; i < nargs; i++)
2517                 vargs.quick_push (gimple_call_arg (stmt, i));
2518               vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2519               vec_oprnds0 = vec_defs[0];
2520
2521               /* Arguments are ready.  Create the new vector stmt.  */
2522               for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2523                 {
2524                   size_t k;
2525                   vargs.truncate (0);
2526                   for (k = 0; k < nargs; k++)
2527                     {
2528                       vec<tree> vec_oprndsk = vec_defs[k];
2529                       vargs.quick_push (vec_oprndsk[i]);
2530                       vargs.quick_push (vec_oprndsk[i + 1]);
2531                     }
2532                   new_stmt = gimple_build_call_vec (fndecl, vargs);
2533                   new_temp = make_ssa_name (vec_dest, new_stmt);
2534                   gimple_call_set_lhs (new_stmt, new_temp);
2535                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
2536                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2537                 }
2538
2539               for (i = 0; i < nargs; i++)
2540                 {
2541                   vec<tree> vec_oprndsi = vec_defs[i];
2542                   vec_oprndsi.release ();
2543                 }
2544               continue;
2545             }
2546
2547           for (i = 0; i < nargs; i++)
2548             {
2549               op = gimple_call_arg (stmt, i);
2550               if (j == 0)
2551                 {
2552                   vec_oprnd0
2553                     = vect_get_vec_def_for_operand (op, stmt, NULL);
2554                   vec_oprnd1
2555                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556                 }
2557               else
2558                 {
2559                   vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2560                   vec_oprnd0
2561                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2562                   vec_oprnd1
2563                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2564                 }
2565
2566               vargs.quick_push (vec_oprnd0);
2567               vargs.quick_push (vec_oprnd1);
2568             }
2569
2570           new_stmt = gimple_build_call_vec (fndecl, vargs);
2571           new_temp = make_ssa_name (vec_dest, new_stmt);
2572           gimple_call_set_lhs (new_stmt, new_temp);
2573           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574
2575           if (j == 0)
2576             STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577           else
2578             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579
2580           prev_stmt_info = vinfo_for_stmt (new_stmt);
2581         }
2582
2583       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584
2585       break;
2586
2587     case WIDEN:
2588       /* No current target implements this case.  */
2589       return false;
2590     }
2591
2592   vargs.release ();
2593
2594   /* The call in STMT might prevent it from being removed in dce.
2595      We however cannot remove it here, due to the way the ssa name
2596      it defines is mapped to the new definition.  So just replace
2597      the rhs of the statement with something harmless.  */
2598
2599   if (slp_node)
2600     return true;
2601
2602   type = TREE_TYPE (scalar_dest);
2603   if (is_pattern_stmt_p (stmt_info))
2604     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2605   else
2606     lhs = gimple_call_lhs (stmt);
2607   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2608   set_vinfo_for_stmt (new_stmt, stmt_info);
2609   set_vinfo_for_stmt (stmt, NULL);
2610   STMT_VINFO_STMT (stmt_info) = new_stmt;
2611   gsi_replace (gsi, new_stmt, false);
2612
2613   return true;
2614 }
2615
2616
2617 struct simd_call_arg_info
2618 {
2619   tree vectype;                /* Vector type of the argument, if any.  */
2620   tree op;                     /* Base value for linear arguments.  */
2621   enum vect_def_type dt;       /* How the argument is defined.  */
2622   HOST_WIDE_INT linear_step;   /* Step for linear arguments, else 0.  */
2623   unsigned int align;          /* Known pointer alignment in bytes.  */
2624 };
2625
2626 /* Function vectorizable_simd_clone_call.
2627
2628    Check if STMT performs a function call that can be vectorized
2629    by calling a simd clone of the function.
2630    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2632    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2633
2634 static bool
2635 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636                               gimple *vec_stmt, slp_tree slp_node)
2637 {
2638   tree vec_dest;
2639   tree scalar_dest;
2640   tree op, type;
2641   tree vec_oprnd0 = NULL_TREE;
2642   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643   tree vectype;
2644   unsigned int nunits;
2645   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648   tree fndecl, new_temp, def;
2649   gimple def_stmt;
2650   gimple new_stmt = NULL;
2651   int ncopies, j;
2652   vec<simd_call_arg_info> arginfo = vNULL;
2653   vec<tree> vargs = vNULL;
2654   size_t i, nargs;
2655   tree lhs, rtype, ratype;
2656   vec<constructor_elt, va_gc> *ret_ctor_elts;
2657
2658   /* Is STMT a vectorizable call?   */
2659   if (!is_gimple_call (stmt))
2660     return false;
2661
2662   fndecl = gimple_call_fndecl (stmt);
2663   if (fndecl == NULL_TREE)
2664     return false;
2665
2666   struct cgraph_node *node = cgraph_node::get (fndecl);
2667   if (node == NULL || node->simd_clones == NULL)
2668     return false;
2669
2670   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671     return false;
2672
2673   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674     return false;
2675
2676   if (gimple_call_lhs (stmt)
2677       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678     return false;
2679
2680   gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681
2682   vectype = STMT_VINFO_VECTYPE (stmt_info);
2683
2684   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685     return false;
2686
2687   /* FORNOW */
2688   if (slp_node || PURE_SLP_STMT (stmt_info))
2689     return false;
2690
2691   /* Process function arguments.  */
2692   nargs = gimple_call_num_args (stmt);
2693
2694   /* Bail out if the function has zero arguments.  */
2695   if (nargs == 0)
2696     return false;
2697
2698   arginfo.create (nargs);
2699
2700   for (i = 0; i < nargs; i++)
2701     {
2702       simd_call_arg_info thisarginfo;
2703       affine_iv iv;
2704
2705       thisarginfo.linear_step = 0;
2706       thisarginfo.align = 0;
2707       thisarginfo.op = NULL_TREE;
2708
2709       op = gimple_call_arg (stmt, i);
2710       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711                                  &def_stmt, &def, &thisarginfo.dt,
2712                                  &thisarginfo.vectype)
2713           || thisarginfo.dt == vect_uninitialized_def)
2714         {
2715           if (dump_enabled_p ())
2716             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717                              "use not simple.\n");
2718           arginfo.release ();
2719           return false;
2720         }
2721
2722       if (thisarginfo.dt == vect_constant_def
2723           || thisarginfo.dt == vect_external_def)
2724         gcc_assert (thisarginfo.vectype == NULL_TREE);
2725       else
2726         gcc_assert (thisarginfo.vectype != NULL_TREE);
2727
2728       /* For linear arguments, the analyze phase should have saved
2729          the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
2730       if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731           && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732         {
2733           gcc_assert (vec_stmt);
2734           thisarginfo.linear_step
2735             = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736           thisarginfo.op
2737             = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738           /* If the loop has been peeled for alignment, adjust the base.  */
2739           tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2740           tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2741           if (n1 != n2)
2742             {
2743               tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2744               tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2745               tree opt = TREE_TYPE (thisarginfo.op);
2746               bias = fold_convert (TREE_TYPE (step), bias);
2747               bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2748               thisarginfo.op
2749                 = fold_build2 (POINTER_TYPE_P (opt)
2750                                ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2751                                thisarginfo.op, bias);
2752             }
2753         }
2754       else if (!vec_stmt
2755                && thisarginfo.dt != vect_constant_def
2756                && thisarginfo.dt != vect_external_def
2757                && loop_vinfo
2758                && TREE_CODE (op) == SSA_NAME
2759                && simple_iv (loop, loop_containing_stmt (stmt), op,
2760                              &iv, false)
2761                && tree_fits_shwi_p (iv.step))
2762         {
2763           thisarginfo.linear_step = tree_to_shwi (iv.step);
2764           thisarginfo.op = iv.base;
2765         }
2766       else if ((thisarginfo.dt == vect_constant_def
2767                 || thisarginfo.dt == vect_external_def)
2768                && POINTER_TYPE_P (TREE_TYPE (op)))
2769         thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770
2771       arginfo.quick_push (thisarginfo);
2772     }
2773
2774   unsigned int badness = 0;
2775   struct cgraph_node *bestn = NULL;
2776   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2777     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2778   else
2779     for (struct cgraph_node *n = node->simd_clones; n != NULL;
2780          n = n->simdclone->next_clone)
2781       {
2782         unsigned int this_badness = 0;
2783         if (n->simdclone->simdlen
2784             > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2785             || n->simdclone->nargs != nargs)
2786           continue;
2787         if (n->simdclone->simdlen
2788             < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789           this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790                            - exact_log2 (n->simdclone->simdlen)) * 1024;
2791         if (n->simdclone->inbranch)
2792           this_badness += 2048;
2793         int target_badness = targetm.simd_clone.usable (n);
2794         if (target_badness < 0)
2795           continue;
2796         this_badness += target_badness * 512;
2797         /* FORNOW: Have to add code to add the mask argument.  */
2798         if (n->simdclone->inbranch)
2799           continue;
2800         for (i = 0; i < nargs; i++)
2801           {
2802             switch (n->simdclone->args[i].arg_type)
2803               {
2804               case SIMD_CLONE_ARG_TYPE_VECTOR:
2805                 if (!useless_type_conversion_p
2806                         (n->simdclone->args[i].orig_type,
2807                          TREE_TYPE (gimple_call_arg (stmt, i))))
2808                   i = -1;
2809                 else if (arginfo[i].dt == vect_constant_def
2810                          || arginfo[i].dt == vect_external_def
2811                          || arginfo[i].linear_step)
2812                   this_badness += 64;
2813                 break;
2814               case SIMD_CLONE_ARG_TYPE_UNIFORM:
2815                 if (arginfo[i].dt != vect_constant_def
2816                     && arginfo[i].dt != vect_external_def)
2817                   i = -1;
2818                 break;
2819               case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2820                 if (arginfo[i].dt == vect_constant_def
2821                     || arginfo[i].dt == vect_external_def
2822                     || (arginfo[i].linear_step
2823                         != n->simdclone->args[i].linear_step))
2824                   i = -1;
2825                 break;
2826               case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2827                 /* FORNOW */
2828                 i = -1;
2829                 break;
2830               case SIMD_CLONE_ARG_TYPE_MASK:
2831                 gcc_unreachable ();
2832               }
2833             if (i == (size_t) -1)
2834               break;
2835             if (n->simdclone->args[i].alignment > arginfo[i].align)
2836               {
2837                 i = -1;
2838                 break;
2839               }
2840             if (arginfo[i].align)
2841               this_badness += (exact_log2 (arginfo[i].align)
2842                                - exact_log2 (n->simdclone->args[i].alignment));
2843           }
2844         if (i == (size_t) -1)
2845           continue;
2846         if (bestn == NULL || this_badness < badness)
2847           {
2848             bestn = n;
2849             badness = this_badness;
2850           }
2851       }
2852
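  /* Editor's worked example: with LOOP_VINFO_VECT_FACTOR == 8, a clone
     of simdlen 4 starts at (log2 (8) - log2 (4)) * 1024 == 1024; a
     usable () score of 1 adds 512, and each argument the clone expects
     in a vector but that is in fact constant, external or linear adds
     64.  The clone with the smallest total badness wins.  */
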
2853   if (bestn == NULL)
2854     {
2855       arginfo.release ();
2856       return false;
2857     }
2858
2859   for (i = 0; i < nargs; i++)
2860     if ((arginfo[i].dt == vect_constant_def
2861          || arginfo[i].dt == vect_external_def)
2862         && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863       {
2864         arginfo[i].vectype
2865           = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2866                                                                      i)));
2867         if (arginfo[i].vectype == NULL
2868             || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2869                 > bestn->simdclone->simdlen))
2870           {
2871             arginfo.release ();
2872             return false;
2873           }
2874       }
2875
2876   fndecl = bestn->decl;
2877   nunits = bestn->simdclone->simdlen;
2878   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879
2880   /* If the function isn't const, only allow it in simd loops where user
2881      has asserted that at least nunits consecutive iterations can be
2882      performed using SIMD instructions.  */
2883   if ((loop == NULL || (unsigned) loop->safelen < nunits)
2884       && gimple_vuse (stmt))
2885     {
2886       arginfo.release ();
2887       return false;
2888     }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt, NULL);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  size_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt, NULL);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
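              /* Illustrative example (hypothetical types): if the loop
                 computes one V8SI def per copy but the clone takes V4SI
                 arguments, then k == 2 and each V8SI def is split into
                 two 128-bit BIT_FIELD_REFs at bit offsets 0 and 128;
                 conversely, if the clone takes V8SI while the defs are
                 V4SI, two consecutive defs are combined into one
                 argument with a CONSTRUCTOR.  */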
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
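              /* A sketch of what the j == 0 path builds (illustrative
                 GIMPLE, names invented): for a linear argument N with
                 step S, NCOPIES copies and simdlen V it creates

                   # n_iv = PHI <n_init(preheader), n_next(latch)>
                   n_next = n_iv + S * NCOPIES * V;

                 passes n_iv to the first copy, and later copies (j > 0)
                 pass n_iv + S * j * V.  */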
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber), gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in DCE.
     However, we cannot remove it here, due to the way the SSA name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
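
/* As an overall illustration of the above (hypothetical declaration and
   GIMPLE, not taken from any particular target):

     #pragma omp declare simd simdlen(4) uniform(s) linear(i:1)
     int foo (int a, int s, int i);

   a call b_1 = foo (a_2, s_3, i_4) in a loop vectorized with VF 4 can
   become a single call to the simdlen-4 clone, with the four values of a
   passed as one V4SI argument, s_3 passed through unchanged, and i_4
   passed as the scalar base of the linear sequence i, i+1, i+2, i+3.  */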


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose number of arguments is
   OP_TYPE, and whose result variable is VEC_DEST; its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at
   GSI.  In the case that CODE is a CALL_EXPR, this means that a call to
   DECL needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
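
/* For instance (illustrative): a widening multiply of V8HI operands
   produces its V4SI results in two halves, typically via the
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR pair; this helper
   emits one of the two half statements per call.  */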


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the
     very first one (which is the scalar operand) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
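
/* E.g. (illustrative): for a two-step narrowing (MULTI_STEP_CVT == 1)
   this collects 2 + 2 = 4 vector defs into VEC_OPRNDS, enough for two
   levels of pairwise packing down to the final type.  */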


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in the vector info of the scalar
             statement (or in the STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
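
/* Illustrative two-step demotion (hypothetical types): converting four
   V4SI operands down to V16QI first packs pairs of V4SI into two V8HI,
   then packs those two V8HI into one V16QI, each step applying
   VEC_PACK_TRUNC_EXPR to two operands.  */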


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
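
/* E.g. (illustrative): promoting two V8HI operands yields four V4SI
   results in VEC_OPRNDS0, one lo half and one hi half per source vector,
   ready for a further promotion step if one is needed.  */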


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of the
     same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;
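
  /* Illustrative examples: int -> short with V4SI input and V8HI output
     gives nunits_in == 4 < nunits_out == 8, hence NARROW; short -> int
     with V8HI input and V4SI output gives WIDEN; int -> float of equal
     width gives NONE.  */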

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;
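
      /* Illustrative example: for (double) x with short x there is
         usually no direct short -> double vector conversion, so the loop
         above settles on an int intermediate: first widen the shorts to
         ints with a NOP-based widening, then convert the ints to doubles
         with FLOAT_EXPR.  */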

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;
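
      /* Illustrative example: for (signed char) d with double d there is
         typically no direct double -> char fix-trunc, so the code above
         first converts double -> 64-bit integer (FIX_TRUNC_EXPR into
         CVT_TYPE) and then narrows the integers to char in steps of
         pairwise packing.  */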

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (received from
     supportable_*_operation) and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);