/* dragonfly.git: contrib/gcc-5.0/gcc/tree-vect-stmts.c
   (gcc-50 updated to SVN version 221572).  */
1/* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "dumpfile.h"
26#include "tm.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "target.h"
40#include "predict.h"
41#include "hard-reg-set.h"
42#include "function.h"
43#include "dominance.h"
44#include "cfg.h"
45#include "basic-block.h"
46#include "gimple-pretty-print.h"
47#include "tree-ssa-alias.h"
48#include "internal-fn.h"
49#include "tree-eh.h"
50#include "gimple-expr.h"
51#include "is-a.h"
52#include "gimple.h"
53#include "gimplify.h"
54#include "gimple-iterator.h"
55#include "gimplify-me.h"
56#include "gimple-ssa.h"
57#include "tree-cfg.h"
58#include "tree-phinodes.h"
59#include "ssa-iterators.h"
60#include "stringpool.h"
61#include "tree-ssanames.h"
62#include "tree-ssa-loop-manip.h"
63#include "cfgloop.h"
64#include "tree-ssa-loop.h"
65#include "tree-scalar-evolution.h"
66#include "hashtab.h"
67#include "rtl.h"
68#include "flags.h"
69#include "statistics.h"
70#include "real.h"
71#include "fixed-value.h"
72#include "insn-config.h"
73#include "expmed.h"
74#include "dojump.h"
75#include "explow.h"
76#include "calls.h"
77#include "emit-rtl.h"
78#include "varasm.h"
79#include "stmt.h"
80#include "expr.h"
81#include "recog.h" /* FIXME: for insn_data */
82#include "insn-codes.h"
83#include "optabs.h"
84#include "diagnostic-core.h"
85#include "tree-vectorizer.h"
86#include "hash-map.h"
87#include "plugin-api.h"
88#include "ipa-ref.h"
89#include "cgraph.h"
90#include "builtins.h"
91
92/* For lang_hooks.types.type_for_mode. */
93#include "langhooks.h"
94
95/* Return the vectorized type for the given statement. */
96
97tree
98stmt_vectype (struct _stmt_vec_info *stmt_info)
99{
100 return STMT_VINFO_VECTYPE (stmt_info);
101}
102
103/* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105bool
106stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
107{
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
112
113 if (!loop_vinfo)
114 return false;
115
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
117
118 return (bb->loop_father == loop->inner);
119}
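
/* For example, when the i-loop below is the loop being vectorized
   (outer-loop vectorization), stmt_in_inner_loop_p returns true for the
   statement in the j-loop:

     for (i = 0; i < N; i++)         <-- loop being vectorized
       for (j = 0; j < M; j++)       <-- inner loop
         a[i] += b[i][j];            <-- stmt in inner loop  */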
120
121/* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
124
125unsigned
126record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 int misalign, enum vect_cost_model_location where)
129{
130 if (body_cost_vec)
131 {
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
136 return (unsigned)
137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
138
139 }
140 else
141 {
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
145
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
150
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
153 }
154}
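
/* A minimal usage sketch (the variable names are only illustrative): a
   caller accumulating the in-loop cost of NCOPIES copies of a plain vector
   statement would do

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   whereas passing a NULL cost vector hands the cost directly to the
   target's cost model via add_stmt_cost.  */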
155
156/* Return a variable of type ELEM_TYPE[NELEMS]. */
157
158static tree
159create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
160{
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
163}
164
165/* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
169
170static tree
171read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
173{
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
176
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
183
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
188
189 return vect_name;
190}
191
192/* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
195
196static void
197write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
199{
200 tree array_ref;
201 gimple new_stmt;
202
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
206
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
209}
210
211/* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
214
215static tree
216create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
217{
218 tree mem_ref, alias_ptr_type;
219
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224 return mem_ref;
225}
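
/* Illustrative sketch: these array helpers back the load/store-lanes code
   paths.  A store-lanes group of two vectors VX and VY is emitted roughly as

     write_vector_array (stmt, gsi, vx, array, 0);
     write_vector_array (stmt, gsi, vy, array, 1);
     ... one store of the whole array through create_array_ref ...

   and a load-lanes group reads the lanes back with read_vector_array.  */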
226
227/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
228
229/* Function vect_mark_relevant.
230
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
232
233static void
234vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
237{
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 gimple pattern_stmt;
242
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d.\n", relevant, live_p);
246
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252 {
253 bool found = false;
254 if (!used_in_pattern)
255 {
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
262
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
267
268 /* This is a use outside the pattern. If LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
273 {
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
277
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
280
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
283 {
284 found = true;
285 break;
286 }
287 }
288 }
289
290 if (!found)
291 {
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
296
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
298
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
308 }
309 }
310
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
314
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
317 {
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "already marked relevant/live.\n");
321 return;
322 }
323
324 worklist->safe_push (stmt);
325}
326
327
328/* Function vect_stmt_relevant_p.
329
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
332
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - it is a control stmt in the loop (except for the exit condition).
337
338 CHECKME: what other side effects would the vectorizer allow? */
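
/* For example, in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- relevant: has a vdef (alters memory)
         s = s + b[i];
       }
     ... = s;                <-- s is used outside the loop: live

   the store is marked relevant and the definition of s is marked live.  */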
339
340static bool
341vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
343{
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
349
350 *relevant = vect_unused_in_scope;
351 *live_p = false;
352
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
357 *relevant = vect_used_in_scope;
358
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
363 {
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant = vect_used_in_scope;
368 }
369
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
372 {
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
374 {
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
377 {
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE, vect_location,
380 "vec_stmt_relevant_p: used out of loop.\n");
381
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
384
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
389
390 *live_p = true;
391 }
392 }
393 }
394
395 return (*live_p || *relevant);
396}
397
398
399/* Function exist_non_indexing_operands_for_use_p
400
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
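
/* For example, for the store "a[i_1] = x_2" this returns true when USE is
   x_2 (the stored value) and false when USE is i_1, which is only used to
   index the array.  */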
403
404static bool
405exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
406{
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
409
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
415
416 /* STMT has a data_ref. FORNOW this means that it is of one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
421
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
424 for array indexing.
425
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
428
429 if (!gimple_assign_copy_p (stmt))
430 {
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
434 {
435 case IFN_MASK_STORE:
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
447 }
448 return false;
449 }
450
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
453 operand = gimple_assign_rhs1 (stmt);
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
456
457 if (operand == use)
458 return true;
459
460 return false;
461}
462
463
464/*
465 Function process_use.
466
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
474
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT because it has already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
488
489 Return true if everything is as expected. Return false otherwise. */
490
491static bool
492process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 enum vect_relevant relevant, vec<gimple> *worklist,
494 bool force)
495{
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
503
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507 return true;
508
509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
510 {
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "not vectorized: unsupported use in stmt.\n");
514 return false;
515 }
516
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
519
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
522 {
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525 return true;
526 }
527
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
540 {
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549 return true;
550 }
551
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
560 {
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
564
565 switch (relevant)
566 {
567 case vect_unused_in_scope:
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
570 break;
571
572 case vect_used_in_outer_by_reduction:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 relevant = vect_used_by_reduction;
575 break;
576
577 case vect_used_in_outer:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 relevant = vect_used_in_scope;
580 break;
581
582 case vect_used_in_scope:
583 break;
584
585 default:
586 gcc_unreachable ();
587 }
588 }
589
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
592 ...
593 inner-loop:
594 d = def_stmt
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
598 {
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
602
603 switch (relevant)
604 {
605 case vect_unused_in_scope:
606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
609 break;
610
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
614
615 case vect_used_in_scope:
616 relevant = vect_used_in_outer;
617 break;
618
619 default:
620 gcc_unreachable ();
621 }
622 }
623
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
626 return true;
627}
628
629
630/* Function vect_mark_stmts_to_be_vectorized.
631
632 Not all stmts in the loop need to be vectorized. For example:
633
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
638
639 3. j = j + 1
640
641 Stmts 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
643
644 This pass detects such stmts. */
645
646bool
647vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
648{
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
661
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE, vect_location,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
665
666 auto_vec<gimple, 64> worklist;
667
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
670 {
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
673 {
674 phi = gsi_stmt (si);
675 if (dump_enabled_p ())
676 {
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
679 }
680
681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
683 }
684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
685 {
686 stmt = gsi_stmt (si);
687 if (dump_enabled_p ())
688 {
689 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691 }
692
693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
695 }
696 }
697
698 /* 2. Process_worklist */
699 while (worklist.length () > 0)
700 {
701 use_operand_p use_p;
702 ssa_op_iter iter;
703
704 stmt = worklist.pop ();
705 if (dump_enabled_p ())
706 {
707 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
709 }
710
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo = vinfo_for_stmt (stmt);
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
717
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
722
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
725 live_p = false
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
732
733 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734 tmp_relevant = relevant;
735 switch (def_type)
736 {
737 case vect_reduction_def:
738 switch (tmp_relevant)
739 {
740 case vect_unused_in_scope:
741 relevant = vect_used_by_reduction;
742 break;
743
744 case vect_used_by_reduction:
745 if (gimple_code (stmt) == GIMPLE_PHI)
746 break;
747 /* fall through */
748
749 default:
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "unsupported use of reduction.\n");
753 return false;
754 }
755
756 live_p = false;
757 break;
758
759 case vect_nested_cycle:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_in_outer_by_reduction
762 && tmp_relevant != vect_used_in_outer)
763 {
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766 "unsupported use of nested cycle.\n");
767
768 return false;
769 }
770
771 live_p = false;
772 break;
773
774 case vect_double_reduction_def:
775 if (tmp_relevant != vect_unused_in_scope
776 && tmp_relevant != vect_used_by_reduction)
777 {
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780 "unsupported use of double reduction.\n");
781
782 return false;
783 }
784
785 live_p = false;
786 break;
787
788 default:
789 break;
790 }
791
792 if (is_pattern_stmt_p (stmt_vinfo))
793 {
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt))
798 {
799 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 tree op = gimple_assign_rhs1 (stmt);
801
802 i = 1;
803 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
804 {
805 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806 live_p, relevant, &worklist, false)
807 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808 live_p, relevant, &worklist, false))
809 return false;
810 i = 2;
811 }
812 for (; i < gimple_num_ops (stmt); i++)
813 {
814 op = gimple_op (stmt, i);
815 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
816 &worklist, false))
817 return false;
818 }
819 }
820 else if (is_gimple_call (stmt))
821 {
822 for (i = 0; i < gimple_call_num_args (stmt); i++)
823 {
824 tree arg = gimple_call_arg (stmt, i);
825 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
826 &worklist, false))
827 return false;
828 }
829 }
830 }
831 else
832 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
833 {
834 tree op = USE_FROM_PTR (use_p);
835 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
836 &worklist, false))
837 return false;
838 }
839
840 if (STMT_VINFO_GATHER_P (stmt_vinfo))
841 {
842 tree off;
843 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
844 gcc_assert (decl);
845 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
846 &worklist, true))
847 return false;
848 }
849 } /* while worklist */
850
851 return true;
852}
853
854
855/* Function vect_model_simple_cost.
856
857 Models cost for simple operations, i.e. those that only emit ncopies of a
858 single op. Right now, this does not account for multiple insns that could
859 be generated for the single vector op. We will handle that shortly. */
860
861void
862vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
863 enum vect_def_type *dt,
864 stmt_vector_for_cost *prologue_cost_vec,
865 stmt_vector_for_cost *body_cost_vec)
866{
867 int i;
868 int inside_cost = 0, prologue_cost = 0;
869
870 /* The SLP costs were already calculated during SLP tree build. */
871 if (PURE_SLP_STMT (stmt_info))
872 return;
873
874 /* FORNOW: Assuming maximum 2 args per stmts. */
875 for (i = 0; i < 2; i++)
876 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
877 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
878 stmt_info, 0, vect_prologue);
879
880 /* Pass the inside-of-loop statements to the target-specific cost model. */
881 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
882 stmt_info, 0, vect_body);
883
884 if (dump_enabled_p ())
885 dump_printf_loc (MSG_NOTE, vect_location,
886 "vect_model_simple_cost: inside_cost = %d, "
887 "prologue_cost = %d .\n", inside_cost, prologue_cost);
888}
889
890
891/* Model cost for type demotion and promotion operations. PWR is normally
892 zero for single-step promotions and demotions. It will be one if
893 two-step promotion/demotion is required, and so on. Each additional
894 step doubles the number of instructions required. */
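
/* For example, widening a vector of chars to a vector of ints goes
   char -> short -> int, i.e. a two-step promotion, so PWR would be 1.  */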
895
896static void
897vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
898 enum vect_def_type *dt, int pwr)
899{
900 int i, tmp;
901 int inside_cost = 0, prologue_cost = 0;
902 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
904 void *target_cost_data;
905
906 /* The SLP costs were already calculated during SLP tree build. */
907 if (PURE_SLP_STMT (stmt_info))
908 return;
909
910 if (loop_vinfo)
911 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
912 else
913 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
914
915 for (i = 0; i < pwr + 1; i++)
916 {
917 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
918 (i + 1) : i;
919 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
920 vec_promote_demote, stmt_info, 0,
921 vect_body);
922 }
923
924 /* FORNOW: Assuming maximum 2 args per stmts. */
925 for (i = 0; i < 2; i++)
926 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
927 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
928 stmt_info, 0, vect_prologue);
929
930 if (dump_enabled_p ())
931 dump_printf_loc (MSG_NOTE, vect_location,
932 "vect_model_promotion_demotion_cost: inside_cost = %d, "
933 "prologue_cost = %d .\n", inside_cost, prologue_cost);
934}
935
936/* Function vect_cost_group_size
937
938 For a grouped load or store, return the group_size only if it is the first
939 load or store of a group; else return 1. This ensures that group size is
940 only returned once per group. */
941
942static int
943vect_cost_group_size (stmt_vec_info stmt_info)
944{
945 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
946
947 if (first_stmt == STMT_VINFO_STMT (stmt_info))
948 return GROUP_SIZE (stmt_info);
949
950 return 1;
951}
952
953
954/* Function vect_model_store_cost
955
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
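
/* For example, the interleaved stores

     a[2*i]   = x;
     a[2*i+1] = y;

   form a group of size two; the permutation overhead of the grouped access
   is charged to a single statement of the group.  */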
958
959void
960vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
961 bool store_lanes_p, enum vect_def_type dt,
962 slp_tree slp_node,
963 stmt_vector_for_cost *prologue_cost_vec,
964 stmt_vector_for_cost *body_cost_vec)
965{
966 int group_size;
967 unsigned int inside_cost = 0, prologue_cost = 0;
968 struct data_reference *first_dr;
969 gimple first_stmt;
970
971 /* The SLP costs were already calculated during SLP tree build. */
972 if (PURE_SLP_STMT (stmt_info))
973 return;
974
975 if (dt == vect_constant_def || dt == vect_external_def)
976 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
977 stmt_info, 0, vect_prologue);
978
979 /* Grouped access? */
980 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
981 {
982 if (slp_node)
983 {
984 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
985 group_size = 1;
986 }
987 else
988 {
989 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
990 group_size = vect_cost_group_size (stmt_info);
991 }
992
993 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
994 }
995 /* Not a grouped access. */
996 else
997 {
998 group_size = 1;
999 first_dr = STMT_VINFO_DATA_REF (stmt_info);
1000 }
1001
1002 /* We assume that the cost of a single store-lanes instruction is
1003 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1004 access is instead being provided by a permute-and-store operation,
1005 include the cost of the permutes. */
1006 if (!store_lanes_p && group_size > 1)
1007 {
1008 /* Uses high and low interleave operations, or shuffle operations,
1009 for each needed permute. */
1010 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1011 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1012 stmt_info, 0, vect_body);
1013
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "vect_model_store_cost: strided group_size = %d .\n",
1017 group_size);
1018 }
1019
1020 /* Costs of the stores. */
1021 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1022
1023 if (dump_enabled_p ())
1024 dump_printf_loc (MSG_NOTE, vect_location,
1025 "vect_model_store_cost: inside_cost = %d, "
1026 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1027}
1028
1029
1030/* Calculate cost of DR's memory access. */
1031void
1032vect_get_store_cost (struct data_reference *dr, int ncopies,
1033 unsigned int *inside_cost,
1034 stmt_vector_for_cost *body_cost_vec)
1035{
1036 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1037 gimple stmt = DR_STMT (dr);
1038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1039
1040 switch (alignment_support_scheme)
1041 {
1042 case dr_aligned:
1043 {
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1045 vector_store, stmt_info, 0,
1046 vect_body);
1047
1048 if (dump_enabled_p ())
1049 dump_printf_loc (MSG_NOTE, vect_location,
1050 "vect_model_store_cost: aligned.\n");
1051 break;
1052 }
1053
1054 case dr_unaligned_supported:
1055 {
1056 /* Here, we assign an additional cost for the unaligned store. */
1057 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1058 unaligned_store, stmt_info,
1059 DR_MISALIGNMENT (dr), vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: unaligned supported by "
1063 "hardware.\n");
1064 break;
1065 }
1066
1067 case dr_unaligned_unsupported:
1068 {
1069 *inside_cost = VECT_MAX_COST;
1070
1071 if (dump_enabled_p ())
1072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073 "vect_model_store_cost: unsupported access.\n");
1074 break;
1075 }
1076
1077 default:
1078 gcc_unreachable ();
1079 }
1080}
1081
1082
1083/* Function vect_model_load_cost
1084
1085 Models cost for loads. In the case of grouped accesses, the last access
1086 has the overhead of the grouped access attributed to it. Since unaligned
1087 accesses are supported for loads, we also account for the costs of the
1088 access scheme chosen. */
1089
1090void
1091vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1092 bool load_lanes_p, slp_tree slp_node,
1093 stmt_vector_for_cost *prologue_cost_vec,
1094 stmt_vector_for_cost *body_cost_vec)
1095{
1096 int group_size;
1097 gimple first_stmt;
1098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1099 unsigned int inside_cost = 0, prologue_cost = 0;
1100
1101 /* The SLP costs were already calculated during SLP tree build. */
1102 if (PURE_SLP_STMT (stmt_info))
1103 return;
1104
1105 /* Grouped accesses? */
1106 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1107 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1108 {
1109 group_size = vect_cost_group_size (stmt_info);
1110 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1111 }
1112 /* Not a grouped access. */
1113 else
1114 {
1115 group_size = 1;
1116 first_dr = dr;
1117 }
1118
1119 /* We assume that the cost of a single load-lanes instruction is
1120 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1121 access is instead being provided by a load-and-permute operation,
1122 include the cost of the permutes. */
1123 if (!load_lanes_p && group_size > 1)
1124 {
1125 /* Uses even and odd extract operations, or shuffle operations,
1126 for each needed permute. */
1127 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1128 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1129 stmt_info, 0, vect_body);
1130
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: strided group_size = %d .\n",
1134 group_size);
1135 }
1136
1137 /* The loads themselves. */
1138 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1139 {
1140 /* N scalar loads plus gathering them into a vector. */
1141 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1142 inside_cost += record_stmt_cost (body_cost_vec,
1143 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1144 scalar_load, stmt_info, 0, vect_body);
1145 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1146 stmt_info, 0, vect_body);
1147 }
1148 else
1149 vect_get_load_cost (first_dr, ncopies,
1150 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1151 || group_size > 1 || slp_node),
1152 &inside_cost, &prologue_cost,
1153 prologue_cost_vec, body_cost_vec, true);
1154
1155 if (dump_enabled_p ())
1156 dump_printf_loc (MSG_NOTE, vect_location,
1157 "vect_model_load_cost: inside_cost = %d, "
1158 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1159}
1160
1161
1162/* Calculate cost of DR's memory access. */
1163void
1164vect_get_load_cost (struct data_reference *dr, int ncopies,
1165 bool add_realign_cost, unsigned int *inside_cost,
1166 unsigned int *prologue_cost,
1167 stmt_vector_for_cost *prologue_cost_vec,
1168 stmt_vector_for_cost *body_cost_vec,
1169 bool record_prologue_costs)
1170{
1171 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1172 gimple stmt = DR_STMT (dr);
1173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174
1175 switch (alignment_support_scheme)
1176 {
1177 case dr_aligned:
1178 {
1179 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1180 stmt_info, 0, vect_body);
1181
1182 if (dump_enabled_p ())
1183 dump_printf_loc (MSG_NOTE, vect_location,
1184 "vect_model_load_cost: aligned.\n");
1185
1186 break;
1187 }
1188 case dr_unaligned_supported:
1189 {
1190 /* Here, we assign an additional cost for the unaligned load. */
1191 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1192 unaligned_load, stmt_info,
1193 DR_MISALIGNMENT (dr), vect_body);
1194
1195 if (dump_enabled_p ())
1196 dump_printf_loc (MSG_NOTE, vect_location,
1197 "vect_model_load_cost: unaligned supported by "
1198 "hardware.\n");
1199
1200 break;
1201 }
1202 case dr_explicit_realign:
1203 {
1204 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1205 vector_load, stmt_info, 0, vect_body);
1206 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1207 vec_perm, stmt_info, 0, vect_body);
1208
1209 /* FIXME: If the misalignment remains fixed across the iterations of
1210 the containing loop, the following cost should be added to the
1211 prologue costs. */
1212 if (targetm.vectorize.builtin_mask_for_load)
1213 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1214 stmt_info, 0, vect_body);
1215
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: explicit realign\n");
1219
1220 break;
1221 }
1222 case dr_explicit_realign_optimized:
1223 {
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: unaligned software "
1227 "pipelined.\n");
1228
1229 /* Unaligned software pipeline has a load of an address, an initial
1230 load, and possibly a mask operation to "prime" the loop. However,
1231 if this is an access in a group of loads, which provide grouped
1232 access, then the above cost should only be considered for one
1233 access in the group. Inside the loop, there is a load op
1234 and a realignment op. */
1235
1236 if (add_realign_cost && record_prologue_costs)
1237 {
1238 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1239 vector_stmt, stmt_info,
1240 0, vect_prologue);
1241 if (targetm.vectorize.builtin_mask_for_load)
1242 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1243 vector_stmt, stmt_info,
1244 0, vect_prologue);
1245 }
1246
1247 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1248 stmt_info, 0, vect_body);
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1250 stmt_info, 0, vect_body);
1251
1252 if (dump_enabled_p ())
1253 dump_printf_loc (MSG_NOTE, vect_location,
1254 "vect_model_load_cost: explicit realign optimized"
1255 "\n");
1256
1257 break;
1258 }
1259
1260 case dr_unaligned_unsupported:
1261 {
1262 *inside_cost = VECT_MAX_COST;
1263
1264 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1266 "vect_model_load_cost: unsupported access.\n");
1267 break;
1268 }
1269
1270 default:
1271 gcc_unreachable ();
1272 }
1273}
1274
1275/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1276 the loop preheader for the vectorized stmt STMT. */
1277
1278static void
1279vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1280{
1281 if (gsi)
1282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1283 else
1284 {
1285 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1286 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1287
1288 if (loop_vinfo)
1289 {
1290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1291 basic_block new_bb;
1292 edge pe;
1293
1294 if (nested_in_vect_loop_p (loop, stmt))
1295 loop = loop->inner;
1296
1297 pe = loop_preheader_edge (loop);
1298 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1299 gcc_assert (!new_bb);
1300 }
1301 else
1302 {
1303 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1304 basic_block bb;
1305 gimple_stmt_iterator gsi_bb_start;
1306
1307 gcc_assert (bb_vinfo);
1308 bb = BB_VINFO_BB (bb_vinfo);
1309 gsi_bb_start = gsi_after_labels (bb);
1310 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1311 }
1312 }
1313
1314 if (dump_enabled_p ())
1315 {
1316 dump_printf_loc (MSG_NOTE, vect_location,
1317 "created new init_stmt: ");
1318 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1319 }
1320}
1321
1322/* Function vect_init_vector.
1323
1324 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1325 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1326 vector type, a vector with all elements equal to VAL is created first.
1327 Place the initialization at GSI if it is not NULL. Otherwise, place the
1328 initialization at the loop preheader.
1329 Return the DEF of INIT_STMT.
1330 It will be used in the vectorization of STMT. */
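
/* A minimal usage sketch (names are illustrative): to splat a loop-invariant
   scalar X into a vector initialized in the loop preheader, a caller can use

     vec_cst = vect_init_vector (stmt, x, vectype, NULL);

   while passing a non-NULL GSI emits the initialization at that point
   instead.  */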
1331
1332tree
1333vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1334{
1335 tree new_var;
1336 gimple init_stmt;
1337 tree vec_oprnd;
1338 tree new_temp;
1339
1340 if (TREE_CODE (type) == VECTOR_TYPE
1341 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1342 {
1343 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1344 {
1345 if (CONSTANT_CLASS_P (val))
1346 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1347 else
1348 {
1349 new_temp = make_ssa_name (TREE_TYPE (type));
1350 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1351 vect_init_vector_1 (stmt, init_stmt, gsi);
1352 val = new_temp;
1353 }
1354 }
1355 val = build_vector_from_val (type, val);
1356 }
1357
1358 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1359 init_stmt = gimple_build_assign (new_var, val);
1360 new_temp = make_ssa_name (new_var, init_stmt);
1361 gimple_assign_set_lhs (init_stmt, new_temp);
1362 vect_init_vector_1 (stmt, init_stmt, gsi);
1363 vec_oprnd = gimple_assign_lhs (init_stmt);
1364 return vec_oprnd;
1365}
1366
1367
1368/* Function vect_get_vec_def_for_operand.
1369
1370 OP is an operand in STMT. This function returns a (vector) def that will be
1371 used in the vectorized stmt for STMT.
1372
1373 In the case that OP is an SSA_NAME which is defined in the loop, then
1374 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375
1376 In case OP is an invariant or constant, a new stmt that creates a vector def
1377 needs to be introduced. */
1378
1379tree
1380vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381{
1382 tree vec_oprnd;
1383 gimple vec_stmt;
1384 gimple def_stmt;
1385 stmt_vec_info def_stmt_info = NULL;
1386 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1387 unsigned int nunits;
1388 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1389 tree def;
1390 enum vect_def_type dt;
1391 bool is_simple_use;
1392 tree vector_type;
1393
1394 if (dump_enabled_p ())
1395 {
1396 dump_printf_loc (MSG_NOTE, vect_location,
1397 "vect_get_vec_def_for_operand: ");
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1399 dump_printf (MSG_NOTE, "\n");
1400 }
1401
1402 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1403 &def_stmt, &def, &dt);
1404 gcc_assert (is_simple_use);
1405 if (dump_enabled_p ())
1406 {
1407 int loc_printed = 0;
1408 if (def)
1409 {
1410 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1411 loc_printed = 1;
1412 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1413 dump_printf (MSG_NOTE, "\n");
1414 }
1415 if (def_stmt)
1416 {
1417 if (loc_printed)
1418 dump_printf (MSG_NOTE, " def_stmt = ");
1419 else
1420 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1422 }
1423 }
1424
1425 switch (dt)
1426 {
1427 /* Case 1: operand is a constant. */
1428 case vect_constant_def:
1429 {
1430 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431 gcc_assert (vector_type);
1432 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1433
1434 if (scalar_def)
1435 *scalar_def = op;
1436
1437 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1438 if (dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location,
1440 "Create vector_cst. nunits = %d\n", nunits);
1441
1442 return vect_init_vector (stmt, op, vector_type, NULL);
1443 }
1444
1445 /* Case 2: operand is defined outside the loop - loop invariant. */
1446 case vect_external_def:
1447 {
1448 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1449 gcc_assert (vector_type);
1450
1451 if (scalar_def)
1452 *scalar_def = def;
1453
1454 /* Create 'vec_inv = {inv,inv,..,inv}' */
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1457
1458 return vect_init_vector (stmt, def, vector_type, NULL);
1459 }
1460
1461 /* Case 3: operand is defined inside the loop. */
1462 case vect_internal_def:
1463 {
1464 if (scalar_def)
1465 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466
1467 /* Get the def from the vectorized stmt. */
1468 def_stmt_info = vinfo_for_stmt (def_stmt);
1469
1470 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1471 /* Get vectorized pattern statement. */
1472 if (!vec_stmt
1473 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1474 && !STMT_VINFO_RELEVANT (def_stmt_info))
1475 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1476 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1477 gcc_assert (vec_stmt);
1478 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1479 vec_oprnd = PHI_RESULT (vec_stmt);
1480 else if (is_gimple_call (vec_stmt))
1481 vec_oprnd = gimple_call_lhs (vec_stmt);
1482 else
1483 vec_oprnd = gimple_assign_lhs (vec_stmt);
1484 return vec_oprnd;
1485 }
1486
1487 /* Case 4: operand is defined by a loop header phi - reduction */
1488 case vect_reduction_def:
1489 case vect_double_reduction_def:
1490 case vect_nested_cycle:
1491 {
1492 struct loop *loop;
1493
1494 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1495 loop = (gimple_bb (def_stmt))->loop_father;
1496
1497 /* Get the def before the loop */
1498 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1499 return get_initial_def_for_reduction (stmt, op, scalar_def);
1500 }
1501
1502 /* Case 5: operand is defined by loop-header phi - induction. */
1503 case vect_induction_def:
1504 {
1505 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506
1507 /* Get the def from the vectorized stmt. */
1508 def_stmt_info = vinfo_for_stmt (def_stmt);
1509 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1510 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511 vec_oprnd = PHI_RESULT (vec_stmt);
1512 else
1513 vec_oprnd = gimple_get_lhs (vec_stmt);
1514 return vec_oprnd;
1515 }
1516
1517 default:
1518 gcc_unreachable ();
1519 }
1520}
1521
1522
1523/* Function vect_get_vec_def_for_stmt_copy
1524
1525 Return a vector-def for an operand. This function is used when the
1526 vectorized stmt to be created (by the caller to this function) is a "copy"
1527 created in case the vectorized result cannot fit in one vector, and several
1528 copies of the vector-stmt are required. In this case the vector-def is
1529 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1530 of the stmt that defines VEC_OPRND.
1531 DT is the type of the vector def VEC_OPRND.
1532
1533 Context:
1534 In case the vectorization factor (VF) is bigger than the number
1535 of elements that can fit in a vectype (nunits), we have to generate
1536 more than one vector stmt to vectorize the scalar stmt. This situation
1537 arises when there are multiple data-types operated upon in the loop; the
1538 smallest data-type determines the VF, and as a result, when vectorizing
1539 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1540 vector stmt (each computing a vector of 'nunits' results, and together
1541 computing 'VF' results in each iteration). This function is called when
1542 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1543 which VF=16 and nunits=4, so the number of copies required is 4):
1544
1545 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1546
1547 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1548 VS1.1: vx.1 = memref1 VS1.2
1549 VS1.2: vx.2 = memref2 VS1.3
1550 VS1.3: vx.3 = memref3
1551
1552 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1553 VSnew.1: vz1 = vx.1 + ... VSnew.2
1554 VSnew.2: vz2 = vx.2 + ... VSnew.3
1555 VSnew.3: vz3 = vx.3 + ...
1556
1557 The vectorization of S1 is explained in vectorizable_load.
1558 The vectorization of S2:
1559 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1560 the function 'vect_get_vec_def_for_operand' is called to
1561 get the relevant vector-def for each operand of S2. For operand x it
1562 returns the vector-def 'vx.0'.
1563
1564 To create the remaining copies of the vector-stmt (VSnew.j), this
1565 function is called to get the relevant vector-def for each operand. It is
1566 obtained from the respective VS1.j stmt, which is recorded in the
1567 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568
1569 For example, to obtain the vector-def 'vx.1' in order to create the
1570 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1571 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1572 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1573 and return its def ('vx.1').
1574 Overall, to create the above sequence this function will be called 3 times:
1575 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1576 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1577 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1578
1579tree
1580vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581{
1582 gimple vec_stmt_for_operand;
1583 stmt_vec_info def_stmt_info;
1584
1585 /* Do nothing; can reuse same def. */
1586 if (dt == vect_external_def || dt == vect_constant_def )
1587 return vec_oprnd;
1588
1589 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1590 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1591 gcc_assert (def_stmt_info);
1592 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1593 gcc_assert (vec_stmt_for_operand);
1594 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1595 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1596 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1597 else
1598 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1599 return vec_oprnd;
1600}
1601
1602
1603/* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605
1606static void
1607vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
1610{
1611 tree vec_oprnd = vec_oprnds0->pop ();
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1614 vec_oprnds0->quick_push (vec_oprnd);
1615
1616 if (vec_oprnds1 && vec_oprnds1->length ())
1617 {
1618 vec_oprnd = vec_oprnds1->pop ();
1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1620 vec_oprnds1->quick_push (vec_oprnd);
1621 }
1622}
1623
1624
1625/* Get vectorized definitions for OP0 and OP1.
1626 REDUC_INDEX is the index of reduction operand in case of reduction,
1627 and -1 otherwise. */
1628
1629void
1630vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1631 vec<tree> *vec_oprnds0,
1632 vec<tree> *vec_oprnds1,
1633 slp_tree slp_node, int reduc_index)
1634{
1635 if (slp_node)
1636 {
1637 int nops = (op1 == NULL_TREE) ? 1 : 2;
1638 auto_vec<tree> ops (nops);
1639 auto_vec<vec<tree> > vec_defs (nops);
1640
1641 ops.quick_push (op0);
1642 if (op1)
1643 ops.quick_push (op1);
1644
1645 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646
1647 *vec_oprnds0 = vec_defs[0];
1648 if (op1)
1649 *vec_oprnds1 = vec_defs[1];
1650 }
1651 else
1652 {
1653 tree vec_oprnd;
1654
1655 vec_oprnds0->create (1);
1656 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1657 vec_oprnds0->quick_push (vec_oprnd);
1658
1659 if (op1)
1660 {
1661 vec_oprnds1->create (1);
1662 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1663 vec_oprnds1->quick_push (vec_oprnd);
1664 }
1665 }
1666}
1667
1668
1669/* Function vect_finish_stmt_generation.
1670
1671 Insert a new stmt. */
1672
1673void
1674vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1675 gimple_stmt_iterator *gsi)
1676{
1677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1680
1681 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682
1683 if (!gsi_end_p (*gsi)
1684 && gimple_has_mem_ops (vec_stmt))
1685 {
1686 gimple at_stmt = gsi_stmt (*gsi);
1687 tree vuse = gimple_vuse (at_stmt);
1688 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689 {
1690 tree vdef = gimple_vdef (at_stmt);
1691 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1692 /* If we have an SSA vuse and insert a store, update virtual
1693 SSA form to avoid triggering the renamer. Do so only
1694 if we can easily see all uses - which is what almost always
1695 happens with the way vectorized stmts are inserted. */
1696 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1697 && ((is_gimple_assign (vec_stmt)
1698 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1699 || (is_gimple_call (vec_stmt)
1700 && !(gimple_call_flags (vec_stmt)
1701 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702 {
1703 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1704 gimple_set_vdef (vec_stmt, new_vdef);
1705 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706 }
1707 }
1708 }
1709 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710
1711 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1712 bb_vinfo));
1713
1714 if (dump_enabled_p ())
1715 {
1716 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1717 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1718 }
1719
1720 gimple_set_location (vec_stmt, gimple_location (stmt));
1721
1722 /* While EH edges will generally prevent vectorization, stmt might
1723 e.g. be in a must-not-throw region. Ensure newly created stmts
1724 that could throw are part of the same region. */
1725 int lp_nr = lookup_stmt_eh_lp (stmt);
1726 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1727 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1728}
1729
1730/* Checks if CALL can be vectorized in type VECTYPE. Returns
1731 a function declaration if the target has a vectorized version
1732 of the function, or NULL_TREE if the function cannot be vectorized. */
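
/* For example, a call to the const builtin sqrtf in a float loop may be
   mapped by targetm.vectorize.builtin_vectorized_function to a target
   vector square-root builtin when the target provides one; otherwise
   NULL_TREE is returned.  */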
1733
1734tree
1735vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1736{
1737 tree fndecl = gimple_call_fndecl (call);
1738
1739 /* We only handle functions that do not read or clobber memory -- i.e.
1740 const or novops ones. */
1741 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1742 return NULL_TREE;
1743
1744 if (!fndecl
1745 || TREE_CODE (fndecl) != FUNCTION_DECL
1746 || !DECL_BUILT_IN (fndecl))
1747 return NULL_TREE;
1748
1749 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1750 vectype_in);
1751}
1752
1753
1754static tree permute_vec_elements (tree, tree, tree, gimple,
1755 gimple_stmt_iterator *);
1756
1757
1758/* Function vectorizable_mask_load_store.
1759
1760 Check if STMT performs a conditional load or store that can be vectorized.
1761 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1762 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1763 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
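
/* For example, after if-conversion the conditional update

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   reaches this point as an IFN_MASK_STORE call (conditional reads become
   IFN_MASK_LOAD calls); it can be vectorized when the target supports
   masked vector memory accesses for the chosen vector mode.  */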
1764
1765static bool
1766vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1767 gimple *vec_stmt, slp_tree slp_node)
1768{
1769 tree vec_dest = NULL;
1770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1771 stmt_vec_info prev_stmt_info;
1772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1775 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1776 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1777 tree elem_type;
1778 gimple new_stmt;
1779 tree dummy;
1780 tree dataref_ptr = NULL_TREE;
1781 gimple ptr_incr;
1782 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1783 int ncopies;
1784 int i, j;
1785 bool inv_p;
1786 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1787 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1788 int gather_scale = 1;
1789 enum vect_def_type gather_dt = vect_unknown_def_type;
1790 bool is_store;
1791 tree mask;
1792 gimple def_stmt;
1793 tree def;
1794 enum vect_def_type dt;
1795
1796 if (slp_node != NULL)
1797 return false;
1798
1799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1800 gcc_assert (ncopies >= 1);
1801
1802 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1803 mask = gimple_call_arg (stmt, 2);
1804 if (TYPE_PRECISION (TREE_TYPE (mask))
1805 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1806 return false;
1807
1808 /* FORNOW. This restriction should be relaxed. */
1809 if (nested_in_vect_loop && ncopies > 1)
1810 {
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "multiple types in nested loop.");
1814 return false;
1815 }
1816
1817 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1818 return false;
1819
1820 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1821 return false;
1822
1823 if (!STMT_VINFO_DATA_REF (stmt_info))
1824 return false;
1825
1826 elem_type = TREE_TYPE (vectype);
1827
1828 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1829 return false;
1830
1831 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1832 return false;
1833
1834 if (STMT_VINFO_GATHER_P (stmt_info))
1835 {
1836 gimple def_stmt;
1837 tree def;
1838 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1839 &gather_off, &gather_scale);
1840 gcc_assert (gather_decl);
1841 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1842 &def_stmt, &def, &gather_dt,
1843 &gather_off_vectype))
1844 {
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "gather index use not simple.");
1848 return false;
1849 }
1850
1851 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852 tree masktype
1853 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1854 if (TREE_CODE (masktype) == INTEGER_TYPE)
1855 {
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "masked gather with integer mask not supported.");
1859 return false;
1860 }
1861 }
1862 else if (tree_int_cst_compare (nested_in_vect_loop
1863 ? STMT_VINFO_DR_STEP (stmt_info)
1864 : DR_STEP (dr), size_zero_node) <= 0)
1865 return false;
1866 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1867 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1868 return false;
1869
1870 if (TREE_CODE (mask) != SSA_NAME)
1871 return false;
1872
1873 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1874 &def_stmt, &def, &dt))
1875 return false;
1876
1877 if (is_store)
1878 {
1879 tree rhs = gimple_call_arg (stmt, 3);
1880 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1881 &def_stmt, &def, &dt))
1882 return false;
1883 }
1884
1885 if (!vec_stmt) /* transformation not required. */
1886 {
1887 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1888 if (is_store)
1889 vect_model_store_cost (stmt_info, ncopies, false, dt,
1890 NULL, NULL, NULL);
1891 else
1892 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1893 return true;
1894 }
1895
1896 /** Transform. **/
1897
1898 if (STMT_VINFO_GATHER_P (stmt_info))
1899 {
1900 tree vec_oprnd0 = NULL_TREE, op;
1901 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1902 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1903 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1904 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1905 tree mask_perm_mask = NULL_TREE;
1906 edge pe = loop_preheader_edge (loop);
1907 gimple_seq seq;
1908 basic_block new_bb;
1909 enum { NARROW, NONE, WIDEN } modifier;
1910 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911
1912 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1913 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917 scaletype = TREE_VALUE (arglist);
1918 gcc_checking_assert (types_compatible_p (srctype, rettype)
1919 && types_compatible_p (srctype, masktype));
1920
1921 if (nunits == gather_off_nunits)
1922 modifier = NONE;
1923 else if (nunits == gather_off_nunits / 2)
1924 {
1925 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1926 modifier = WIDEN;
1927
1928 for (i = 0; i < gather_off_nunits; ++i)
1929 sel[i] = i | nunits;
1930
1931 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1932 }
1933 else if (nunits == gather_off_nunits * 2)
1934 {
1935 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1936 modifier = NARROW;
1937
1938 for (i = 0; i < nunits; ++i)
1939 sel[i] = i < gather_off_nunits
1940 ? i : i + nunits - gather_off_nunits;
1941
1942 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1943 ncopies *= 2;
1944 for (i = 0; i < nunits; ++i)
1945 sel[i] = i | gather_off_nunits;
1946 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1947 }
1948 else
1949 gcc_unreachable ();
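      /* Illustrative example (assumed modes): with V4DF data and V8SI
	 offsets, nunits == 4 and gather_off_nunits == 8, so modifier ==
	 WIDEN; the even-numbered copy uses the offset vector as-is and the
	 odd-numbered copy first permutes it with
	 perm_mask = { 4, 5, 6, 7, 4, 5, 6, 7 } so the upper offsets end up
	 in the positions the gather consumes.  In the opposite (NARROW)
	 situation two gather results are combined per copy, and the mask
	 vector is permuted in the same way for the odd-numbered call.  */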
1950
1951 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952
1953 ptr = fold_convert (ptrtype, gather_base);
1954 if (!is_gimple_min_invariant (ptr))
1955 {
1956 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1957 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1958 gcc_assert (!new_bb);
1959 }
1960
1961 scale = build_int_cst (scaletype, gather_scale);
1962
1963 prev_stmt_info = NULL;
1964 for (j = 0; j < ncopies; ++j)
1965 {
1966 if (modifier == WIDEN && (j & 1))
1967 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1968 perm_mask, stmt, gsi);
1969 else if (j == 0)
1970 op = vec_oprnd0
1971 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1972 else
1973 op = vec_oprnd0
1974 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975
1976 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977 {
1978 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1979 == TYPE_VECTOR_SUBPARTS (idxtype));
1980 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1981 var = make_ssa_name (var);
1982 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1983 new_stmt
1984 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 op = var;
1987 }
1988
1989 if (mask_perm_mask && (j & 1))
1990 mask_op = permute_vec_elements (mask_op, mask_op,
1991 mask_perm_mask, stmt, gsi);
1992 else
1993 {
1994 if (j == 0)
1995 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1996 else
1997 {
1998 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1999 &def_stmt, &def, &dt);
2000 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001 }
2002
2003 mask_op = vec_mask;
2004 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005 {
2006 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007 == TYPE_VECTOR_SUBPARTS (masktype));
2008 var = vect_get_new_vect_var (masktype, vect_simple_var,
2009 NULL);
2010 var = make_ssa_name (var);
2011 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012 new_stmt
2013 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015 mask_op = var;
2016 }
2017 }
2018
2019 new_stmt
2020 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021 scale);
2022
2023 if (!useless_type_conversion_p (vectype, rettype))
2024 {
2025 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026 == TYPE_VECTOR_SUBPARTS (rettype));
2027 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028 op = make_ssa_name (var, new_stmt);
2029 gimple_call_set_lhs (new_stmt, op);
2030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2031 var = make_ssa_name (vec_dest);
2032 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2033 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034 }
2035 else
2036 {
2037 var = make_ssa_name (vec_dest, new_stmt);
2038 gimple_call_set_lhs (new_stmt, var);
2039 }
2040
2041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042
2043 if (modifier == NARROW)
2044 {
2045 if ((j & 1) == 0)
2046 {
2047 prev_res = var;
2048 continue;
2049 }
2050 var = permute_vec_elements (prev_res, var,
2051 perm_mask, stmt, gsi);
2052 new_stmt = SSA_NAME_DEF_STMT (var);
2053 }
2054
2055 if (prev_stmt_info == NULL)
2056 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057 else
2058 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059 prev_stmt_info = vinfo_for_stmt (new_stmt);
2060 }
2061
2062 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063 from the IL. */
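      /* E.g. (illustrative) the scalar "_5 = MASK_LOAD (...)" is replaced by
	 "_5 = 0;", so the SSA name stays defined while the masked memory
	 access itself disappears from the scalar code.  */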
2064 tree lhs = gimple_call_lhs (stmt);
2065 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066 set_vinfo_for_stmt (new_stmt, stmt_info);
2067 set_vinfo_for_stmt (stmt, NULL);
2068 STMT_VINFO_STMT (stmt_info) = new_stmt;
2069 gsi_replace (gsi, new_stmt, true);
2070 return true;
2071 }
2072 else if (is_store)
2073 {
2074 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075 prev_stmt_info = NULL;
2076 for (i = 0; i < ncopies; i++)
2077 {
2078 unsigned align, misalign;
2079
2080 if (i == 0)
2081 {
2082 tree rhs = gimple_call_arg (stmt, 3);
2083 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085	      /* We should have caught mismatched types earlier.  */
2086 gcc_assert (useless_type_conversion_p (vectype,
2087 TREE_TYPE (vec_rhs)));
2088 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2089 NULL_TREE, &dummy, gsi,
2090 &ptr_incr, false, &inv_p);
2091 gcc_assert (!inv_p);
2092 }
2093 else
2094 {
2095 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2096 &def, &dt);
2097 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2098 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2099 &def, &dt);
2100 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2101 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2102 TYPE_SIZE_UNIT (vectype));
2103 }
2104
2105 align = TYPE_ALIGN_UNIT (vectype);
2106 if (aligned_access_p (dr))
2107 misalign = 0;
2108 else if (DR_MISALIGNMENT (dr) == -1)
2109 {
2110 align = TYPE_ALIGN_UNIT (elem_type);
2111 misalign = 0;
2112 }
2113 else
2114 misalign = DR_MISALIGNMENT (dr);
2115 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2116 misalign);
2117 new_stmt
2118 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2119 gimple_call_arg (stmt, 1),
2120 vec_mask, vec_rhs);
2121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 if (i == 0)
2123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 else
2125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126 prev_stmt_info = vinfo_for_stmt (new_stmt);
2127 }
2128 }
2129 else
2130 {
2131 tree vec_mask = NULL_TREE;
2132 prev_stmt_info = NULL;
2133 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2134 for (i = 0; i < ncopies; i++)
2135 {
2136 unsigned align, misalign;
2137
2138 if (i == 0)
2139 {
2140 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2141 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2142 NULL_TREE, &dummy, gsi,
2143 &ptr_incr, false, &inv_p);
2144 gcc_assert (!inv_p);
2145 }
2146 else
2147 {
2148 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2149 &def, &dt);
2150 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2151 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2152 TYPE_SIZE_UNIT (vectype));
2153 }
2154
2155 align = TYPE_ALIGN_UNIT (vectype);
2156 if (aligned_access_p (dr))
2157 misalign = 0;
2158 else if (DR_MISALIGNMENT (dr) == -1)
2159 {
2160 align = TYPE_ALIGN_UNIT (elem_type);
2161 misalign = 0;
2162 }
2163 else
2164 misalign = DR_MISALIGNMENT (dr);
2165 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2166 misalign);
2167 new_stmt
2168 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2169 gimple_call_arg (stmt, 1),
2170 vec_mask);
2171 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2173 if (i == 0)
2174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2175 else
2176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177 prev_stmt_info = vinfo_for_stmt (new_stmt);
2178 }
2179 }
2180
2181 if (!is_store)
2182 {
2183 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184 from the IL. */
2185 tree lhs = gimple_call_lhs (stmt);
2186 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2187 set_vinfo_for_stmt (new_stmt, stmt_info);
2188 set_vinfo_for_stmt (stmt, NULL);
2189 STMT_VINFO_STMT (stmt_info) = new_stmt;
2190 gsi_replace (gsi, new_stmt, true);
2191 }
2192
2193 return true;
2194}
2195
2196
2197/* Function vectorizable_call.
2198
2199 Check if GS performs a function call that can be vectorized.
2200   If VEC_STMT is also passed, vectorize GS: create a vectorized
2201   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2202 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2203
2204static bool
2205vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2206 slp_tree slp_node)
2207{
2208 gcall *stmt;
2209 tree vec_dest;
2210 tree scalar_dest;
2211 tree op, type;
2212 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2213 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2214 tree vectype_out, vectype_in;
2215 int nunits_in;
2216 int nunits_out;
2217 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2218 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2219 tree fndecl, new_temp, def, rhs_type;
2220 gimple def_stmt;
2221 enum vect_def_type dt[3]
2222 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2223 gimple new_stmt = NULL;
2224 int ncopies, j;
2225 vec<tree> vargs = vNULL;
2226 enum { NARROW, NONE, WIDEN } modifier;
2227 size_t i, nargs;
2228 tree lhs;
2229
2230 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2231 return false;
2232
2233 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2234 return false;
2235
2236 /* Is GS a vectorizable call? */
2237 stmt = dyn_cast <gcall *> (gs);
2238 if (!stmt)
2239 return false;
2240
2241 if (gimple_call_internal_p (stmt)
2242 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245 slp_node);
2246
2247 if (gimple_call_lhs (stmt) == NULL_TREE
2248 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2249 return false;
2250
2251 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252
2253 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254
2255 /* Process function arguments. */
2256 rhs_type = NULL_TREE;
2257 vectype_in = NULL_TREE;
2258 nargs = gimple_call_num_args (stmt);
2259
2260  /* Bail out if the function has more than three arguments; we do not have
2261     interesting builtin functions to vectorize with more than two arguments
2262     except for fma.  Calls with no arguments are not handled either.  */
2263 if (nargs == 0 || nargs > 3)
2264 return false;
2265
2266  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2267 if (gimple_call_internal_p (stmt)
2268 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269 {
2270 nargs = 0;
2271 rhs_type = unsigned_type_node;
2272 }
2273
2274 for (i = 0; i < nargs; i++)
2275 {
2276 tree opvectype;
2277
2278 op = gimple_call_arg (stmt, i);
2279
2280 /* We can only handle calls with arguments of the same type. */
2281 if (rhs_type
2282 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2283 {
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "argument types differ.\n");
2287 return false;
2288 }
2289 if (!rhs_type)
2290 rhs_type = TREE_TYPE (op);
2291
2292 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2293 &def_stmt, &def, &dt[i], &opvectype))
2294 {
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297 "use not simple.\n");
2298 return false;
2299 }
2300
2301 if (!vectype_in)
2302 vectype_in = opvectype;
2303 else if (opvectype
2304 && opvectype != vectype_in)
2305 {
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "argument vector types differ.\n");
2309 return false;
2310 }
2311 }
2312  /* If all arguments are external or constant defs, use a vector type with
2313 the same size as the output vector type. */
2314 if (!vectype_in)
2315 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2316 if (vec_stmt)
2317 gcc_assert (vectype_in);
2318 if (!vectype_in)
2319 {
2320 if (dump_enabled_p ())
2321 {
2322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2323 "no vectype for scalar type ");
2324 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2326 }
2327
2328 return false;
2329 }
2330
2331 /* FORNOW */
2332 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2334 if (nunits_in == nunits_out / 2)
2335 modifier = NARROW;
2336 else if (nunits_out == nunits_in)
2337 modifier = NONE;
2338 else if (nunits_out == nunits_in / 2)
2339 modifier = WIDEN;
2340 else
2341 return false;
2342
2343 /* For now, we only vectorize functions if a target specific builtin
2344 is available. TODO -- in some cases, it might be profitable to
2345 insert the calls for pieces of the vector, in order to be able
2346 to vectorize other operations in the loop. */
2347 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2348 if (fndecl == NULL_TREE)
2349 {
2350 if (gimple_call_internal_p (stmt)
2351 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2352 && !slp_node
2353 && loop_vinfo
2354 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2355 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2356 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2357 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358 {
2359 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2360 { 0, 1, 2, ... vf - 1 } vector. */
2361 gcc_assert (nargs == 0);
2362 }
2363 else
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "function is not vectorizable.\n");
2368 return false;
2369 }
2370 }
2371
2372 gcc_assert (!gimple_vuse (stmt));
2373
2374 if (slp_node || PURE_SLP_STMT (stmt_info))
2375 ncopies = 1;
2376 else if (modifier == NARROW)
2377 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2378 else
2379 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380
2381 /* Sanity check: make sure that at least one copy of the vectorized stmt
2382 needs to be generated. */
2383 gcc_assert (ncopies >= 1);
2384
2385 if (!vec_stmt) /* transformation not required. */
2386 {
2387 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2388 if (dump_enabled_p ())
2389 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2390 "\n");
2391 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2392 return true;
2393 }
2394
2395 /** Transform. **/
2396
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2399
2400 /* Handle def. */
2401 scalar_dest = gimple_call_lhs (stmt);
2402 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403
2404 prev_stmt_info = NULL;
2405 switch (modifier)
2406 {
2407 case NONE:
2408 for (j = 0; j < ncopies; ++j)
2409 {
2410 /* Build argument list for the vectorized call. */
2411 if (j == 0)
2412 vargs.create (nargs);
2413 else
2414 vargs.truncate (0);
2415
2416 if (slp_node)
2417 {
2418 auto_vec<vec<tree> > vec_defs (nargs);
2419 vec<tree> vec_oprnds0;
2420
2421 for (i = 0; i < nargs; i++)
2422 vargs.quick_push (gimple_call_arg (stmt, i));
2423 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2424 vec_oprnds0 = vec_defs[0];
2425
2426 /* Arguments are ready. Create the new vector stmt. */
2427 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2428 {
2429 size_t k;
2430 for (k = 0; k < nargs; k++)
2431 {
2432 vec<tree> vec_oprndsk = vec_defs[k];
2433 vargs[k] = vec_oprndsk[i];
2434 }
2435 new_stmt = gimple_build_call_vec (fndecl, vargs);
2436 new_temp = make_ssa_name (vec_dest, new_stmt);
2437 gimple_call_set_lhs (new_stmt, new_temp);
2438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2440 }
2441
2442 for (i = 0; i < nargs; i++)
2443 {
2444 vec<tree> vec_oprndsi = vec_defs[i];
2445 vec_oprndsi.release ();
2446 }
2447 continue;
2448 }
2449
2450 for (i = 0; i < nargs; i++)
2451 {
2452 op = gimple_call_arg (stmt, i);
2453 if (j == 0)
2454 vec_oprnd0
2455 = vect_get_vec_def_for_operand (op, stmt, NULL);
2456 else
2457 {
2458 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2459 vec_oprnd0
2460 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2461 }
2462
2463 vargs.quick_push (vec_oprnd0);
2464 }
2465
2466 if (gimple_call_internal_p (stmt)
2467 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468 {
2469 tree *v = XALLOCAVEC (tree, nunits_out);
2470 int k;
2471 for (k = 0; k < nunits_out; ++k)
2472 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2473 tree cst = build_vector (vectype_out, v);
2474 tree new_var
2475 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2476 gimple init_stmt = gimple_build_assign (new_var, cst);
2477 new_temp = make_ssa_name (new_var, init_stmt);
2478 gimple_assign_set_lhs (init_stmt, new_temp);
2479 vect_init_vector_1 (stmt, init_stmt, NULL);
2480 new_temp = make_ssa_name (vec_dest);
2481 new_stmt = gimple_build_assign (new_temp,
2482 gimple_assign_lhs (init_stmt));
2483 }
2484 else
2485 {
2486 new_stmt = gimple_build_call_vec (fndecl, vargs);
2487 new_temp = make_ssa_name (vec_dest, new_stmt);
2488 gimple_call_set_lhs (new_stmt, new_temp);
2489 }
2490 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491
2492 if (j == 0)
2493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2494 else
2495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496
2497 prev_stmt_info = vinfo_for_stmt (new_stmt);
2498 }
2499
2500 break;
2501
2502 case NARROW:
2503 for (j = 0; j < ncopies; ++j)
2504 {
2505 /* Build argument list for the vectorized call. */
2506 if (j == 0)
2507 vargs.create (nargs * 2);
2508 else
2509 vargs.truncate (0);
2510
2511 if (slp_node)
2512 {
2513 auto_vec<vec<tree> > vec_defs (nargs);
2514 vec<tree> vec_oprnds0;
2515
2516 for (i = 0; i < nargs; i++)
2517 vargs.quick_push (gimple_call_arg (stmt, i));
2518 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2519 vec_oprnds0 = vec_defs[0];
2520
2521 /* Arguments are ready. Create the new vector stmt. */
2522 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2523 {
2524 size_t k;
2525 vargs.truncate (0);
2526 for (k = 0; k < nargs; k++)
2527 {
2528 vec<tree> vec_oprndsk = vec_defs[k];
2529 vargs.quick_push (vec_oprndsk[i]);
2530 vargs.quick_push (vec_oprndsk[i + 1]);
2531 }
2532 new_stmt = gimple_build_call_vec (fndecl, vargs);
2533 new_temp = make_ssa_name (vec_dest, new_stmt);
2534 gimple_call_set_lhs (new_stmt, new_temp);
2535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2537 }
2538
2539 for (i = 0; i < nargs; i++)
2540 {
2541 vec<tree> vec_oprndsi = vec_defs[i];
2542 vec_oprndsi.release ();
2543 }
2544 continue;
2545 }
2546
2547 for (i = 0; i < nargs; i++)
2548 {
2549 op = gimple_call_arg (stmt, i);
2550 if (j == 0)
2551 {
2552 vec_oprnd0
2553 = vect_get_vec_def_for_operand (op, stmt, NULL);
2554 vec_oprnd1
2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556 }
2557 else
2558 {
2559 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2560 vec_oprnd0
2561 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2562 vec_oprnd1
2563 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2564 }
2565
2566 vargs.quick_push (vec_oprnd0);
2567 vargs.quick_push (vec_oprnd1);
2568 }
2569
2570 new_stmt = gimple_build_call_vec (fndecl, vargs);
2571 new_temp = make_ssa_name (vec_dest, new_stmt);
2572 gimple_call_set_lhs (new_stmt, new_temp);
2573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574
2575 if (j == 0)
2576 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577 else
2578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579
2580 prev_stmt_info = vinfo_for_stmt (new_stmt);
2581 }
2582
2583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584
2585 break;
2586
2587 case WIDEN:
2588 /* No current target implements this case. */
2589 return false;
2590 }
2591
2592 vargs.release ();
2593
2594  /* The call in STMT might prevent it from being removed in dce.
2595     However, we cannot remove it here because of the way the ssa name
2596     it defines is mapped to the new definition.  So just replace the
2597     rhs of the statement with something harmless.  */
2598
2599 if (slp_node)
2600 return true;
2601
2602 type = TREE_TYPE (scalar_dest);
2603 if (is_pattern_stmt_p (stmt_info))
2604 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2605 else
2606 lhs = gimple_call_lhs (stmt);
2607 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2608 set_vinfo_for_stmt (new_stmt, stmt_info);
2609 set_vinfo_for_stmt (stmt, NULL);
2610 STMT_VINFO_STMT (stmt_info) = new_stmt;
2611 gsi_replace (gsi, new_stmt, false);
2612
2613 return true;
2614}
2615
2616
2617struct simd_call_arg_info
2618{
2619 tree vectype;
2620 tree op;
2621 enum vect_def_type dt;
2622 HOST_WIDE_INT linear_step;
2623 unsigned int align;
2624};
2625
2626/* Function vectorizable_simd_clone_call.
2627
2628 Check if STMT performs a function call that can be vectorized
2629 by calling a simd clone of the function.
2630 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2632 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
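/* For example (illustrative; the clone name is assumed): a call
   "y_4 = foo (x_2);" whose callee has "#pragma omp declare simd" clones may
   be replaced by a call to an elemental clone such as "_ZGVbN4v_foo" that
   takes and returns whole vectors, while uniform and linear arguments are
   passed as scalars according to the clone's signature.  */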
2633
2634static bool
2635vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636 gimple *vec_stmt, slp_tree slp_node)
2637{
2638 tree vec_dest;
2639 tree scalar_dest;
2640 tree op, type;
2641 tree vec_oprnd0 = NULL_TREE;
2642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643 tree vectype;
2644 unsigned int nunits;
2645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648 tree fndecl, new_temp, def;
2649 gimple def_stmt;
2650 gimple new_stmt = NULL;
2651 int ncopies, j;
2652 vec<simd_call_arg_info> arginfo = vNULL;
2653 vec<tree> vargs = vNULL;
2654 size_t i, nargs;
2655 tree lhs, rtype, ratype;
2656 vec<constructor_elt, va_gc> *ret_ctor_elts;
2657
2658 /* Is STMT a vectorizable call? */
2659 if (!is_gimple_call (stmt))
2660 return false;
2661
2662 fndecl = gimple_call_fndecl (stmt);
2663 if (fndecl == NULL_TREE)
2664 return false;
2665
2666 struct cgraph_node *node = cgraph_node::get (fndecl);
2667 if (node == NULL || node->simd_clones == NULL)
2668 return false;
2669
2670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671 return false;
2672
2673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674 return false;
2675
2676 if (gimple_call_lhs (stmt)
2677 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678 return false;
2679
2680 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681
2682 vectype = STMT_VINFO_VECTYPE (stmt_info);
2683
2684 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685 return false;
2686
2687 /* FORNOW */
2688 if (slp_node || PURE_SLP_STMT (stmt_info))
2689 return false;
2690
2691 /* Process function arguments. */
2692 nargs = gimple_call_num_args (stmt);
2693
2694 /* Bail out if the function has zero arguments. */
2695 if (nargs == 0)
2696 return false;
2697
2698 arginfo.create (nargs);
2699
2700 for (i = 0; i < nargs; i++)
2701 {
2702 simd_call_arg_info thisarginfo;
2703 affine_iv iv;
2704
2705 thisarginfo.linear_step = 0;
2706 thisarginfo.align = 0;
2707 thisarginfo.op = NULL_TREE;
2708
2709 op = gimple_call_arg (stmt, i);
2710 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711 &def_stmt, &def, &thisarginfo.dt,
2712 &thisarginfo.vectype)
2713 || thisarginfo.dt == vect_uninitialized_def)
2714 {
2715 if (dump_enabled_p ())
2716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717 "use not simple.\n");
2718 arginfo.release ();
2719 return false;
2720 }
2721
2722 if (thisarginfo.dt == vect_constant_def
2723 || thisarginfo.dt == vect_external_def)
2724 gcc_assert (thisarginfo.vectype == NULL_TREE);
2725 else
2726 gcc_assert (thisarginfo.vectype != NULL_TREE);
2727
2728 /* For linear arguments, the analyze phase should have saved
2729 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2730 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732 {
2733 gcc_assert (vec_stmt);
2734 thisarginfo.linear_step
2735 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736 thisarginfo.op
2737 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738	  /* If the loop has been peeled for alignment, we need to adjust
	     the saved base accordingly.  */
2739 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2740 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2741 if (n1 != n2)
2742 {
2743 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2744 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2745 tree opt = TREE_TYPE (thisarginfo.op);
2746 bias = fold_convert (TREE_TYPE (step), bias);
2747 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2748 thisarginfo.op
2749 = fold_build2 (POINTER_TYPE_P (opt)
2750 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2751 thisarginfo.op, bias);
2752 }
2753 }
2754 else if (!vec_stmt
2755 && thisarginfo.dt != vect_constant_def
2756 && thisarginfo.dt != vect_external_def
2757 && loop_vinfo
2758 && TREE_CODE (op) == SSA_NAME
2759 && simple_iv (loop, loop_containing_stmt (stmt), op,
2760 &iv, false)
2761 && tree_fits_shwi_p (iv.step))
2762 {
2763 thisarginfo.linear_step = tree_to_shwi (iv.step);
2764 thisarginfo.op = iv.base;
2765 }
2766 else if ((thisarginfo.dt == vect_constant_def
2767 || thisarginfo.dt == vect_external_def)
2768 && POINTER_TYPE_P (TREE_TYPE (op)))
2769 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770
2771 arginfo.quick_push (thisarginfo);
2772 }
2773
2774 unsigned int badness = 0;
2775 struct cgraph_node *bestn = NULL;
2776 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2777 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2778 else
2779 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2780 n = n->simdclone->next_clone)
2781 {
2782 unsigned int this_badness = 0;
2783 if (n->simdclone->simdlen
2784 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2785 || n->simdclone->nargs != nargs)
2786 continue;
2787 if (n->simdclone->simdlen
2788 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 - exact_log2 (n->simdclone->simdlen)) * 1024;
2791 if (n->simdclone->inbranch)
2792 this_badness += 2048;
2793 int target_badness = targetm.simd_clone.usable (n);
2794 if (target_badness < 0)
2795 continue;
2796 this_badness += target_badness * 512;
2797 /* FORNOW: Have to add code to add the mask argument. */
2798 if (n->simdclone->inbranch)
2799 continue;
2800 for (i = 0; i < nargs; i++)
2801 {
2802 switch (n->simdclone->args[i].arg_type)
2803 {
2804 case SIMD_CLONE_ARG_TYPE_VECTOR:
2805 if (!useless_type_conversion_p
2806 (n->simdclone->args[i].orig_type,
2807 TREE_TYPE (gimple_call_arg (stmt, i))))
2808 i = -1;
2809 else if (arginfo[i].dt == vect_constant_def
2810 || arginfo[i].dt == vect_external_def
2811 || arginfo[i].linear_step)
2812 this_badness += 64;
2813 break;
2814 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2815 if (arginfo[i].dt != vect_constant_def
2816 && arginfo[i].dt != vect_external_def)
2817 i = -1;
2818 break;
2819 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2820 if (arginfo[i].dt == vect_constant_def
2821 || arginfo[i].dt == vect_external_def
2822 || (arginfo[i].linear_step
2823 != n->simdclone->args[i].linear_step))
2824 i = -1;
2825 break;
2826 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2827 /* FORNOW */
2828 i = -1;
2829 break;
2830 case SIMD_CLONE_ARG_TYPE_MASK:
2831 gcc_unreachable ();
2832 }
2833 if (i == (size_t) -1)
2834 break;
2835 if (n->simdclone->args[i].alignment > arginfo[i].align)
2836 {
2837 i = -1;
2838 break;
2839 }
2840 if (arginfo[i].align)
2841 this_badness += (exact_log2 (arginfo[i].align)
2842 - exact_log2 (n->simdclone->args[i].alignment));
2843 }
2844 if (i == (size_t) -1)
2845 continue;
2846 if (bestn == NULL || this_badness < badness)
2847 {
2848 bestn = n;
2849 badness = this_badness;
2850 }
2851 }
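  /* Illustrative scoring example (constants follow the code above): with a
     vectorization factor of 8, a clone of simdlen 8 with no penalties scores
     0, a simdlen-4 clone starts at (log2 (8) - log2 (4)) * 1024 == 1024,
     target-reported unsuitability adds 512 per unit, and each vector argument
     fed by a constant, external def or stepped value adds 64; the candidate
     with the lowest total badness is chosen.  */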
2852
2853 if (bestn == NULL)
2854 {
2855 arginfo.release ();
2856 return false;
2857 }
2858
2859 for (i = 0; i < nargs; i++)
2860 if ((arginfo[i].dt == vect_constant_def
2861 || arginfo[i].dt == vect_external_def)
2862 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863 {
2864 arginfo[i].vectype
2865 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2866 i)));
2867 if (arginfo[i].vectype == NULL
2868 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2869 > bestn->simdclone->simdlen))
2870 {
2871 arginfo.release ();
2872 return false;
2873 }
2874 }
2875
2876 fndecl = bestn->decl;
2877 nunits = bestn->simdclone->simdlen;
2878 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879
2880  /* If the function isn't const, only allow it in simd loops where the user
2881 has asserted that at least nunits consecutive iterations can be
2882 performed using SIMD instructions. */
2883 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2884 && gimple_vuse (stmt))
2885 {
2886 arginfo.release ();
2887 return false;
2888 }
2889
2890 /* Sanity check: make sure that at least one copy of the vectorized stmt
2891 needs to be generated. */
2892 gcc_assert (ncopies >= 1);
2893
2894 if (!vec_stmt) /* transformation not required. */
2895 {
2896 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2897 for (i = 0; i < nargs; i++)
2898 if (bestn->simdclone->args[i].arg_type
2899 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2900 {
2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2902 + 1);
2903 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2904 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2905 ? size_type_node : TREE_TYPE (arginfo[i].op);
2906 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2907 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2908 }
2909 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2910 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_NOTE, vect_location,
2912 "=== vectorizable_simd_clone_call ===\n");
2913/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2914 arginfo.release ();
2915 return true;
2916 }
2917
2918 /** Transform. **/
2919
2920 if (dump_enabled_p ())
2921 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2922
2923 /* Handle def. */
2924 scalar_dest = gimple_call_lhs (stmt);
2925 vec_dest = NULL_TREE;
2926 rtype = NULL_TREE;
2927 ratype = NULL_TREE;
2928 if (scalar_dest)
2929 {
2930 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2931 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2932 if (TREE_CODE (rtype) == ARRAY_TYPE)
2933 {
2934 ratype = rtype;
2935 rtype = TREE_TYPE (ratype);
2936 }
2937 }
2938
2939 prev_stmt_info = NULL;
2940 for (j = 0; j < ncopies; ++j)
2941 {
2942 /* Build argument list for the vectorized call. */
2943 if (j == 0)
2944 vargs.create (nargs);
2945 else
2946 vargs.truncate (0);
2947
2948 for (i = 0; i < nargs; i++)
2949 {
2950 unsigned int k, l, m, o;
2951 tree atype;
2952 op = gimple_call_arg (stmt, i);
2953 switch (bestn->simdclone->args[i].arg_type)
2954 {
2955 case SIMD_CLONE_ARG_TYPE_VECTOR:
2956 atype = bestn->simdclone->args[i].vector_type;
2957 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2958 for (m = j * o; m < (j + 1) * o; m++)
2959 {
2960 if (TYPE_VECTOR_SUBPARTS (atype)
2961 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2962 {
2963 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2964 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2965 / TYPE_VECTOR_SUBPARTS (atype));
2966 gcc_assert ((k & (k - 1)) == 0);
2967 if (m == 0)
2968 vec_oprnd0
2969 = vect_get_vec_def_for_operand (op, stmt, NULL);
2970 else
2971 {
2972 vec_oprnd0 = arginfo[i].op;
2973 if ((m & (k - 1)) == 0)
2974 vec_oprnd0
2975 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2976 vec_oprnd0);
2977 }
2978 arginfo[i].op = vec_oprnd0;
2979 vec_oprnd0
2980 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2981 size_int (prec),
2982 bitsize_int ((m & (k - 1)) * prec));
2983 new_stmt
2984 = gimple_build_assign (make_ssa_name (atype),
2985 vec_oprnd0);
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987 vargs.safe_push (gimple_assign_lhs (new_stmt));
2988 }
2989 else
2990 {
2991 k = (TYPE_VECTOR_SUBPARTS (atype)
2992 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2993 gcc_assert ((k & (k - 1)) == 0);
2994 vec<constructor_elt, va_gc> *ctor_elts;
2995 if (k != 1)
2996 vec_alloc (ctor_elts, k);
2997 else
2998 ctor_elts = NULL;
2999 for (l = 0; l < k; l++)
3000 {
3001 if (m == 0 && l == 0)
3002 vec_oprnd0
3003 = vect_get_vec_def_for_operand (op, stmt, NULL);
3004 else
3005 vec_oprnd0
3006 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3007 arginfo[i].op);
3008 arginfo[i].op = vec_oprnd0;
3009 if (k == 1)
3010 break;
3011 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3012 vec_oprnd0);
3013 }
3014 if (k == 1)
3015 vargs.safe_push (vec_oprnd0);
3016 else
3017 {
3018 vec_oprnd0 = build_constructor (atype, ctor_elts);
3019 new_stmt
3020 = gimple_build_assign (make_ssa_name (atype),
3021 vec_oprnd0);
3022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3023 vargs.safe_push (gimple_assign_lhs (new_stmt));
3024 }
3025 }
3026 }
3027 break;
3028 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3029 vargs.safe_push (op);
3030 break;
3031 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3032 if (j == 0)
3033 {
3034 gimple_seq stmts;
3035 arginfo[i].op
3036 = force_gimple_operand (arginfo[i].op, &stmts, true,
3037 NULL_TREE);
3038 if (stmts != NULL)
3039 {
3040 basic_block new_bb;
3041 edge pe = loop_preheader_edge (loop);
3042 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3043 gcc_assert (!new_bb);
3044 }
3045 tree phi_res = copy_ssa_name (op);
3046 gphi *new_phi = create_phi_node (phi_res, loop->header);
3047 set_vinfo_for_stmt (new_phi,
3048 new_stmt_vec_info (new_phi, loop_vinfo,
3049 NULL));
3050 add_phi_arg (new_phi, arginfo[i].op,
3051 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3052 enum tree_code code
3053 = POINTER_TYPE_P (TREE_TYPE (op))
3054 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3055 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3056 ? sizetype : TREE_TYPE (op);
3057 widest_int cst
3058 = wi::mul (bestn->simdclone->args[i].linear_step,
3059 ncopies * nunits);
3060 tree tcst = wide_int_to_tree (type, cst);
3061 tree phi_arg = copy_ssa_name (op);
3062 new_stmt
3063 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3064 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3065 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3066 set_vinfo_for_stmt (new_stmt,
3067 new_stmt_vec_info (new_stmt, loop_vinfo,
3068 NULL));
3069 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3070 UNKNOWN_LOCATION);
3071 arginfo[i].op = phi_res;
3072 vargs.safe_push (phi_res);
3073 }
3074 else
3075 {
3076 enum tree_code code
3077 = POINTER_TYPE_P (TREE_TYPE (op))
3078 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3079 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3080 ? sizetype : TREE_TYPE (op);
3081 widest_int cst
3082 = wi::mul (bestn->simdclone->args[i].linear_step,
3083 j * nunits);
3084 tree tcst = wide_int_to_tree (type, cst);
3085 new_temp = make_ssa_name (TREE_TYPE (op));
3086 new_stmt = gimple_build_assign (new_temp, code,
3087 arginfo[i].op, tcst);
3088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3089 vargs.safe_push (new_temp);
3090 }
3091 break;
3092 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3093 default:
3094 gcc_unreachable ();
3095 }
3096 }
3097
3098 new_stmt = gimple_build_call_vec (fndecl, vargs);
3099 if (vec_dest)
3100 {
3101 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3102 if (ratype)
3103 new_temp = create_tmp_var (ratype);
3104 else if (TYPE_VECTOR_SUBPARTS (vectype)
3105 == TYPE_VECTOR_SUBPARTS (rtype))
3106 new_temp = make_ssa_name (vec_dest, new_stmt);
3107 else
3108 new_temp = make_ssa_name (rtype, new_stmt);
3109 gimple_call_set_lhs (new_stmt, new_temp);
3110 }
3111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112
3113 if (vec_dest)
3114 {
3115 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3116 {
3117 unsigned int k, l;
3118 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3119 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3120 gcc_assert ((k & (k - 1)) == 0);
3121 for (l = 0; l < k; l++)
3122 {
3123 tree t;
3124 if (ratype)
3125 {
3126 t = build_fold_addr_expr (new_temp);
3127 t = build2 (MEM_REF, vectype, t,
3128 build_int_cst (TREE_TYPE (t),
3129 l * prec / BITS_PER_UNIT));
3130 }
3131 else
3132 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3133 size_int (prec), bitsize_int (l * prec));
3134 new_stmt
3135 = gimple_build_assign (make_ssa_name (vectype), t);
3136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3137 if (j == 0 && l == 0)
3138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3139 else
3140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141
3142 prev_stmt_info = vinfo_for_stmt (new_stmt);
3143 }
3144
3145 if (ratype)
3146 {
3147 tree clobber = build_constructor (ratype, NULL);
3148 TREE_THIS_VOLATILE (clobber) = 1;
3149 new_stmt = gimple_build_assign (new_temp, clobber);
3150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151 }
3152 continue;
3153 }
3154 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3155 {
3156 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3157 / TYPE_VECTOR_SUBPARTS (rtype));
3158 gcc_assert ((k & (k - 1)) == 0);
3159 if ((j & (k - 1)) == 0)
3160 vec_alloc (ret_ctor_elts, k);
3161 if (ratype)
3162 {
3163 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3164 for (m = 0; m < o; m++)
3165 {
3166 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3167 size_int (m), NULL_TREE, NULL_TREE);
3168 new_stmt
3169 = gimple_build_assign (make_ssa_name (rtype), tem);
3170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3171 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3172 gimple_assign_lhs (new_stmt));
3173 }
3174 tree clobber = build_constructor (ratype, NULL);
3175 TREE_THIS_VOLATILE (clobber) = 1;
3176 new_stmt = gimple_build_assign (new_temp, clobber);
3177 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3178 }
3179 else
3180 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3181 if ((j & (k - 1)) != k - 1)
3182 continue;
3183 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3184 new_stmt
3185 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187
3188 if ((unsigned) j == k - 1)
3189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3190 else
3191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3192
3193 prev_stmt_info = vinfo_for_stmt (new_stmt);
3194 continue;
3195 }
3196 else if (ratype)
3197 {
3198 tree t = build_fold_addr_expr (new_temp);
3199 t = build2 (MEM_REF, vectype, t,
3200 build_int_cst (TREE_TYPE (t), 0));
3201 new_stmt
3202 = gimple_build_assign (make_ssa_name (vec_dest), t);
3203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3204 tree clobber = build_constructor (ratype, NULL);
3205 TREE_THIS_VOLATILE (clobber) = 1;
3206 vect_finish_stmt_generation (stmt,
3207 gimple_build_assign (new_temp,
3208 clobber), gsi);
3209 }
3210 }
3211
3212 if (j == 0)
3213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3214 else
3215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3216
3217 prev_stmt_info = vinfo_for_stmt (new_stmt);
3218 }
3219
3220 vargs.release ();
3221
3222  /* The call in STMT might prevent it from being removed in dce.
3223     However, we cannot remove it here because of the way the ssa name
3224     it defines is mapped to the new definition.  So just replace the
3225     rhs of the statement with something harmless.  */
3226
3227 if (slp_node)
3228 return true;
3229
3230 if (scalar_dest)
3231 {
3232 type = TREE_TYPE (scalar_dest);
3233 if (is_pattern_stmt_p (stmt_info))
3234 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3235 else
3236 lhs = gimple_call_lhs (stmt);
3237 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3238 }
3239 else
3240 new_stmt = gimple_build_nop ();
3241 set_vinfo_for_stmt (new_stmt, stmt_info);
3242 set_vinfo_for_stmt (stmt, NULL);
3243 STMT_VINFO_STMT (stmt_info) = new_stmt;
3244 gsi_replace (gsi, new_stmt, true);
3245 unlink_stmt_vdef (stmt);
3246
3247 return true;
3248}
3249
3250
3251/* Function vect_gen_widened_results_half
3252
3253   Create a vector stmt whose code, number of operands, and result
3254   variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
3255   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3256 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3257 needs to be created (DECL is a function-decl of a target-builtin).
3258 STMT is the original scalar stmt that we are vectorizing. */
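/* Illustrative example (assumed codes): widening a V8HI multiplication into
   V4SI results calls this helper twice, once with VEC_WIDEN_MULT_LO_EXPR and
   once with VEC_WIDEN_MULT_HI_EXPR, so the two generated stmts together
   cover all eight input lanes.  */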
3259
3260static gimple
3261vect_gen_widened_results_half (enum tree_code code,
3262 tree decl,
3263 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3264 tree vec_dest, gimple_stmt_iterator *gsi,
3265 gimple stmt)
3266{
3267 gimple new_stmt;
3268 tree new_temp;
3269
3270 /* Generate half of the widened result: */
3271 if (code == CALL_EXPR)
3272 {
3273 /* Target specific support */
3274 if (op_type == binary_op)
3275 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3276 else
3277 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3278 new_temp = make_ssa_name (vec_dest, new_stmt);
3279 gimple_call_set_lhs (new_stmt, new_temp);
3280 }
3281 else
3282 {
3283 /* Generic support */
3284 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3285 if (op_type != binary_op)
3286 vec_oprnd1 = NULL;
3287 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3288 new_temp = make_ssa_name (vec_dest, new_stmt);
3289 gimple_assign_set_lhs (new_stmt, new_temp);
3290 }
3291 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3292
3293 return new_stmt;
3294}
3295
3296
3297/* Get vectorized definitions for loop-based vectorization. For the first
3298 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3299 scalar operand), and for the rest we get a copy with
3300 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3301 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3302 The vectors are collected into VEC_OPRNDS. */
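/* For instance, with MULTI_STEP_CVT == 1 four vector defs are pushed in
   total: one pair by the outer call and one pair by the recursive call,
   each def after the first being a stmt-copy of the previous one.  */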
3303
3304static void
3305vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3306 vec<tree> *vec_oprnds, int multi_step_cvt)
3307{
3308 tree vec_oprnd;
3309
3310 /* Get first vector operand. */
3311  /* All the vector operands except the very first one (which is the scalar
3312     operand) are stmt copies.  */
3313 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3314 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3315 else
3316 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3317
3318 vec_oprnds->quick_push (vec_oprnd);
3319
3320 /* Get second vector operand. */
3321 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3322 vec_oprnds->quick_push (vec_oprnd);
3323
3324 *oprnd = vec_oprnd;
3325
3326 /* For conversion in multiple steps, continue to get operands
3327 recursively. */
3328 if (multi_step_cvt)
3329 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3330}
3331
3332
3333/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3334 For multi-step conversions store the resulting vectors and call the function
3335 recursively. */
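/* Illustrative example (assumed types): demoting int to short with V4SI
   operand vectors pairs them two at a time, so each generated
   VEC_PACK_TRUNC_EXPR consumes (*vec_oprnds)[i] and (*vec_oprnds)[i + 1]
   and produces a single V8HI result.  */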
3336
3337static void
3338vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3339 int multi_step_cvt, gimple stmt,
3340 vec<tree> vec_dsts,
3341 gimple_stmt_iterator *gsi,
3342 slp_tree slp_node, enum tree_code code,
3343 stmt_vec_info *prev_stmt_info)
3344{
3345 unsigned int i;
3346 tree vop0, vop1, new_tmp, vec_dest;
3347 gimple new_stmt;
3348 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3349
3350 vec_dest = vec_dsts.pop ();
3351
3352 for (i = 0; i < vec_oprnds->length (); i += 2)
3353 {
3354 /* Create demotion operation. */
3355 vop0 = (*vec_oprnds)[i];
3356 vop1 = (*vec_oprnds)[i + 1];
3357 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3358 new_tmp = make_ssa_name (vec_dest, new_stmt);
3359 gimple_assign_set_lhs (new_stmt, new_tmp);
3360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361
3362 if (multi_step_cvt)
3363 /* Store the resulting vector for next recursive call. */
3364 (*vec_oprnds)[i/2] = new_tmp;
3365 else
3366 {
3367 /* This is the last step of the conversion sequence. Store the
3368	     vectors in SLP_NODE or in the vector info of the scalar statement
3369	     (or in the STMT_VINFO_RELATED_STMT chain).  */
3370 if (slp_node)
3371 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3372 else
3373 {
3374 if (!*prev_stmt_info)
3375 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3376 else
3377 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3378
3379 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3380 }
3381 }
3382 }
3383
3384 /* For multi-step demotion operations we first generate demotion operations
3385 from the source type to the intermediate types, and then combine the
3386     results (stored in VEC_OPRNDS) in a demotion operation to the destination
3387 type. */
3388 if (multi_step_cvt)
3389 {
3390 /* At each level of recursion we have half of the operands we had at the
3391 previous level. */
3392 vec_oprnds->truncate ((i+1)/2);
3393 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3394 stmt, vec_dsts, gsi, slp_node,
3395 VEC_PACK_TRUNC_EXPR,
3396 prev_stmt_info);
3397 }
3398
3399 vec_dsts.quick_push (vec_dest);
3400}
3401
3402
3403/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3404 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3405 the resulting vectors and call the function recursively. */
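/* As a concrete sketch, every entry of VEC_OPRNDS0 yields two statements
   (the LO and HI halves produced by vect_gen_widened_results_half), so on
   return VEC_OPRNDS0 holds twice as many elements, the widened results in
   order.  */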
3406
3407static void
3408vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3409 vec<tree> *vec_oprnds1,
3410 gimple stmt, tree vec_dest,
3411 gimple_stmt_iterator *gsi,
3412 enum tree_code code1,
3413 enum tree_code code2, tree decl1,
3414 tree decl2, int op_type)
3415{
3416 int i;
3417 tree vop0, vop1, new_tmp1, new_tmp2;
3418 gimple new_stmt1, new_stmt2;
3419 vec<tree> vec_tmp = vNULL;
3420
3421 vec_tmp.create (vec_oprnds0->length () * 2);
3422 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3423 {
3424 if (op_type == binary_op)
3425 vop1 = (*vec_oprnds1)[i];
3426 else
3427 vop1 = NULL_TREE;
3428
3429 /* Generate the two halves of promotion operation. */
3430 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3431 op_type, vec_dest, gsi, stmt);
3432 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3433 op_type, vec_dest, gsi, stmt);
3434 if (is_gimple_call (new_stmt1))
3435 {
3436 new_tmp1 = gimple_call_lhs (new_stmt1);
3437 new_tmp2 = gimple_call_lhs (new_stmt2);
3438 }
3439 else
3440 {
3441 new_tmp1 = gimple_assign_lhs (new_stmt1);
3442 new_tmp2 = gimple_assign_lhs (new_stmt2);
3443 }
3444
3445 /* Store the results for the next step. */
3446 vec_tmp.quick_push (new_tmp1);
3447 vec_tmp.quick_push (new_tmp2);
3448 }
3449
3450 vec_oprnds0->release ();
3451 *vec_oprnds0 = vec_tmp;
3452}
3453
3454
3455/* Check if STMT performs a conversion operation that can be vectorized.
3456 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3457 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3458 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3459
3460static bool
3461vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3462 gimple *vec_stmt, slp_tree slp_node)
3463{
3464 tree vec_dest;
3465 tree scalar_dest;
3466 tree op0, op1 = NULL_TREE;
3467 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3468 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3469 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3470 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3471 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3472 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3473 tree new_temp;
3474 tree def;
3475 gimple def_stmt;
3476 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3477 gimple new_stmt = NULL;
3478 stmt_vec_info prev_stmt_info;
3479 int nunits_in;
3480 int nunits_out;
3481 tree vectype_out, vectype_in;
3482 int ncopies, i, j;
3483 tree lhs_type, rhs_type;
3484 enum { NARROW, NONE, WIDEN } modifier;
3485 vec<tree> vec_oprnds0 = vNULL;
3486 vec<tree> vec_oprnds1 = vNULL;
3487 tree vop0;
3488 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3489 int multi_step_cvt = 0;
3490 vec<tree> vec_dsts = vNULL;
3491 vec<tree> interm_types = vNULL;
3492 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3493 int op_type;
3494 machine_mode rhs_mode;
3495 unsigned short fltsz;
3496
3497 /* Is STMT a vectorizable conversion? */
3498
3499 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3500 return false;
3501
3502 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3503 return false;
3504
3505 if (!is_gimple_assign (stmt))
3506 return false;
3507
3508 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3509 return false;
3510
3511 code = gimple_assign_rhs_code (stmt);
3512 if (!CONVERT_EXPR_CODE_P (code)
3513 && code != FIX_TRUNC_EXPR
3514 && code != FLOAT_EXPR
3515 && code != WIDEN_MULT_EXPR
3516 && code != WIDEN_LSHIFT_EXPR)
3517 return false;
3518
3519 op_type = TREE_CODE_LENGTH (code);
3520
3521 /* Check types of lhs and rhs. */
3522 scalar_dest = gimple_assign_lhs (stmt);
3523 lhs_type = TREE_TYPE (scalar_dest);
3524 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525
3526 op0 = gimple_assign_rhs1 (stmt);
3527 rhs_type = TREE_TYPE (op0);
3528
3529 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3530 && !((INTEGRAL_TYPE_P (lhs_type)
3531 && INTEGRAL_TYPE_P (rhs_type))
3532 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3533 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3534 return false;
3535
3536 if ((INTEGRAL_TYPE_P (lhs_type)
3537 && (TYPE_PRECISION (lhs_type)
3538 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3539 || (INTEGRAL_TYPE_P (rhs_type)
3540 && (TYPE_PRECISION (rhs_type)
3541 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542 {
3543 if (dump_enabled_p ())
3544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3545 "type conversion to/from bit-precision unsupported."
3546 "\n");
3547 return false;
3548 }
3549
3550 /* Check the operands of the operation. */
3551 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3552 &def_stmt, &def, &dt[0], &vectype_in))
3553 {
3554 if (dump_enabled_p ())
3555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556 "use not simple.\n");
3557 return false;
3558 }
3559 if (op_type == binary_op)
3560 {
3561 bool ok;
3562
3563 op1 = gimple_assign_rhs2 (stmt);
3564 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3565 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3566 OP1. */
3567 if (CONSTANT_CLASS_P (op0))
3568 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3569 &def_stmt, &def, &dt[1], &vectype_in);
3570 else
3571 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3572 &def, &dt[1]);
3573
3574 if (!ok)
3575 {
3576 if (dump_enabled_p ())
3577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3578 "use not simple.\n");
3579 return false;
3580 }
3581 }
3582
3583  /* If op0 is an external or constant def, use a vector type of
3584 the same size as the output vector type. */
3585 if (!vectype_in)
3586 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3587 if (vec_stmt)
3588 gcc_assert (vectype_in);
3589 if (!vectype_in)
3590 {
3591 if (dump_enabled_p ())
3592 {
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "no vectype for scalar type ");
3595 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3596 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3597 }
3598
3599 return false;
3600 }
3601
3602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3603 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3604 if (nunits_in < nunits_out)
3605 modifier = NARROW;
3606 else if (nunits_out == nunits_in)
3607 modifier = NONE;
3608 else
3609 modifier = WIDEN;
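  /* E.g. (illustrative, 128-bit vectors): converting float to double has
     nunits_in == 4 and nunits_out == 2, so modifier == WIDEN and each input
     vector produces two output vectors; the reverse conversion is NARROW and
     combines two input vectors per output vector.  */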
3610
3611 /* Multiple types in SLP are handled by creating the appropriate number of
3612 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3613 case of SLP. */
3614 if (slp_node || PURE_SLP_STMT (stmt_info))
3615 ncopies = 1;
3616 else if (modifier == NARROW)
3617 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3618 else
3619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3620
3621 /* Sanity check: make sure that at least one copy of the vectorized stmt
3622 needs to be generated. */
3623 gcc_assert (ncopies >= 1);
3624
3625 /* Supportable by target? */
3626 switch (modifier)
3627 {
3628 case NONE:
3629 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3630 return false;
3631 if (supportable_convert_operation (code, vectype_out, vectype_in,
3632 &decl1, &code1))
3633 break;
3634 /* FALLTHRU */
3635 unsupported:
3636 if (dump_enabled_p ())
3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3638 "conversion not supported by target.\n");
3639 return false;
3640
3641 case WIDEN:
3642 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3643 &code1, &code2, &multi_step_cvt,
3644 &interm_types))
3645 {
3646 /* Binary widening operation can only be supported directly by the
3647 architecture. */
3648 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3649 break;
3650 }
3651
3652 if (code != FLOAT_EXPR
3653 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3654 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3655 goto unsupported;
3656
3657 rhs_mode = TYPE_MODE (rhs_type);
3658 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3659 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3660 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3661 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662 {
3663 cvt_type
3664 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3665 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3666 if (cvt_type == NULL_TREE)
3667 goto unsupported;
3668
3669 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670 {
3671 if (!supportable_convert_operation (code, vectype_out,
3672 cvt_type, &decl1, &codecvt1))
3673 goto unsupported;
3674 }
3675 else if (!supportable_widening_operation (code, stmt, vectype_out,
3676 cvt_type, &codecvt1,
3677 &codecvt2, &multi_step_cvt,
3678 &interm_types))
3679 continue;
3680 else
3681 gcc_assert (multi_step_cvt == 0);
3682
3683 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3684 vectype_in, &code1, &code2,
3685 &multi_step_cvt, &interm_types))
3686 break;
3687 }
3688
3689 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3690 goto unsupported;
3691
3692 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3693 codecvt2 = ERROR_MARK;
3694 else
3695 {
3696 multi_step_cvt++;
3697 interm_types.safe_push (cvt_type);
3698 cvt_type = NULL_TREE;
3699 }
3700 break;
3701
3702 case NARROW:
3703 gcc_assert (op_type == unary_op);
3704 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3705 &code1, &multi_step_cvt,
3706 &interm_types))
3707 break;
3708
3709 if (code != FIX_TRUNC_EXPR
3710 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))