LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
86/// A helper function that returns the reciprocal of the block probability of
87/// predicated blocks. If we return X, we are assuming the predicated block
88/// will execute once for every X iterations of the loop header.
89///
90/// TODO: We should use actual block probability here, if available. Currently,
91/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() {
  // Predicated blocks are currently assumed to execute with 50% probability,
  // i.e. once for every two iterations of the loop header.
  constexpr unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
93
94/// A range of powers-of-2 vectorization factors with fixed start and
95/// adjustable end. The range includes start and excludes end, e.g.,:
96/// [1, 16) = {1, 2, 4, 8}
97struct VFRange {
98 // A power of 2.
100
101 // A power of 2. If End <= Start range is empty.
103
104 bool isEmpty() const {
106 }
107
109 : Start(Start), End(End) {
111 "Both Start and End should have the same scalable flag");
113 "Expected Start to be a power of 2");
115 "Expected End to be a power of 2");
116 }
117
118 /// Iterator to iterate over vectorization factors in a VFRange.
120 : public iterator_facade_base<iterator, std::forward_iterator_tag,
121 ElementCount> {
122 ElementCount VF;
123
124 public:
125 iterator(ElementCount VF) : VF(VF) {}
126
127 bool operator==(const iterator &Other) const { return VF == Other.VF; }
128
129 ElementCount operator*() const { return VF; }
130
132 VF *= 2;
133 return *this;
134 }
135 };
136
140 return iterator(End);
141 }
142};
143
144using VPlanPtr = std::unique_ptr<VPlan>;
145
146/// In what follows, the term "input IR" refers to code that is fed into the
147/// vectorizer whereas the term "output IR" refers to code that is generated by
148/// the vectorizer.
149
150/// VPLane provides a way to access lanes in both fixed width and scalable
151/// vectors, where for the latter the lane index sometimes needs calculating
152/// as a runtime expression.
153class VPLane {
154public:
155 /// Kind describes how to interpret Lane.
156 enum class Kind : uint8_t {
157 /// For First, Lane is the index into the first N elements of a
158 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
159 First,
160 /// For ScalableLast, Lane is the offset from the start of the last
161 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
162 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
163 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
165 };
166
167private:
168 /// in [0..VF)
169 unsigned Lane;
170
171 /// Indicates how the Lane should be interpreted, as described above.
172 Kind LaneKind;
173
174public:
175 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
176 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
177
179
180 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
181 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
182 "trying to extract with invalid offset");
183 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
184 Kind LaneKind;
185 if (VF.isScalable())
186 // In this case 'LaneOffset' refers to the offset from the start of the
187 // last subvector with VF.getKnownMinValue() elements.
189 else
190 LaneKind = VPLane::Kind::First;
191 return VPLane(LaneOffset, LaneKind);
192 }
193
195 return getLaneFromEnd(VF, 1);
196 }
197
198 /// Returns a compile-time known value for the lane index and asserts if the
199 /// lane can only be calculated at runtime.
200 unsigned getKnownLane() const {
201 assert(LaneKind == Kind::First);
202 return Lane;
203 }
204
205 /// Returns an expression describing the lane index that can be used at
206 /// runtime.
207 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
208
209 /// Returns the Kind of lane offset.
210 Kind getKind() const { return LaneKind; }
211
212 /// Returns true if this is the first lane of the whole vector.
213 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
214
215 /// Maps the lane to a cache index based on \p VF.
216 unsigned mapToCacheIndex(const ElementCount &VF) const {
217 switch (LaneKind) {
219 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
220 return VF.getKnownMinValue() + Lane;
221 default:
222 assert(Lane < VF.getKnownMinValue());
223 return Lane;
224 }
225 }
226};
227
228/// VPTransformState holds information passed down when "executing" a VPlan,
229/// needed for generating the output IR.
234 Loop *CurrentParentLoop, Type *CanonicalIVTy);
235 /// Target Transform Info.
237
238 /// The chosen Vectorization Factor of the loop being vectorized.
240
241 /// Hold the index to generate specific scalar instructions. Null indicates
242 /// that all instances are to be generated, using either scalar or vector
243 /// instructions.
244 std::optional<VPLane> Lane;
245
246 struct DataState {
247 // Each value from the original loop, when vectorized, is represented by a
248 // vector value in the map.
250
253
254 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
255 /// is false, otherwise return the generated scalar. \see set.
256 Value *get(VPValue *Def, bool IsScalar = false);
257
258 /// Get the generated Value for a given VPValue \p Def and a given \p Lane.
259 Value *get(VPValue *Def, const VPLane &Lane);
260
261 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
262
264 auto I = Data.VPV2Scalars.find(Def);
265 if (I == Data.VPV2Scalars.end())
266 return false;
267 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
268 return CacheIdx < I->second.size() && I->second[CacheIdx];
269 }
270
271 /// Set the generated vector Value for a given VPValue, if \p
272 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
273 void set(VPValue *Def, Value *V, bool IsScalar = false) {
274 if (IsScalar) {
275 set(Def, V, VPLane(0));
276 return;
277 }
278 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
279 "scalar values must be stored as (0, 0)");
280 Data.VPV2Vector[Def] = V;
281 }
282
283 /// Reset an existing vector value for \p Def.
284 void reset(VPValue *Def, Value *V) {
285 assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
286 Data.VPV2Vector[Def] = V;
287 }
288
289 /// Set the generated scalar \p V for \p Def and the given \p Lane.
290 void set(VPValue *Def, Value *V, const VPLane &Lane) {
291 auto &Scalars = Data.VPV2Scalars[Def];
292 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
293 if (Scalars.size() <= CacheIdx)
294 Scalars.resize(CacheIdx + 1);
295 assert(!Scalars[CacheIdx] && "should overwrite existing value");
296 Scalars[CacheIdx] = V;
297 }
298
299 /// Reset an existing scalar value for \p Def and a given \p Lane.
300 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
301 auto Iter = Data.VPV2Scalars.find(Def);
302 assert(Iter != Data.VPV2Scalars.end() &&
303 "need to overwrite existing value");
304 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
305 assert(CacheIdx < Iter->second.size() &&
306 "need to overwrite existing value");
307 Iter->second[CacheIdx] = V;
308 }
309
310 /// Add additional metadata to \p To that was not present on \p Orig.
311 ///
312 /// Currently this is used to add the noalias annotations based on the
313 /// inserted memchecks. Use this for instructions that are *cloned* into the
314 /// vector loop.
315 void addNewMetadata(Instruction *To, const Instruction *Orig);
316
317 /// Add metadata from one instruction to another.
318 ///
319 /// This includes both the original MDs from \p From and additional ones (\see
320 /// addNewMetadata). Use this for *newly created* instructions in the vector
321 /// loop.
322 void addMetadata(Value *To, Instruction *From);
323
324 /// Set the debug location in the builder using the debug location \p DL.
326
327 /// Construct the vector value of a scalarized value \p V one lane at a time.
328 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
329
330 /// Hold state information used when constructing the CFG of the output IR,
331 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
332 struct CFGState {
333 /// The previous VPBasicBlock visited. Initially set to null.
335
336 /// The previous IR BasicBlock created or used. Initially set to the new
337 /// header BasicBlock.
338 BasicBlock *PrevBB = nullptr;
339
340 /// The last IR BasicBlock in the output IR. Set to the exit block of the
341 /// vector loop.
342 BasicBlock *ExitBB = nullptr;
343
344 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
345 /// of replication, maps the BasicBlock of the last replica created.
347
348 /// Updater for the DominatorTree.
350
352 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
353
354 /// Returns the BasicBlock* mapped to the pre-header of the loop region
355 /// containing \p R.
358
359 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
361
362 /// Hold a reference to the IRBuilder used to generate output IR code.
364
365 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
367
368 /// Pointer to the VPlan code is generated for.
370
371 /// The parent loop object for the current scope, or nullptr.
373
374 /// LoopVersioning. It's only set up (non-null) if memchecks were
375 /// used.
376 ///
377 /// This is currently only used to add no-alias metadata based on the
378 /// memchecks. The actual versioning is performed manually.
380
381 /// Map SCEVs to their expanded values. Populated when executing
382 /// VPExpandSCEVRecipes.
384
385 /// VPlan-based type analysis.
387};
388
389/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
390/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
392 friend class VPBlockUtils;
393
394 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
395
396 /// An optional name for the block.
397 std::string Name;
398
399 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
400 /// it is a topmost VPBlockBase.
401 VPRegionBlock *Parent = nullptr;
402
403 /// List of predecessor blocks.
405
406 /// List of successor blocks.
408
409 /// VPlan containing the block. Can only be set on the entry block of the
410 /// plan.
411 VPlan *Plan = nullptr;
412
413 /// Add \p Successor as the last successor to this block.
414 void appendSuccessor(VPBlockBase *Successor) {
415 assert(Successor && "Cannot add nullptr successor!");
416 Successors.push_back(Successor);
417 }
418
419 /// Add \p Predecessor as the last predecessor to this block.
420 void appendPredecessor(VPBlockBase *Predecessor) {
421 assert(Predecessor && "Cannot add nullptr predecessor!");
422 Predecessors.push_back(Predecessor);
423 }
424
425 /// Remove \p Predecessor from the predecessors of this block.
426 void removePredecessor(VPBlockBase *Predecessor) {
427 auto Pos = find(Predecessors, Predecessor);
428 assert(Pos && "Predecessor does not exist");
429 Predecessors.erase(Pos);
430 }
431
432 /// Remove \p Successor from the successors of this block.
433 void removeSuccessor(VPBlockBase *Successor) {
434 auto Pos = find(Successors, Successor);
435 assert(Pos && "Successor does not exist");
436 Successors.erase(Pos);
437 }
438
439 /// This function replaces one predecessor with another, useful when
440 /// trying to replace an old block in the CFG with a new one.
441 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
442 auto I = find(Predecessors, Old);
443 assert(I != Predecessors.end());
444 assert(Old->getParent() == New->getParent() &&
445 "replaced predecessor must have the same parent");
446 *I = New;
447 }
448
449 /// This function replaces one successor with another, useful when
450 /// trying to replace an old block in the CFG with a new one.
451 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
452 auto I = find(Successors, Old);
453 assert(I != Successors.end());
454 assert(Old->getParent() == New->getParent() &&
455 "replaced successor must have the same parent");
456 *I = New;
457 }
458
459protected:
460 VPBlockBase(const unsigned char SC, const std::string &N)
461 : SubclassID(SC), Name(N) {}
462
463public:
464 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
465 /// that are actually instantiated. Values of this enumeration are kept in the
466 /// SubclassID field of the VPBlockBase objects. They are used for concrete
467 /// type identification.
468 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
469
471
472 virtual ~VPBlockBase() = default;
473
474 const std::string &getName() const { return Name; }
475
476 void setName(const Twine &newName) { Name = newName.str(); }
477
478 /// \return an ID for the concrete type of this object.
479 /// This is used to implement the classof checks. This should not be used
480 /// for any other purpose, as the values may change as LLVM evolves.
481 unsigned getVPBlockID() const { return SubclassID; }
482
483 VPRegionBlock *getParent() { return Parent; }
484 const VPRegionBlock *getParent() const { return Parent; }
485
486 /// \return A pointer to the plan containing the current block.
487 VPlan *getPlan();
488 const VPlan *getPlan() const;
489
490 /// Sets the pointer of the plan containing the block. The block must be the
491 /// entry block into the VPlan.
492 void setPlan(VPlan *ParentPlan);
493
494 void setParent(VPRegionBlock *P) { Parent = P; }
495
496 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
497 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
498 /// VPBlockBase is a VPBasicBlock, it is returned.
499 const VPBasicBlock *getEntryBasicBlock() const;
501
502 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
503 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
504 /// VPBlockBase is a VPBasicBlock, it is returned.
505 const VPBasicBlock *getExitingBasicBlock() const;
507
508 const VPBlocksTy &getSuccessors() const { return Successors; }
509 VPBlocksTy &getSuccessors() { return Successors; }
510
513
514 const VPBlocksTy &getPredecessors() const { return Predecessors; }
515 VPBlocksTy &getPredecessors() { return Predecessors; }
516
517 /// \return the successor of this VPBlockBase if it has a single successor.
518 /// Otherwise return a null pointer.
520 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
521 }
522
523 /// \return the predecessor of this VPBlockBase if it has a single
524 /// predecessor. Otherwise return a null pointer.
526 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
527 }
528
529 size_t getNumSuccessors() const { return Successors.size(); }
530 size_t getNumPredecessors() const { return Predecessors.size(); }
531
532 /// An Enclosing Block of a block B is any block containing B, including B
533 /// itself. \return the closest enclosing block starting from "this", which
534 /// has successors. \return the root enclosing block if all enclosing blocks
535 /// have no successors.
537
538 /// \return the closest enclosing block starting from "this", which has
539 /// predecessors. \return the root enclosing block if all enclosing blocks
540 /// have no predecessors.
542
543 /// \return the successors either attached directly to this VPBlockBase or, if
544 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
545 /// successors of its own, search recursively for the first enclosing
546 /// VPRegionBlock that has successors and return them. If no such
547 /// VPRegionBlock exists, return the (empty) successors of the topmost
548 /// VPBlockBase reached.
551 }
552
553 /// \return the hierarchical successor of this VPBlockBase if it has a single
554 /// hierarchical successor. Otherwise return a null pointer.
557 }
558
559 /// \return the predecessors either attached directly to this VPBlockBase or,
560 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
561 /// predecessors of its own, search recursively for the first enclosing
562 /// VPRegionBlock that has predecessors and return them. If no such
563 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
564 /// VPBlockBase reached.
567 }
568
569 /// \return the hierarchical predecessor of this VPBlockBase if it has a
570 /// single hierarchical predecessor. Otherwise return a null pointer.
573 }
574
575 /// Set a given VPBlockBase \p Successor as the single successor of this
576 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
577 /// This VPBlockBase must have no successors.
579 assert(Successors.empty() && "Setting one successor when others exist.");
580 assert(Successor->getParent() == getParent() &&
581 "connected blocks must have the same parent");
582 appendSuccessor(Successor);
583 }
584
585 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
586 /// successors of this VPBlockBase. This VPBlockBase is not added as
587 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
588 /// successors.
589 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
590 assert(Successors.empty() && "Setting two successors when others exist.");
591 appendSuccessor(IfTrue);
592 appendSuccessor(IfFalse);
593 }
594
595 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
596 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
597 /// as successor of any VPBasicBlock in \p NewPreds.
599 assert(Predecessors.empty() && "Block predecessors already set.");
600 for (auto *Pred : NewPreds)
601 appendPredecessor(Pred);
602 }
603
604 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
605 /// This VPBlockBase must have no successors. This VPBlockBase is not added
606 /// as predecessor of any VPBasicBlock in \p NewSuccs.
608 assert(Successors.empty() && "Block successors already set.");
609 for (auto *Succ : NewSuccs)
610 appendSuccessor(Succ);
611 }
612
613 /// Remove all the predecessors of this block.
614 void clearPredecessors() { Predecessors.clear(); }
615
616 /// Remove all the successors of this block.
617 void clearSuccessors() { Successors.clear(); }
618
619 /// Swap successors of the block. The block must have exactly 2 successors.
620 // TODO: This should be part of introducing conditional branch recipes rather
621 // than being independent.
623 assert(Successors.size() == 2 && "must have 2 successors to swap");
624 std::swap(Successors[0], Successors[1]);
625 }
626
627 /// The method which generates the output IR that correspond to this
628 /// VPBlockBase, thereby "executing" the VPlan.
629 virtual void execute(VPTransformState *State) = 0;
630
631 /// Return the cost of the block.
633
634 /// Return true if it is legal to hoist instructions into this block.
636 // There are currently no constraints that prevent an instruction to be
637 // hoisted into a VPBlockBase.
638 return true;
639 }
640
641#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
642 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
643 OS << getName();
644 }
645
646 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
647 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
648 /// consecutive numbers.
649 ///
650 /// Note that the numbering is applied to the whole VPlan, so printing
651 /// individual blocks is consistent with the whole VPlan printing.
652 virtual void print(raw_ostream &O, const Twine &Indent,
653 VPSlotTracker &SlotTracker) const = 0;
654
655 /// Print plain-text dump of this VPBlockBase to \p O.
656 void print(raw_ostream &O) const {
658 print(O, "", SlotTracker);
659 }
660
661 /// Print the successors of this block to \p O, prefixing all lines with \p
662 /// Indent.
663 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
664
665 /// Dump this VPBlockBase to dbgs().
666 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
667#endif
668
669 /// Clone the current block and its recipes without updating the operands of
670 /// the cloned recipes, including all blocks in the single-entry single-exit
671 /// region for VPRegionBlocks.
672 virtual VPBlockBase *clone() = 0;
673};
674
675/// Struct to hold various analysis needed for cost computations.
684
688 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
689 CM(CM), CostKind(CostKind) {}
690
691 /// Return the cost for \p UI with \p VF using the legacy cost model as
692 /// fallback until computing the cost of all recipes migrates to VPlan.
694
695 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
696 /// has already been pre-computed.
697 bool skipCostComputation(Instruction *UI, bool IsVector) const;
698
699 /// Returns the OperandInfo for \p V, if it is a live-in.
701};
702
703/// VPRecipeBase is a base class modeling a sequence of one or more output IR
704/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
705/// and is responsible for deleting its defined values. Single-value
706/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
707/// VPRecipeBase and VPValue separately.
708class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
709 public VPDef,
710 public VPUser {
711 friend VPBasicBlock;
712 friend class VPBlockUtils;
713
714 /// Each VPRecipe belongs to a single VPBasicBlock.
715 VPBasicBlock *Parent = nullptr;
716
717 /// The debug location for the recipe.
718 DebugLoc DL;
719
720public:
722 DebugLoc DL = {})
723 : VPDef(SC), VPUser(Operands), DL(DL) {}
724
725 template <typename IterT>
727 DebugLoc DL = {})
728 : VPDef(SC), VPUser(Operands), DL(DL) {}
729 virtual ~VPRecipeBase() = default;
730
731 /// Clone the current recipe.
732 virtual VPRecipeBase *clone() = 0;
733
734 /// \return the VPBasicBlock which this VPRecipe belongs to.
735 VPBasicBlock *getParent() { return Parent; }
736 const VPBasicBlock *getParent() const { return Parent; }
737
738 /// The method which generates the output IR instructions that correspond to
739 /// this VPRecipe, thereby "executing" the VPlan.
740 virtual void execute(VPTransformState &State) = 0;
741
742 /// Return the cost of this recipe, taking into account if the cost
743 /// computation should be skipped and the ForceTargetInstructionCost flag.
744 /// Also takes care of printing the cost for debugging.
746
747 /// Insert an unlinked recipe into a basic block immediately before
748 /// the specified recipe.
749 void insertBefore(VPRecipeBase *InsertPos);
750 /// Insert an unlinked recipe into \p BB immediately before the insertion
751 /// point \p IP;
753
754 /// Insert an unlinked Recipe into a basic block immediately after
755 /// the specified Recipe.
756 void insertAfter(VPRecipeBase *InsertPos);
757
758 /// Unlink this recipe from its current VPBasicBlock and insert it into
759 /// the VPBasicBlock that MovePos lives in, right after MovePos.
760 void moveAfter(VPRecipeBase *MovePos);
761
762 /// Unlink this recipe and insert into BB before I.
763 ///
764 /// \pre I is a valid iterator into BB.
766
767 /// This method unlinks 'this' from the containing basic block, but does not
768 /// delete it.
769 void removeFromParent();
770
771 /// This method unlinks 'this' from the containing basic block and deletes it.
772 ///
773 /// \returns an iterator pointing to the element after the erased one
775
776 /// Method to support type inquiry through isa, cast, and dyn_cast.
777 static inline bool classof(const VPDef *D) {
778 // All VPDefs are also VPRecipeBases.
779 return true;
780 }
781
782 static inline bool classof(const VPUser *U) { return true; }
783
784 /// Returns true if the recipe may have side-effects.
785 bool mayHaveSideEffects() const;
786
787 /// Returns true for PHI-like recipes.
788 bool isPhi() const {
789 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
790 }
791
792 /// Returns true if the recipe may read from memory.
793 bool mayReadFromMemory() const;
794
795 /// Returns true if the recipe may write to memory.
796 bool mayWriteToMemory() const;
797
798 /// Returns true if the recipe may read from or write to memory.
799 bool mayReadOrWriteMemory() const {
801 }
802
803 /// Returns the debug location of the recipe.
804 DebugLoc getDebugLoc() const { return DL; }
805
806protected:
807 /// Compute the cost of this recipe either using a recipe's specialized
808 /// implementation or using the legacy cost model and the underlying
809 /// instructions.
811 VPCostContext &Ctx) const;
812};
813
814// Helper macro to define common classof implementations for recipes.
815#define VP_CLASSOF_IMPL(VPDefID) \
816 static inline bool classof(const VPDef *D) { \
817 return D->getVPDefID() == VPDefID; \
818 } \
819 static inline bool classof(const VPValue *V) { \
820 auto *R = V->getDefiningRecipe(); \
821 return R && R->getVPDefID() == VPDefID; \
822 } \
823 static inline bool classof(const VPUser *U) { \
824 auto *R = dyn_cast<VPRecipeBase>(U); \
825 return R && R->getVPDefID() == VPDefID; \
826 } \
827 static inline bool classof(const VPRecipeBase *R) { \
828 return R->getVPDefID() == VPDefID; \
829 } \
830 static inline bool classof(const VPSingleDefRecipe *R) { \
831 return R->getVPDefID() == VPDefID; \
832 }
833
834/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one
835/// or more output IR instructions that define a single result VPValue.
836/// Note that VPRecipeBase must be inherited from before VPValue.
837class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
838public:
839 template <typename IterT>
840 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
841 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
842
843 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
844 DebugLoc DL = {})
845 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
846
847 template <typename IterT>
848 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
849 DebugLoc DL = {})
850 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
851
852 static inline bool classof(const VPRecipeBase *R) {
853 switch (R->getVPDefID()) {
854 case VPRecipeBase::VPDerivedIVSC:
855 case VPRecipeBase::VPEVLBasedIVPHISC:
856 case VPRecipeBase::VPExpandSCEVSC:
857 case VPRecipeBase::VPInstructionSC:
858 case VPRecipeBase::VPReductionEVLSC:
859 case VPRecipeBase::VPReductionSC:
860 case VPRecipeBase::VPReplicateSC:
861 case VPRecipeBase::VPScalarIVStepsSC:
862 case VPRecipeBase::VPVectorPointerSC:
863 case VPRecipeBase::VPReverseVectorPointerSC:
864 case VPRecipeBase::VPWidenCallSC:
865 case VPRecipeBase::VPWidenCanonicalIVSC:
866 case VPRecipeBase::VPWidenCastSC:
867 case VPRecipeBase::VPWidenGEPSC:
868 case VPRecipeBase::VPWidenIntrinsicSC:
869 case VPRecipeBase::VPWidenSC:
870 case VPRecipeBase::VPWidenEVLSC:
871 case VPRecipeBase::VPWidenSelectSC:
872 case VPRecipeBase::VPBlendSC:
873 case VPRecipeBase::VPPredInstPHISC:
874 case VPRecipeBase::VPCanonicalIVPHISC:
875 case VPRecipeBase::VPActiveLaneMaskPHISC:
876 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
877 case VPRecipeBase::VPWidenPHISC:
878 case VPRecipeBase::VPWidenIntOrFpInductionSC:
879 case VPRecipeBase::VPWidenPointerInductionSC:
880 case VPRecipeBase::VPReductionPHISC:
881 case VPRecipeBase::VPScalarCastSC:
882 case VPRecipeBase::VPPartialReductionSC:
883 return true;
884 case VPRecipeBase::VPBranchOnMaskSC:
885 case VPRecipeBase::VPInterleaveSC:
886 case VPRecipeBase::VPIRInstructionSC:
887 case VPRecipeBase::VPWidenLoadEVLSC:
888 case VPRecipeBase::VPWidenLoadSC:
889 case VPRecipeBase::VPWidenStoreEVLSC:
890 case VPRecipeBase::VPWidenStoreSC:
891 case VPRecipeBase::VPHistogramSC:
892 // TODO: Widened stores don't define a value, but widened loads do. Split
893 // the recipes to be able to make widened loads VPSingleDefRecipes.
894 return false;
895 }
896 llvm_unreachable("Unhandled VPDefID");
897 }
898
899 static inline bool classof(const VPUser *U) {
900 auto *R = dyn_cast<VPRecipeBase>(U);
901 return R && classof(R);
902 }
903
904 virtual VPSingleDefRecipe *clone() override = 0;
905
906 /// Returns the underlying instruction.
908 return cast<Instruction>(getUnderlyingValue());
909 }
911 return cast<Instruction>(getUnderlyingValue());
912 }
913
914#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
915 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
916 LLVM_DUMP_METHOD void dump() const;
917#endif
918};
919
920/// Class to record LLVM IR flag for a recipe along with it.
922 enum class OperationType : unsigned char {
923 Cmp,
924 OverflowingBinOp,
925 DisjointOp,
926 PossiblyExactOp,
927 GEPOp,
928 FPMathOp,
929 NonNegOp,
930 Other
931 };
932
933public:
934 struct WrapFlagsTy {
935 char HasNUW : 1;
936 char HasNSW : 1;
937
939 };
940
942 char IsDisjoint : 1;
944 };
945
946private:
947 struct ExactFlagsTy {
948 char IsExact : 1;
949 };
950 struct NonNegFlagsTy {
951 char NonNeg : 1;
952 };
953 struct FastMathFlagsTy {
954 char AllowReassoc : 1;
955 char NoNaNs : 1;
956 char NoInfs : 1;
957 char NoSignedZeros : 1;
958 char AllowReciprocal : 1;
959 char AllowContract : 1;
960 char ApproxFunc : 1;
961
962 FastMathFlagsTy(const FastMathFlags &FMF);
963 };
964
965 OperationType OpType;
966
967 union {
971 ExactFlagsTy ExactFlags;
973 NonNegFlagsTy NonNegFlags;
974 FastMathFlagsTy FMFs;
975 unsigned AllFlags;
976 };
977
978protected:
980 OpType = Other.OpType;
981 AllFlags = Other.AllFlags;
982 }
983
984public:
985 template <typename IterT>
986 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
987 : VPSingleDefRecipe(SC, Operands, DL) {
988 OpType = OperationType::Other;
989 AllFlags = 0;
990 }
991
992 template <typename IterT>
993 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
995 if (auto *Op = dyn_cast<CmpInst>(&I)) {
996 OpType = OperationType::Cmp;
997 CmpPredicate = Op->getPredicate();
998 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
999 OpType = OperationType::DisjointOp;
1000 DisjointFlags.IsDisjoint = Op->isDisjoint();
1001 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1002 OpType = OperationType::OverflowingBinOp;
1003 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1004 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1005 OpType = OperationType::PossiblyExactOp;
1006 ExactFlags.IsExact = Op->isExact();
1007 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1008 OpType = OperationType::GEPOp;
1009 GEPFlags = GEP->getNoWrapFlags();
1010 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1011 OpType = OperationType::NonNegOp;
1012 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1013 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1014 OpType = OperationType::FPMathOp;
1015 FMFs = Op->getFastMathFlags();
1016 } else {
1017 OpType = OperationType::Other;
1018 AllFlags = 0;
1019 }
1020 }
1021
1022 template <typename IterT>
1023 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1024 CmpInst::Predicate Pred, DebugLoc DL = {})
1025 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1026 CmpPredicate(Pred) {}
1027
1028 template <typename IterT>
1029 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1031 : VPSingleDefRecipe(SC, Operands, DL),
1032 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1033
1034 template <typename IterT>
1035 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1036 FastMathFlags FMFs, DebugLoc DL = {})
1037 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1038 FMFs(FMFs) {}
1039
1040 template <typename IterT>
1041 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1043 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1045
1046protected:
1047 template <typename IterT>
1048 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1050 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1051 GEPFlags(GEPFlags) {}
1052
1053public:
1054 static inline bool classof(const VPRecipeBase *R) {
1055 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1056 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1057 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1058 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1059 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1060 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1061 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1062 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1063 }
1064
1065 static inline bool classof(const VPUser *U) {
1066 auto *R = dyn_cast<VPRecipeBase>(U);
1067 return R && classof(R);
1068 }
1069
1070 /// Drop all poison-generating flags.
1072 // NOTE: This needs to be kept in-sync with
1073 // Instruction::dropPoisonGeneratingFlags.
1074 switch (OpType) {
1075 case OperationType::OverflowingBinOp:
1076 WrapFlags.HasNUW = false;
1077 WrapFlags.HasNSW = false;
1078 break;
1079 case OperationType::DisjointOp:
1080 DisjointFlags.IsDisjoint = false;
1081 break;
1082 case OperationType::PossiblyExactOp:
1083 ExactFlags.IsExact = false;
1084 break;
1085 case OperationType::GEPOp:
1087 break;
1088 case OperationType::FPMathOp:
1089 FMFs.NoNaNs = false;
1090 FMFs.NoInfs = false;
1091 break;
1092 case OperationType::NonNegOp:
1093 NonNegFlags.NonNeg = false;
1094 break;
1095 case OperationType::Cmp:
1096 case OperationType::Other:
1097 break;
1098 }
1099 }
1100
1101 /// Set the IR flags for \p I.
1102 void setFlags(Instruction *I) const {
1103 switch (OpType) {
1104 case OperationType::OverflowingBinOp:
1105 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1106 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1107 break;
1108 case OperationType::DisjointOp:
1109 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1110 break;
1111 case OperationType::PossiblyExactOp:
1112 I->setIsExact(ExactFlags.IsExact);
1113 break;
1114 case OperationType::GEPOp:
1115 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1116 break;
1117 case OperationType::FPMathOp:
1118 I->setHasAllowReassoc(FMFs.AllowReassoc);
1119 I->setHasNoNaNs(FMFs.NoNaNs);
1120 I->setHasNoInfs(FMFs.NoInfs);
1121 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1122 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1123 I->setHasAllowContract(FMFs.AllowContract);
1124 I->setHasApproxFunc(FMFs.ApproxFunc);
1125 break;
1126 case OperationType::NonNegOp:
1127 I->setNonNeg(NonNegFlags.NonNeg);
1128 break;
1129 case OperationType::Cmp:
1130 case OperationType::Other:
1131 break;
1132 }
1133 }
1134
1136 assert(OpType == OperationType::Cmp &&
1137 "recipe doesn't have a compare predicate");
1138 return CmpPredicate;
1139 }
1140
1142
1143 /// Returns true if the recipe has fast-math flags.
1144 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1145
1147
1148 bool hasNoUnsignedWrap() const {
1149 assert(OpType == OperationType::OverflowingBinOp &&
1150 "recipe doesn't have a NUW flag");
1151 return WrapFlags.HasNUW;
1152 }
1153
1154 bool hasNoSignedWrap() const {
1155 assert(OpType == OperationType::OverflowingBinOp &&
1156 "recipe doesn't have a NSW flag");
1157 return WrapFlags.HasNSW;
1158 }
1159
1160 bool isDisjoint() const {
1161 assert(OpType == OperationType::DisjointOp &&
1162 "recipe cannot have a disjoing flag");
1164 }
1165
1166#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1167 void printFlags(raw_ostream &O) const;
1168#endif
1169};
1170
1171/// Helper to access the operand that contains the unroll part for this recipe
1172/// after unrolling.
1173template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1174protected:
1175 /// Return the VPValue operand containing the unroll part or null if there is
1176 /// no such operand.
1178
1179 /// Return the unroll part.
1180 unsigned getUnrollPart(VPUser &U) const;
1181};
1182
1183/// This is a concrete Recipe that models a single VPlan-level instruction.
1184/// While as any Recipe it may generate a sequence of IR instructions when
1185/// executed, these instructions would always form a single-def expression as
1186/// the VPInstruction is also a single def-use vertex.
1188 public VPUnrollPartAccessor<1> {
1189 friend class VPlanSlp;
1190
1191public:
1192 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1193 enum {
1195 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1196 // values of a first-order recurrence.
1202 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1203 /// The first operand is the incoming value from the predecessor in VPlan,
1204 /// the second operand is the incoming value for all other predecessors
1205 /// (which are currently not modeled in VPlan).
1208 // Increment the canonical IV separately for each unrolled part.
1213 // Takes the VPValue to extract from as first operand and the lane or part
1214 // to extract as second operand, counting from the end starting with 1 for
1215 // last. The second operand must be a positive constant and <= VF.
1217 LogicalAnd, // Non-poison propagating logical And.
1218 // Add an offset in bytes (second operand) to a base pointer (first
1219 // operand). Only generates scalar values (either for the first lane only or
1220 // for all lanes, depending on its uses).
1222 // Returns a scalar boolean value, which is true if any lane of its (only
1223 // boolean) vector operand is true.
1225 };
1226
1227private:
1228 typedef unsigned char OpcodeTy;
1229 OpcodeTy Opcode;
1230
1231 /// An optional name that can be used for the generated IR instruction.
1232 const std::string Name;
1233
1234 /// Returns true if this VPInstruction generates scalar values for all lanes.
1235 /// Most VPInstructions generate a single value per part, either vector or
1236 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1237 /// values per all lanes, stemming from an original ingredient. This method
1238 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1239 /// underlying ingredient.
1240 bool doesGeneratePerAllLanes() const;
1241
1242 /// Returns true if we can generate a scalar for the first lane only if
1243 /// needed.
1244 bool canGenerateScalarForFirstLane() const;
1245
1246 /// Utility methods serving execute(): generates a single vector instance of
1247 /// the modeled instruction. \returns the generated value. In some cases an
1248 /// existing value is returned rather than a generated one.
1249 Value *generate(VPTransformState &State);
1250
1251 /// Utility methods serving execute(): generates a scalar single instance of
1252 /// the modeled instruction for a given lane. \returns the scalar generated
1253 /// value for lane \p Lane.
1254 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1255
1256#if !defined(NDEBUG)
1257 /// Return true if the VPInstruction is a floating point math operation, i.e.
1258 /// has fast-math flags.
1259 bool isFPMathOp() const;
1260#endif
1261
1262public:
1264 const Twine &Name = "")
1265 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1266 Opcode(Opcode), Name(Name.str()) {}
1267
1268 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1269 DebugLoc DL = {}, const Twine &Name = "")
1271
1272 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1273 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1274
1275 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1276 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1277 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1278 Opcode(Opcode), Name(Name.str()) {}
1279
1280 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1281 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1282 const Twine &Name = "")
1283 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1284 Opcode(Opcode), Name(Name.str()) {
1285 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1286 }
1287
1289 DebugLoc DL = {}, const Twine &Name = "")
1290 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1291 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1292 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1293
1294 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1295 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1296
1297 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1298
1299 VPInstruction *clone() override {
1301 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1302 New->transferFlags(*this);
1303 return New;
1304 }
1305
1306 unsigned getOpcode() const { return Opcode; }
1307
1308 /// Generate the instruction.
1309 /// TODO: We currently execute only per-part unless a specific instance is
1310 /// provided.
1311 void execute(VPTransformState &State) override;
1312
1313 /// Return the cost of this VPInstruction.
1315 VPCostContext &Ctx) const override {
1316 // TODO: Compute accurate cost after retiring the legacy cost model.
1317 return 0;
1318 }
1319
1320#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1321 /// Print the VPInstruction to \p O.
1322 void print(raw_ostream &O, const Twine &Indent,
1323 VPSlotTracker &SlotTracker) const override;
1324
1325 /// Print the VPInstruction to dbgs() (for debugging).
1326 LLVM_DUMP_METHOD void dump() const;
1327#endif
1328
1329 bool hasResult() const {
1330 // CallInst may or may not have a result, depending on the called function.
1331 // Conservatively return calls have results for now.
1332 switch (getOpcode()) {
1333 case Instruction::Ret:
1334 case Instruction::Br:
1335 case Instruction::Store:
1336 case Instruction::Switch:
1337 case Instruction::IndirectBr:
1338 case Instruction::Resume:
1339 case Instruction::CatchRet:
1340 case Instruction::Unreachable:
1341 case Instruction::Fence:
1342 case Instruction::AtomicRMW:
1345 return false;
1346 default:
1347 return true;
1348 }
1349 }
1350
1351 /// Returns true if the underlying opcode may read from or write to memory.
1352 bool opcodeMayReadOrWriteFromMemory() const;
1353
1354 /// Returns true if the recipe only uses the first lane of operand \p Op.
1355 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1356
1357 /// Returns true if the recipe only uses the first part of operand \p Op.
1358 bool onlyFirstPartUsed(const VPValue *Op) const override;
1359
1360 /// Returns true if this VPInstruction produces a scalar value from a vector,
1361 /// e.g. by performing a reduction or extracting a lane.
1362 bool isVectorToScalar() const;
1363
1364 /// Returns true if this VPInstruction's operands are single scalars and the
1365 /// result is also a single scalar.
1366 bool isSingleScalar() const;
1367
1368 /// Returns the symbolic name assigned to the VPInstruction.
1369 StringRef getName() const { return Name; }
1370};
1371
1372/// A recipe to wrap an original IR instruction not to be modified during
1373/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
1374/// and it is used to add a new incoming value for the single predecessor VPBB.
1375/// Except for PHIs, VPIRInstructions cannot have any operands.
1377 Instruction &I;
1378
1379public:
1381 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1382
1383 ~VPIRInstruction() override = default;
1384
1385 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1386
1388 auto *R = new VPIRInstruction(I);
1389 for (auto *Op : operands())
1390 R->addOperand(Op);
1391 return R;
1392 }
1393
1394 void execute(VPTransformState &State) override;
1395
1396 /// Return the cost of this VPIRInstruction.
1398 VPCostContext &Ctx) const override;
1399
1400 Instruction &getInstruction() const { return I; }
1401
1402#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1403 /// Print the recipe.
1404 void print(raw_ostream &O, const Twine &Indent,
1405 VPSlotTracker &SlotTracker) const override;
1406#endif
1407
1408 bool usesScalars(const VPValue *Op) const override {
1410 "Op must be an operand of the recipe");
1411 return true;
1412 }
1413
1414 bool onlyFirstPartUsed(const VPValue *Op) const override {
1416 "Op must be an operand of the recipe");
1417 return true;
1418 }
1419
1420 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1422 "Op must be an operand of the recipe");
1423 return true;
1424 }
1425};
1426
1427/// VPWidenRecipe is a recipe for producing a widened instruction using the
1428/// opcode and operands of the recipe. This recipe covers most of the
1429/// traditional vectorization cases where each recipe transforms into a
1430/// vectorized version of itself.
1432 unsigned Opcode;
1433
1434protected:
1435 template <typename IterT>
1436 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1438 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1439
1440public:
1441 template <typename IterT>
1443 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1444
1445 ~VPWidenRecipe() override = default;
1446
1447 VPWidenRecipe *clone() override {
1448 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1449 R->transferFlags(*this);
1450 return R;
1451 }
1452
1453 static inline bool classof(const VPRecipeBase *R) {
1454 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1455 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1456 }
1457
1458 static inline bool classof(const VPUser *U) {
1459 auto *R = dyn_cast<VPRecipeBase>(U);
1460 return R && classof(R);
1461 }
1462
1463 /// Produce a widened instruction using the opcode and operands of the recipe,
1464 /// processing State.VF elements.
1465 void execute(VPTransformState &State) override;
1466
1467 /// Return the cost of this VPWidenRecipe.
1469 VPCostContext &Ctx) const override;
1470
1471 unsigned getOpcode() const { return Opcode; }
1472
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1474 /// Print the recipe.
1475 void print(raw_ostream &O, const Twine &Indent,
1476 VPSlotTracker &SlotTracker) const override;
1477#endif
1478};
1479
1480/// A recipe for widening operations with vector-predication intrinsics with
1481/// explicit vector length (EVL).
1484
1485public:
1486 template <typename IterT>
1488 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1489 addOperand(&EVL);
1490 }
1492 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1493 transferFlags(W);
1494 }
1495
1496 ~VPWidenEVLRecipe() override = default;
1497
1498 VPWidenRecipe *clone() override final {
1499 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1500 return nullptr;
1501 }
1502
1503 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1504
1506 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1507
1508 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1509 /// processing EVL elements.
1510 void execute(VPTransformState &State) override final;
1511
1512 /// Returns true if the recipe only uses the first lane of operand \p Op.
1513 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1515 "Op must be an operand of the recipe");
1516 // EVL in that recipe is always the last operand, thus any use before means
1517 // the VPValue should be vectorized.
1518 return getEVL() == Op;
1519 }
1520
1521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522 /// Print the recipe.
1523 void print(raw_ostream &O, const Twine &Indent,
1524 VPSlotTracker &SlotTracker) const override final;
1525#endif
1526};
1527
1528/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1530 /// Cast instruction opcode.
1531 Instruction::CastOps Opcode;
1532
1533 /// Result type for the cast.
1534 Type *ResultTy;
1535
1536public:
1538 CastInst &UI)
1539 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1540 ResultTy(ResultTy) {
1541 assert(UI.getOpcode() == Opcode &&
1542 "opcode of underlying cast doesn't match");
1543 }
1544
1546 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1547 ResultTy(ResultTy) {}
1548
1549 ~VPWidenCastRecipe() override = default;
1550
1552 if (auto *UV = getUnderlyingValue())
1553 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1554 *cast<CastInst>(UV));
1555
1556 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1557 }
1558
1559 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1560
1561 /// Produce widened copies of the cast.
1562 void execute(VPTransformState &State) override;
1563
1564 /// Return the cost of this VPWidenCastRecipe.
1566 VPCostContext &Ctx) const override;
1567
1568#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1569 /// Print the recipe.
1570 void print(raw_ostream &O, const Twine &Indent,
1571 VPSlotTracker &SlotTracker) const override;
1572#endif
1573
1574 Instruction::CastOps getOpcode() const { return Opcode; }
1575
1576 /// Returns the result type of the cast.
1577 Type *getResultType() const { return ResultTy; }
1578};
1579
1580/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1582 Instruction::CastOps Opcode;
1583
1584 Type *ResultTy;
1585
1586 Value *generate(VPTransformState &State);
1587
1588public:
1590 DebugLoc DL)
1591 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
1592 ResultTy(ResultTy) {}
1593
1594 ~VPScalarCastRecipe() override = default;
1595
1597 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
1598 getDebugLoc());
1599 }
1600
1601 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1602
1603 void execute(VPTransformState &State) override;
1604
1605 /// Return the cost of this VPScalarCastRecipe.
1607 VPCostContext &Ctx) const override {
1608 // TODO: Compute accurate cost after retiring the legacy cost model.
1609 return 0;
1610 }
1611
1612#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1613 void print(raw_ostream &O, const Twine &Indent,
1614 VPSlotTracker &SlotTracker) const override;
1615#endif
1616
1617 /// Returns the result type of the cast.
1618 Type *getResultType() const { return ResultTy; }
1619
1620 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1621 // At the moment, only uniform codegen is implemented.
1623 "Op must be an operand of the recipe");
1624 return true;
1625 }
1626};
1627
1628/// A recipe for widening vector intrinsics.
1630 /// ID of the vector intrinsic to widen.
1631 Intrinsic::ID VectorIntrinsicID;
1632
1633 /// Scalar return type of the intrinsic.
1634 Type *ResultTy;
1635
1636 /// True if the intrinsic may read from memory.
1637 bool MayReadFromMemory;
1638
1639 /// True if the intrinsic may write to memory.
1640 bool MayWriteToMemory;
1641
1642 /// True if the intrinsic may have side-effects.
1643 bool MayHaveSideEffects;
1644
1645public:
1647 ArrayRef<VPValue *> CallArguments, Type *Ty,
1648 DebugLoc DL = {})
1649 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1650 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1651 MayReadFromMemory(CI.mayReadFromMemory()),
1652 MayWriteToMemory(CI.mayWriteToMemory()),
1653 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1654
1656 ArrayRef<VPValue *> CallArguments, Type *Ty,
1657 DebugLoc DL = {})
1658 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1659 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1660 LLVMContext &Ctx = Ty->getContext();
1661 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1662 MemoryEffects ME = Attrs.getMemoryEffects();
1663 MayReadFromMemory = ME.onlyWritesMemory();
1664 MayWriteToMemory = ME.onlyReadsMemory();
1665 MayHaveSideEffects = MayWriteToMemory ||
1666 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1667 !Attrs.hasFnAttr(Attribute::WillReturn);
1668 }
1669
1671 std::initializer_list<VPValue *> CallArguments,
1672 Type *Ty, DebugLoc DL = {})
1673 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1674 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1675
1676 ~VPWidenIntrinsicRecipe() override = default;
1677
1679 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1680 VectorIntrinsicID, {op_begin(), op_end()},
1681 ResultTy, getDebugLoc());
1682 }
1683
1684 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1685
1686 /// Produce a widened version of the vector intrinsic.
1687 void execute(VPTransformState &State) override;
1688
1689 /// Return the cost of this vector intrinsic.
1691 VPCostContext &Ctx) const override;
1692
1693 /// Return the ID of the intrinsic.
1694 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1695
1696 /// Return the scalar return type of the intrinsic.
1697 Type *getResultType() const { return ResultTy; }
1698
1699 /// Return the name of the intrinsic as a string.
1701
1702 /// Returns true if the intrinsic may read from memory.
1703 bool mayReadFromMemory() const { return MayReadFromMemory; }
1704
1705 /// Returns true if the intrinsic may write to memory.
1706 bool mayWriteToMemory() const { return MayWriteToMemory; }
1707
1708 /// Returns true if the intrinsic may have side-effects.
1709 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1710
1711#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1712 /// Print the recipe.
1713 void print(raw_ostream &O, const Twine &Indent,
1714 VPSlotTracker &SlotTracker) const override;
1715#endif
1716
1717 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1718};
1719
1720/// A recipe for widening Call instructions using library calls.
1722 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1723 /// between a given VF and the chosen vectorized variant, so there will be a
1724 /// different VPlan for each VF with a valid variant.
1725 Function *Variant;
1726
1727public:
1729 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1730 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1731 *cast<Instruction>(UV)),
1732 Variant(Variant) {
1733 assert(
1734 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1735 "last operand must be the called function");
1736 }
1737
1738 ~VPWidenCallRecipe() override = default;
1739
1741 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1742 {op_begin(), op_end()}, getDebugLoc());
1743 }
1744
1745 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1746
1747 /// Produce a widened version of the call instruction.
1748 void execute(VPTransformState &State) override;
1749
1750 /// Return the cost of this VPWidenCallRecipe.
1752 VPCostContext &Ctx) const override;
1753
1755 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1756 }
1757
1759 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1760 }
1762 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1763 }
1764
1765#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1766 /// Print the recipe.
1767 void print(raw_ostream &O, const Twine &Indent,
1768 VPSlotTracker &SlotTracker) const override;
1769#endif
1770};
1771
1772/// A recipe representing a sequence of load -> update -> store as part of
1773/// a histogram operation. This means there may be aliasing between vector
1774/// lanes, which is handled by the llvm.experimental.vector.histogram family
1775/// of intrinsics. The only update operations currently supported are
1776/// 'add' and 'sub' where the other term is loop-invariant.
1778 /// Opcode of the update operation, currently either add or sub.
1779 unsigned Opcode;
1780
1781public:
1782 template <typename IterT>
1783 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1784 DebugLoc DL = {})
1785 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1786
1787 ~VPHistogramRecipe() override = default;
1788
1790 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1791 }
1792
1793 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1794
1795 /// Produce a vectorized histogram operation.
1796 void execute(VPTransformState &State) override;
1797
1798 /// Return the cost of this VPHistogramRecipe.
1800 VPCostContext &Ctx) const override;
1801
1802 unsigned getOpcode() const { return Opcode; }
1803
1804 /// Return the mask operand if one was provided, or a null pointer if all
1805 /// lanes should be executed unconditionally.
1806 VPValue *getMask() const {
1807 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1808 }
1809
1810#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1811 /// Print the recipe
1812 void print(raw_ostream &O, const Twine &Indent,
1813 VPSlotTracker &SlotTracker) const override;
1814#endif
1815};
1816
1817/// A recipe for widening select instructions.
1819 template <typename IterT>
1821 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I) {}
1822
1823 ~VPWidenSelectRecipe() override = default;
1824
1826 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1827 operands());
1828 }
1829
1830 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1831
1832 /// Produce a widened version of the select instruction.
1833 void execute(VPTransformState &State) override;
1834
1835 /// Return the cost of this VPWidenSelectRecipe.
1837 VPCostContext &Ctx) const override;
1838
1839#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1840 /// Print the recipe.
1841 void print(raw_ostream &O, const Twine &Indent,
1842 VPSlotTracker &SlotTracker) const override;
1843#endif
1844
1845 VPValue *getCond() const {
1846 return getOperand(0);
1847 }
1848
1849 bool isInvariantCond() const {
1851 }
1852};
1853
1854/// A recipe for handling GEP instructions.
1856 bool isPointerLoopInvariant() const {
1858 }
1859
1860 bool isIndexLoopInvariant(unsigned I) const {
1862 }
1863
1864 bool areAllOperandsInvariant() const {
1865 return all_of(operands(), [](VPValue *Op) {
1866 return Op->isDefinedOutsideLoopRegions();
1867 });
1868 }
1869
1870public:
1871 template <typename IterT>
1873 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1874
1875 ~VPWidenGEPRecipe() override = default;
1876
1878 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1879 operands());
1880 }
1881
1882 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1883
1884 /// Generate the gep nodes.
1885 void execute(VPTransformState &State) override;
1886
1887 /// Return the cost of this VPWidenGEPRecipe.
1889 VPCostContext &Ctx) const override {
1890 // TODO: Compute accurate cost after retiring the legacy cost model.
1891 return 0;
1892 }
1893
1894#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1895 /// Print the recipe.
1896 void print(raw_ostream &O, const Twine &Indent,
1897 VPSlotTracker &SlotTracker) const override;
1898#endif
1899};
1900
1901/// A recipe to compute the pointers for widened memory accesses of IndexedTy
1902/// in reverse order.
1904 public VPUnrollPartAccessor<2> {
1905 Type *IndexedTy;
1906
1907public:
1910 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1911 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1912 IndexedTy(IndexedTy) {}
1913
1914 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1915
1917 const VPValue *getVFValue() const { return getOperand(1); }
1918
1919 void execute(VPTransformState &State) override;
1920
1921 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1923 "Op must be an operand of the recipe");
1924 return true;
1925 }
1926
1927 /// Return the cost of this VPReverseVectorPointerRecipe.
1929 VPCostContext &Ctx) const override {
1930 // TODO: Compute accurate cost after retiring the legacy cost model.
1931 return 0;
1932 }
1933
1934 /// Returns true if the recipe only uses the first part of operand \p Op.
1935 bool onlyFirstPartUsed(const VPValue *Op) const override {
1937 "Op must be an operand of the recipe");
1938 assert(getNumOperands() <= 2 && "must have at most two operands");
1939 return true;
1940 }
1941
1944 IndexedTy, getGEPNoWrapFlags(),
1945 getDebugLoc());
1946 }
1947
1948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1949 /// Print the recipe.
1950 void print(raw_ostream &O, const Twine &Indent,
1951 VPSlotTracker &SlotTracker) const override;
1952#endif
1953};
1954
1955/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1957 public VPUnrollPartAccessor<1> {
1958 Type *IndexedTy;
1959
1960public:
1962 DebugLoc DL)
1963 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1964 GEPFlags, DL),
1965 IndexedTy(IndexedTy) {}
1966
1967 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1968
1969 void execute(VPTransformState &State) override;
1970
1971 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1973 "Op must be an operand of the recipe");
1974 return true;
1975 }
1976
1977 /// Returns true if the recipe only uses the first part of operand \p Op.
1978 bool onlyFirstPartUsed(const VPValue *Op) const override {
1980 "Op must be an operand of the recipe");
1981 assert(getNumOperands() <= 2 && "must have at most two operands");
1982 return true;
1983 }
1984
1986 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1988 }
1989
1990 /// Return the cost of this VPVectorPointerRecipe.
1992 VPCostContext &Ctx) const override {
1993 // TODO: Compute accurate cost after retiring the legacy cost model.
1994 return 0;
1995 }
1996
1997#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1998 /// Print the recipe.
1999 void print(raw_ostream &O, const Twine &Indent,
2000 VPSlotTracker &SlotTracker) const override;
2001#endif
2002};
2003
2004/// A pure virtual base class for all recipes modeling header phis, including
2005/// phis for first order recurrences, pointer inductions and reductions. The
2006/// start value is the first operand of the recipe and the incoming value from
2007/// the backedge is the second operand.
2008///
2009/// Inductions are modeled using the following sub-classes:
2010/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2011/// starting at a specified value (zero for the main vector loop, the resume
2012/// value for the epilogue vector loop) and stepping by 1. The induction
2013/// controls exiting of the vector loop by comparing against the vector trip
2014/// count. Produces a single scalar PHI for the induction value per
2015/// iteration.
2016/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2017/// floating point inductions with arbitrary start and step values. Produces
2018/// a vector PHI per-part.
2019/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2020/// value of an IV with different start and step values. Produces a single
2021 /// scalar value per iteration.
2022/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2023/// canonical or derived induction.
2024/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2025/// pointer induction. Produces either a vector PHI per-part or scalar values
2026/// per-lane based on the canonical induction.
2028protected:
/// Construct a header phi recipe with recipe ID \p VPDefID, underlying
/// instruction \p UnderlyingInstr and debug location \p DL. The base recipe is
/// created without operands; \p Start, when non-null, is then added as the
/// first operand.
2029 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2030 VPValue *Start = nullptr, DebugLoc DL = {})
2031 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2032 if (Start)
2033 addOperand(Start);
2034 }
2035
2036public:
2037 ~VPHeaderPHIRecipe() override = default;
2038
2039 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// A recipe is a header phi when its recipe ID lies in the inclusive range
/// [VPDef::VPFirstHeaderPHISC, VPDef::VPLastHeaderPHISC].
2040 static inline bool classof(const VPRecipeBase *B) {
2041 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2042 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2043 }
/// Overload for VPValues: applies the header-phi ID range check to the
/// defining recipe of \p V. Returns false if \p V has no defining recipe.
2044 static inline bool classof(const VPValue *V) {
2045 auto *B = V->getDefiningRecipe();
2046 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2047 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2048 }
2049
2050 /// Generate the phi nodes.
2051 void execute(VPTransformState &State) override = 0;
2052
2053 /// Return the cost of this header phi recipe.
2055 VPCostContext &Ctx) const override;
2056
2057#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2058 /// Print the recipe.
2059 void print(raw_ostream &O, const Twine &Indent,
2060 VPSlotTracker &SlotTracker) const override = 0;
2061#endif
2062
2063 /// Returns the start value of the phi, if one is set.
2065 return getNumOperands() == 0 ? nullptr : getOperand(0);
2066 }
2068 return getNumOperands() == 0 ? nullptr : getOperand(0);
2069 }
2070
2071 /// Update the start value of the recipe.
2073
2074 /// Returns the incoming value from the loop backedge.
2076 return getOperand(1);
2077 }
2078
2079 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2080 /// to be a recipe.
2083 }
2084};
2085
2086/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2087/// VPWidenPointerInductionRecipe), providing shared functionality, including
2088/// retrieving the step value, induction descriptor and original phi node.
2090 const InductionDescriptor &IndDesc;
2091
2092public:
/// Construct a widened induction recipe of kind \p Kind for the phi \p IV.
/// \p Start and \p DL are forwarded to VPHeaderPHIRecipe; \p Step is appended
/// as an operand afterwards, so it is operand 1 when a start value is given.
/// Only a reference to \p IndDesc is stored.
/// NOTE(review): the descriptor presumably has to outlive the recipe --
/// confirm against the callers.
2093 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2094 VPValue *Step, const InductionDescriptor &IndDesc,
2095 DebugLoc DL)
2096 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2097 addOperand(Step);
2098 }
2099
/// Method to support type inquiry through isa, cast, and dyn_cast: matches
/// recipes whose ID is VPWidenIntOrFpInductionSC or VPWidenPointerInductionSC.
2100 static inline bool classof(const VPRecipeBase *R) {
2101 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2102 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2103 }
2104
/// Overload for VPValues: true if \p V has a defining recipe and that recipe
/// is accepted by the VPRecipeBase overload.
2105 static inline bool classof(const VPValue *V) {
2106 auto *R = V->getDefiningRecipe();
2107 return R && classof(R);
2108 }
2109
/// Overload for header phi recipes; defers to the VPRecipeBase overload.
2110 static inline bool classof(const VPHeaderPHIRecipe *R) {
2111 return classof(static_cast<const VPRecipeBase *>(R));
2112 }
2113
2114 virtual void execute(VPTransformState &State) override = 0;
2115
2116 /// Returns the step value of the induction.
2118 const VPValue *getStepValue() const { return getOperand(1); }
2119
/// Returns the underlying value of this recipe, cast to a PHINode.
2120 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2121
2122 /// Returns the induction descriptor for the recipe.
2123 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2124
2126 // TODO: All operands of base recipe must exist and be at same index in
2127 // derived recipe.
2129 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2130 }
2131
2133 // TODO: All operands of base recipe must exist and be at same index in
2134 // derived recipe.
2136 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2137 }
2138};
2139
2140/// A recipe for handling phi nodes of integer and floating-point inductions,
2141/// producing their vector values.
2143 TruncInst *Trunc;
2144
2145public:
2147 VPValue *VF, const InductionDescriptor &IndDesc,
2148 DebugLoc DL)
2149 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2150 Step, IndDesc, DL),
2151 Trunc(nullptr) {
2152 addOperand(VF);
2153 }
2154
2156 VPValue *VF, const InductionDescriptor &IndDesc,
2157 TruncInst *Trunc, DebugLoc DL)
2158 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2159 Step, IndDesc, DL),
2160 Trunc(Trunc) {
2161 addOperand(VF);
2162 }
2163
2165
2170 }
2171
2172 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2173
2174 /// Generate the vectorized and scalarized versions of the phi node as
2175 /// needed by their users.
2176 void execute(VPTransformState &State) override;
2177
2178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2179 /// Print the recipe.
2180 void print(raw_ostream &O, const Twine &Indent,
2181 VPSlotTracker &SlotTracker) const override;
2182#endif
2183
2185 const VPValue *getVFValue() const { return getOperand(2); }
2186
2188 // If the recipe has been unrolled (5 operands), return the VPValue for the
2189 // induction increment.
2190 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2191 }
2192
2193 /// Returns the first defined value as TruncInst, if it is one or nullptr
2194 /// otherwise.
2195 TruncInst *getTruncInst() { return Trunc; }
2196 const TruncInst *getTruncInst() const { return Trunc; }
2197
2198 /// Returns true if the induction is canonical, i.e. starting at 0 and
2199 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2200 /// same type as the canonical induction.
2201 bool isCanonical() const;
2202
2203 /// Returns the scalar type of the induction.
2205 return Trunc ? Trunc->getType() : getPHINode()->getType();
2206 }
2207
2208 /// Returns the VPValue representing the value of this induction at
2209 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2210 /// take place.
2212 return getNumOperands() == 5 ? getOperand(4) : this;
2213 }
2214};
2215
2217 public VPUnrollPartAccessor<3> {
2218 bool IsScalarAfterVectorization;
2219
2220public:
2221 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2222 /// Start.
2224 const InductionDescriptor &IndDesc,
2225 bool IsScalarAfterVectorization, DebugLoc DL)
2226 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2227 Step, IndDesc, DL),
2228 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2229
2231
2234 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2235 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2236 }
2237
2238 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2239
2240 /// Generate vector values for the pointer induction.
2241 void execute(VPTransformState &State) override;
2242
2243 /// Returns true if only scalar values will be generated.
2244 bool onlyScalarsGenerated(bool IsScalable);
2245
2246 /// Returns the VPValue representing the value of this induction at
2247 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2248 /// take place.
2250 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2251 }
2252
2253#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2254 /// Print the recipe.
2255 void print(raw_ostream &O, const Twine &Indent,
2256 VPSlotTracker &SlotTracker) const override;
2257#endif
2258};
2259
2260/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2261/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2262/// VPEVLBasedIVPHIRecipe.
2264 std::string Name;
2265
2266public:
/// Create a scalar phi recipe with start value \p Start, backedge value
/// \p BackedgeValue and debug location \p DL. No underlying instruction is
/// attached (nullptr is passed to VPHeaderPHIRecipe). \p BackedgeValue is
/// appended as an operand after the start value, and \p Name is copied into
/// the recipe.
2267 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2268 StringRef Name)
2269 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2270 Name(Name.str()) {
2271 addOperand(BackedgeValue);
2272 }
2273
2274 ~VPScalarPHIRecipe() override = default;
2275
2277 llvm_unreachable("cloning not implemented yet");
2278 }
2279
2280 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2281
2282 /// Generate the phi/select nodes.
2283 void execute(VPTransformState &State) override;
2284
2285 /// Returns true if the recipe only uses the first lane of operand \p Op.
2286 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2288 "Op must be an operand of the recipe");
2289 return true;
2290 }
2291
2292#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2293 /// Print the recipe.
2294 void print(raw_ostream &O, const Twine &Indent,
2295 VPSlotTracker &SlotTracker) const override;
2296#endif
2297};
2298
2299/// A recipe for handling phis that are widened in the vector loop.
2300/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2301/// managed in the recipe directly.
2303 /// List of incoming blocks. Only used in the VPlan native path.
2304 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2305
2306public:
2307 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2308 /// debug location \p DL. The recipe is created without operands; \p Start is
/// added as the first operand only when it is non-null.
2309 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
2310 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
2311 if (Start)
2312 addOperand(Start);
2313 }
2314
2316 llvm_unreachable("cloning not implemented yet");
2317 }
2318
2319 ~VPWidenPHIRecipe() override = default;
2320
2321 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2322
2323 /// Generate the phi/select nodes.
2324 void execute(VPTransformState &State) override;
2325
2326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2327 /// Print the recipe.
2328 void print(raw_ostream &O, const Twine &Indent,
2329 VPSlotTracker &SlotTracker) const override;
2330#endif
2331
2332 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
/// The value is appended to the operand list and the block to IncomingBlocks.
/// NOTE(review): a start value added by the constructor has no matching
/// IncomingBlocks entry, so operand and block indices presumably only line up
/// when every incoming value is added through this method -- confirm.
2333 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2334 addOperand(IncomingV);
2335 IncomingBlocks.push_back(IncomingBlock);
2336 }
2337
2338 /// Returns the \p I th incoming VPBasicBlock.
2339 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2340
2341 /// Returns the \p I th incoming VPValue.
2342 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2343};
2344
2345/// A recipe for handling first-order recurrence phis. The start value is the
2346/// first operand of the recipe and the incoming value from the backedge is the
2347/// second operand.
2350 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2351
2352 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2353
2355 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2356 }
2357
2360 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2361 }
2362
2363 void execute(VPTransformState &State) override;
2364
2365 /// Return the cost of this first-order recurrence phi recipe.
2367 VPCostContext &Ctx) const override;
2368
2369#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2370 /// Print the recipe.
2371 void print(raw_ostream &O, const Twine &Indent,
2372 VPSlotTracker &SlotTracker) const override;
2373#endif
2374};
2375
2376/// A recipe for handling reduction phis. The start value is the first operand
2377/// of the recipe and the incoming value from the backedge is the second
2378/// operand.
2380 public VPUnrollPartAccessor<2> {
2381 /// Descriptor for the reduction.
2382 const RecurrenceDescriptor &RdxDesc;
2383
2384 /// The phi is part of an in-loop reduction.
2385 bool IsInLoop;
2386
2387 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2388 bool IsOrdered;
2389
2390 /// When expanding the reduction PHI, the plan's VF element count is divided
2391 /// by this factor to form the reduction phi's VF.
2392 unsigned VFScaleFactor = 1;
2393
2394public:
2395 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2396 /// RdxDesc.
2398 VPValue &Start, bool IsInLoop = false,
2399 bool IsOrdered = false, unsigned VFScaleFactor = 1)
2400 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2401 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2402 VFScaleFactor(VFScaleFactor) {
2403 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2404 }
2405
2406 ~VPReductionPHIRecipe() override = default;
2407
2409 auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
2410 RdxDesc, *getOperand(0), IsInLoop,
2411 IsOrdered, VFScaleFactor);
2412 R->addOperand(getBackedgeValue());
2413 return R;
2414 }
2415
2416 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2417
2419 return R->getVPDefID() == VPDef::VPReductionPHISC;
2420 }
2421
2422 /// Generate the phi/select nodes.
2423 void execute(VPTransformState &State) override;
2424
2425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2426 /// Print the recipe.
2427 void print(raw_ostream &O, const Twine &Indent,
2428 VPSlotTracker &SlotTracker) const override;
2429#endif
2430
2432 return RdxDesc;
2433 }
2434
2435 /// Returns true, if the phi is part of an ordered reduction.
2436 bool isOrdered() const { return IsOrdered; }
2437
2438 /// Returns true, if the phi is part of an in-loop reduction.
2439 bool isInLoop() const { return IsInLoop; }
2440};
2441
2442/// A recipe for forming partial reductions. In the loop, an accumulator and
2443/// vector operand are added together and passed to the next iteration as the
2444/// next accumulator. After the loop body, the accumulator is reduced to a
2445/// scalar value.
2447 unsigned Opcode;
2448
2449public:
2451 VPValue *Op1)
2452 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
2453 ReductionInst) {}
/// Create a partial reduction recipe with binary opcode \p Opcode and operands
/// {\p Op0, \p Op1}, optionally tied to the underlying instruction
/// \p ReductionInst. Asserts that \p Op1 is defined by a VPReductionPHIRecipe,
/// i.e. that it is the second operand.
2454 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2455 Instruction *ReductionInst = nullptr)
2456 : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
2457 ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
2458 Opcode(Opcode) {
2459 assert(isa<VPReductionPHIRecipe>(getOperand(1)->getDefiningRecipe()) &&
2460 "Unexpected operand order for partial reduction recipe");
2461 }
2462 ~VPPartialReductionRecipe() override = default;
2463
2465 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2467 }
2468
2469 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2470
2471 /// Generate the reduction in the loop.
2472 void execute(VPTransformState &State) override;
2473
2474 /// Return the cost of this VPPartialReductionRecipe.
2476 VPCostContext &Ctx) const override;
2477
2478 /// Get the binary op's opcode.
2479 unsigned getOpcode() const { return Opcode; }
2480
2481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2482 /// Print the recipe.
2483 void print(raw_ostream &O, const Twine &Indent,
2484 VPSlotTracker &SlotTracker) const override;
2485#endif
2486};
2487
2488/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2489/// instructions.
2491public:
2492 /// The blend operation is a User of the incoming values and of their
2493 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2494 /// be omitted (implied by passing an odd number of operands) in which case
2495 /// all other incoming values are merged into it.
2497 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2498 assert(Operands.size() > 0 && "Expected at least one operand!");
2499 }
2500
2501 VPBlendRecipe *clone() override {
2503 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2504 }
2505
2506 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2507
2508 /// A normalized blend is one that has an odd number of operands, whereby the
2509 /// first operand does not have an associated mask.
2510 bool isNormalized() const { return getNumOperands() % 2; }
2511
2512 /// Return the number of incoming values, taking into account when normalized
2513 /// the first incoming value will have no mask.
2514 unsigned getNumIncomingValues() const {
// Operands come in (value, mask) pairs, except that a normalized blend omits
// the first mask; adding 1 in that case rounds the count up to whole pairs.
2515 return (getNumOperands() + isNormalized()) / 2;
2516 }
2517
2518 /// Return incoming value number \p Idx.
2519 VPValue *getIncomingValue(unsigned Idx) const {
// With a mask for every value, value Idx is operand 2 * Idx. A normalized
// blend lacks the first mask, which shifts every later value down by one.
2520 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2521 }
2522
2523 /// Return mask number \p Idx, i.e. the mask paired with incoming value
/// \p Idx. A normalized blend has no mask for index 0; asking for it asserts.
2524 VPValue *getMask(unsigned Idx) const {
2525 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
// The mask directly follows its value: operand 2 * Idx + 1 when every value
// has a mask, operand 2 * Idx when the first mask is omitted.
2526 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2527 }
2528
2529 /// Generate the phi/select nodes.
2530 void execute(VPTransformState &State) override;
2531
2532 /// Return the cost of this VPBlendRecipe.
2534 VPCostContext &Ctx) const override;
2535
2536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2537 /// Print the recipe.
2538 void print(raw_ostream &O, const Twine &Indent,
2539 VPSlotTracker &SlotTracker) const override;
2540#endif
2541
2542 /// Returns true if the recipe only uses the first lane of operand \p Op.
2543 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2545 "Op must be an operand of the recipe");
2546 // Recursing through Blend recipes only; must terminate at header phis at
2547 // the latest.
2548 return all_of(users(),
2549 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2550 }
2551};
2552
2553/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2554/// or stores into one wide load/store and shuffles. The first operand of a
2555/// VPInterleave recipe is the address, followed by the stored values, followed
2556/// by an optional mask.
2559
2560 /// Indicates if the interleave group is in a conditional block and requires a
2561 /// mask.
2562 bool HasMask = false;
2563
2564 /// Indicates if gaps between members of the group need to be masked out or if
2565 /// unused gaps can be loaded speculatively.
2566 bool NeedsMaskForGaps = false;
2567
2568public:
2570 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2571 bool NeedsMaskForGaps)
2572 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2573 NeedsMaskForGaps(NeedsMaskForGaps) {
2574 for (unsigned i = 0; i < IG->getFactor(); ++i)
2575 if (Instruction *I = IG->getMember(i)) {
2576 if (I->getType()->isVoidTy())
2577 continue;
2578 new VPValue(I, this);
2579 }
2580
2581 for (auto *SV : StoredValues)
2582 addOperand(SV);
2583 if (Mask) {
2584 HasMask = true;
2585 addOperand(Mask);
2586 }
2587 }
2588 ~VPInterleaveRecipe() override = default;
2589
2591 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2592 NeedsMaskForGaps);
2593 }
2594
2595 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2596
2597 /// Return the address accessed by this recipe.
2598 VPValue *getAddr() const {
2599 return getOperand(0); // Address is the 1st, mandatory operand.
2600 }
2601
2602 /// Return the mask used by this recipe. Note that a full mask is represented
2603 /// by a nullptr.
2604 VPValue *getMask() const {
2605 // Mask is optional and therefore always the last operand, after the address
// and any stored values.
2606 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2607 }
2608
2609 /// Return the VPValues stored by this interleave group. If it is a load
2610 /// interleave group, return an empty ArrayRef.
2612 // The first operand is the address, followed by the stored values, followed
2613 // by an optional mask.
2616 }
2617
2618 /// Generate the wide load or store, and shuffles.
2619 void execute(VPTransformState &State) override;
2620
2621 /// Return the cost of this VPInterleaveRecipe.
2623 VPCostContext &Ctx) const override;
2624
2625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2626 /// Print the recipe.
2627 void print(raw_ostream &O, const Twine &Indent,
2628 VPSlotTracker &SlotTracker) const override;
2629#endif
2630
2632
2633 /// Returns the number of stored operands of this interleave group. Returns 0
2634 /// for load interleave groups.
2635 unsigned getNumStoreOperands() const {
// Discount the address operand and, if present, the trailing mask operand.
2636 return getNumOperands() - (HasMask ? 2 : 1);
2637 }
2638
2639 /// The recipe only uses the first lane of the address.
2640 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2642 "Op must be an operand of the recipe");
2643 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2644 }
2645
2646 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2647};
2648
2649/// A recipe to represent inloop reduction operations, performing a reduction on
2650/// a vector operand into a scalar value, and adding the result to a chain.
2651/// The Operands are {ChainOp, VecOp, [Condition]}.
2653 /// The recurrence descriptor for the reduction in question.
2654 const RecurrenceDescriptor &RdxDesc;
2655 bool IsOrdered;
2656 /// Whether the reduction is conditional.
2657 bool IsConditional = false;
2658
2659protected:
2660 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2662 VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2663 : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2664 IsOrdered(IsOrdered) {
2665 if (CondOp) {
2666 IsConditional = true;
2667 addOperand(CondOp);
2668 }
2669 }
2670
2671public:
2673 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2674 bool IsOrdered, DebugLoc DL = {})
2675 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2676 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2677 IsOrdered, DL) {}
2678
2679 ~VPReductionRecipe() override = default;
2680
2682 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2683 getVecOp(), getCondOp(), IsOrdered,
2684 getDebugLoc());
2685 }
2686
/// Method to support type inquiry through isa, cast, and dyn_cast: matches
/// recipes whose ID is VPReductionSC or VPReductionEVLSC.
2687 static inline bool classof(const VPRecipeBase *R) {
2688 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2689 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2690 }
2691
/// Overload for VPUsers: true if \p U is a VPRecipeBase that is accepted by
/// the VPRecipeBase overload.
2692 static inline bool classof(const VPUser *U) {
2693 auto *R = dyn_cast<VPRecipeBase>(U);
2694 return R && classof(R);
2695 }
2696
2697 /// Generate the reduction in the loop.
2698 void execute(VPTransformState &State) override;
2699
2700 /// Return the cost of VPReductionRecipe.
2702 VPCostContext &Ctx) const override;
2703
2704#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2705 /// Print the recipe.
2706 void print(raw_ostream &O, const Twine &Indent,
2707 VPSlotTracker &SlotTracker) const override;
2708#endif
2709
2710 /// Return the recurrence descriptor for the in-loop reduction.
2712 return RdxDesc;
2713 }
2714 /// Return true if the in-loop reduction is ordered.
2715 bool isOrdered() const { return IsOrdered; };
2716 /// Return true if the in-loop reduction is conditional.
2717 bool isConditional() const { return IsConditional; };
2718 /// The VPValue of the scalar Chain being accumulated.
2719 VPValue *getChainOp() const { return getOperand(0); }
2720 /// The VPValue of the vector value to be reduced.
2721 VPValue *getVecOp() const { return getOperand(1); }
2722 /// The VPValue of the condition for the block.
2724 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2725 }
2726};
2727
2728/// A recipe to represent inloop reduction operations with vector-predication
2729/// intrinsics, performing a reduction on a vector operand with the explicit
2730/// vector length (EVL) into a scalar value, and adding the result to a chain.
2731/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2733public:
2736 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2738 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2739 R.isOrdered(), R.getDebugLoc()) {}
2740
2741 ~VPReductionEVLRecipe() override = default;
2742
2744 llvm_unreachable("cloning not implemented yet");
2745 }
2746
2747 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2748
2749 /// Generate the reduction in the loop.
2750 void execute(VPTransformState &State) override;
2751
2752#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2753 /// Print the recipe.
2754 void print(raw_ostream &O, const Twine &Indent,
2755 VPSlotTracker &SlotTracker) const override;
2756#endif
2757
2758 /// The VPValue of the explicit vector length.
2759 VPValue *getEVL() const { return getOperand(2); }
2760
2761 /// Returns true if the recipe only uses the first lane of operand \p Op.
2762 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2764 "Op must be an operand of the recipe");
2765 return Op == getEVL();
2766 }
2767};
2768
2769/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2770/// copies of the original scalar type, one per lane, instead of producing a
2771/// single copy of widened type for all lanes. If the instruction is known to be
2772/// uniform only one copy, per lane zero, will be generated.
2774 /// Indicator if only a single replica per lane is needed.
2775 bool IsUniform;
2776
2777 /// Indicator if the replicas are also predicated.
2778 bool IsPredicated;
2779
2780public:
2781 template <typename IterT>
2783 bool IsUniform, VPValue *Mask = nullptr)
2784 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2785 IsUniform(IsUniform), IsPredicated(Mask) {
2786 if (Mask)
2787 addOperand(Mask);
2788 }
2789
2790 ~VPReplicateRecipe() override = default;
2791
2793 auto *Copy =
2794 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2795 isPredicated() ? getMask() : nullptr);
2796 Copy->transferFlags(*this);
2797 return Copy;
2798 }
2799
2800 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2801
2802 /// Generate replicas of the desired Ingredient. Replicas will be generated
2803 /// for all parts and lanes unless a specific part and lane are specified in
2804 /// the \p State.
2805 void execute(VPTransformState &State) override;
2806
2807 /// Return the cost of this VPReplicateRecipe.
2809 VPCostContext &Ctx) const override;
2810
2811#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2812 /// Print the recipe.
2813 void print(raw_ostream &O, const Twine &Indent,
2814 VPSlotTracker &SlotTracker) const override;
2815#endif
2816
2817 bool isUniform() const { return IsUniform; }
2818
2819 bool isPredicated() const { return IsPredicated; }
2820
2821 /// Returns true if the recipe only uses the first lane of operand \p Op.
2822 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2824 "Op must be an operand of the recipe");
2825 return isUniform();
2826 }
2827
2828 /// Returns true if the recipe uses scalars of operand \p Op.
2829 bool usesScalars(const VPValue *Op) const override {
2831 "Op must be an operand of the recipe");
2832 return true;
2833 }
2834
2835 /// Returns true if the recipe is used by a widened recipe via an intervening
2836 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2837 /// in a vector.
2838 bool shouldPack() const;
2839
2840 /// Return the mask of a predicated VPReplicateRecipe.
2842 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2843 return getOperand(getNumOperands() - 1);
2844 }
2845
2846 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2847};
2848
2849/// A recipe for generating conditional branches on the bits of a mask.
2851public:
2853 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2854 if (BlockInMask) // nullptr means all-one mask.
2855 addOperand(BlockInMask);
2856 }
2857
2859 return new VPBranchOnMaskRecipe(getOperand(0));
2860 }
2861
2862 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2863
2864 /// Generate the extraction of the appropriate bit from the block mask and the
2865 /// conditional branch.
2866 void execute(VPTransformState &State) override;
2867
2868 /// Return the cost of this VPBranchOnMaskRecipe.
2870 VPCostContext &Ctx) const override;
2871
2872#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2873 /// Print the recipe to \p O: \p Indent, then "BRANCH-ON-MASK ", then either
/// the mask printed as an operand (using \p SlotTracker) or " All-One" when
/// the recipe has no mask.
2874 void print(raw_ostream &O, const Twine &Indent,
2875 VPSlotTracker &SlotTracker) const override {
2876 O << Indent << "BRANCH-ON-MASK ";
2877 if (VPValue *Mask = getMask())
2878 Mask->printAsOperand(O, SlotTracker);
2879 else
2880 O << " All-One";
2881 }
2882#endif
2883
2884 /// Return the mask used by this recipe. Note that a full mask is represented
2885 /// by a nullptr.
2886 VPValue *getMask() const {
2887 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2888 // Mask is optional; when present it is the recipe's only operand.
2889 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2890 }
2891
2892 /// Returns true if the recipe uses scalars of operand \p Op.
2893 bool usesScalars(const VPValue *Op) const override {
2895 "Op must be an operand of the recipe");
2896 return true;
2897 }
2898};
2899
2900/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2901/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2902/// order to merge values that are set under such a branch and feed their uses.
2903/// The phi nodes can be scalar or vector depending on the users of the value.
2904/// This recipe works in concert with VPBranchOnMaskRecipe.
2906public:
2907 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2908 /// nodes after merging back from a Branch-on-Mask.
2910 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2911 ~VPPredInstPHIRecipe() override = default;
2912
2914 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
2915 }
2916
2917 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2918
2919 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2920 /// retain SSA form.
2921 void execute(VPTransformState &State) override;
2922
2923 /// Return the cost of this VPPredInstPHIRecipe.
2925 VPCostContext &Ctx) const override {
2926 // TODO: Compute accurate cost after retiring the legacy cost model.
2927 return 0;
2928 }
2929
2930#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2931 /// Print the recipe.
2932 void print(raw_ostream &O, const Twine &Indent,
2933 VPSlotTracker &SlotTracker) const override;
2934#endif
2935
2936 /// Returns true if the recipe uses scalars of operand \p Op.
2937 bool usesScalars(const VPValue *Op) const override {
2939 "Op must be an operand of the recipe");
2940 return true;
2941 }
2942};
2943
2944/// A common base class for widening memory operations. An optional mask can be
2945/// provided as the last operand.
2947protected:
2949
2950 /// Whether the accessed addresses are consecutive.
2952
2953 /// Whether the consecutive accessed addresses are in reverse order.
2955
2956 /// Whether the memory access is masked.
2957 bool IsMasked = false;
2958
/// Record \p Mask as the mask of this memory access by appending it as the
/// last operand and setting IsMasked. A null \p Mask leaves the recipe
/// unmasked. Must not be called once a mask has been set: the assertion fires
/// before the null check, so this holds even for a null \p Mask.
2959 void setMask(VPValue *Mask) {
2960 assert(!IsMasked && "cannot re-set mask");
2961 if (!Mask)
2962 return;
2963 addOperand(Mask);
2964 IsMasked = true;
2965 }
2966
2967 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2968 std::initializer_list<VPValue *> Operands,
2969 bool Consecutive, bool Reverse, DebugLoc DL)
2971 Reverse(Reverse) {
2972 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2973 }
2974
2975public:
2977 llvm_unreachable("cloning not supported");
2978 }
2979
/// Method to support type inquiry through isa, cast, and dyn_cast: matches
/// recipes whose ID is VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC or
/// VPWidenStoreEVLSC.
2980 static inline bool classof(const VPRecipeBase *R) {
2981 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2982 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2983 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2984 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2985 }
2986
/// Overload for VPUsers: true if \p U is a VPRecipeBase that is accepted by
/// the VPRecipeBase overload.
2987 static inline bool classof(const VPUser *U) {
2988 auto *R = dyn_cast<VPRecipeBase>(U);
2989 return R && classof(R);
2990 }
2991
2992 /// Return whether the loaded-from / stored-to addresses are consecutive.
2993 bool isConsecutive() const { return Consecutive; }
2994
2995 /// Return whether the consecutive loaded/stored addresses are in reverse
2996 /// order.
2997 bool isReverse() const { return Reverse; }
2998
2999 /// Return the address accessed by this recipe.
3000 VPValue *getAddr() const { return getOperand(0); }
3001
3002 /// Returns true if the recipe is masked.
3003 bool isMasked() const { return IsMasked; }
3004
3005 /// Return the mask used by this recipe. Note that a full mask is represented
3006 /// by a nullptr.
3007 VPValue *getMask() const {
3008 // Mask is optional and therefore the last operand.
3009 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3010 }
3011
3012 /// Generate the wide load/store.
3013 void execute(VPTransformState &State) override {
3014 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3015 }
3016
3017 /// Return the cost of this VPWidenMemoryRecipe.
3019 VPCostContext &Ctx) const override;
3020
3022};
3023
3024/// A recipe for widening load operations, using the address to load from and an
3025/// optional mask.
3026struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
3028 bool Consecutive, bool Reverse, DebugLoc DL)
3029 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3030 Reverse, DL),
3031 VPValue(this, &Load) {
3032 setMask(Mask);
3033 }
3034
3036 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3038 getDebugLoc());
3039 }
3040
3041 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3042
3043 /// Generate a wide load or gather.
3044 void execute(VPTransformState &State) override;
3045
3046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3047 /// Print the recipe.
3048 void print(raw_ostream &O, const Twine &Indent,
3049 VPSlotTracker &SlotTracker) const override;
3050#endif
3051
3052 /// Returns true if the recipe only uses the first lane of operand \p Op.
3053 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3055 "Op must be an operand of the recipe");
3056 // Widened, consecutive load operations only demand the first lane of
3057 // their address.
3058 return Op == getAddr() && isConsecutive();
3059 }
3060};
3061
3062/// A recipe for widening load operations with vector-predication intrinsics,
3063/// using the address to load from, the explicit vector length and an optional
3064/// mask.
3065struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3067 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3068 {L.getAddr(), &EVL}, L.isConsecutive(),
3069 L.isReverse(), L.getDebugLoc()),
3070 VPValue(this, &getIngredient()) {
3071 setMask(Mask);
3072 }
3073
3074 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3075
3076 /// Return the EVL operand.
3077 VPValue *getEVL() const { return getOperand(1); }
3078
3079 /// Generate the wide load or gather.
3080 void execute(VPTransformState &State) override;
3081
3082 /// Return the cost of this VPWidenLoadEVLRecipe.
3084 VPCostContext &Ctx) const override;
3085
3086#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3087 /// Print the recipe.
3088 void print(raw_ostream &O, const Twine &Indent,
3089 VPSlotTracker &SlotTracker) const override;
3090#endif
3091
3092 /// Returns true if the recipe only uses the first lane of operand \p Op.
3093 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3095 "Op must be an operand of the recipe");
3096 // Widened loads only demand the first lane of EVL and consecutive loads
3097 // only demand the first lane of their address.
3098 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3099 }
3100};
3101
3102/// A recipe for widening store operations, using the stored value, the address
3103/// to store to and an optional mask.
3106 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3107 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3109 setMask(Mask);
3110 }
3111
3113 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3115 Reverse, getDebugLoc());
3116 }
3117
3118 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3119
3120 /// Return the value stored by this recipe.
3121 VPValue *getStoredValue() const { return getOperand(1); }
3122
3123 /// Generate a wide store or scatter.
3124 void execute(VPTransformState &State) override;
3125
3126#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3127 /// Print the recipe.
3128 void print(raw_ostream &O, const Twine &Indent,
3129 VPSlotTracker &SlotTracker) const override;
3130#endif
3131
3132 /// Returns true if the recipe only uses the first lane of operand \p Op.
3133 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3135 "Op must be an operand of the recipe");
3136 // Widened, consecutive stores only demand the first lane of their address,
3137 // unless the same operand is also stored.
3138 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3139 }
3140};
3141
3142/// A recipe for widening store operations with vector-predication intrinsics,
3143/// using the value to store, the address to store to, the explicit vector
3144/// length and an optional mask.
3147 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3148 {S.getAddr(), S.getStoredValue(), &EVL},
3149 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3150 setMask(Mask);
3151 }
3152
3153 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3154
3155 /// Return the value stored by this recipe.
3156 VPValue *getStoredValue() const { return getOperand(1); }
3157
3158 /// Return the EVL operand.
3159 VPValue *getEVL() const { return getOperand(2); }
3160
3161 /// Generate the wide store or scatter.
3162 void execute(VPTransformState &State) override;
3163
3164 /// Return the cost of this VPWidenStoreEVLRecipe.
3166 VPCostContext &Ctx) const override;
3167
3168#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3169 /// Print the recipe.
3170 void print(raw_ostream &O, const Twine &Indent,
3171 VPSlotTracker &SlotTracker) const override;
3172#endif
3173
3174 /// Returns true if the recipe only uses the first lane of operand \p Op.
3175 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3177 "Op must be an operand of the recipe");
3178 if (Op == getEVL()) {
3179 assert(getStoredValue() != Op && "unexpected store of EVL");
3180 return true;
3181 }
3182 // Widened, consecutive memory operations only demand the first lane of
3183 // their address, unless the same operand is also stored. The latter can
3184 // happen with opaque pointers.
3185 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3186 }
3187};
3188
3189/// Recipe to expand a SCEV expression.
3191 const SCEV *Expr;
3192 ScalarEvolution &SE;
3193
3194public:
3196 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3197
3198 ~VPExpandSCEVRecipe() override = default;
3199
3201 return new VPExpandSCEVRecipe(Expr, SE);
3202 }
3203
3204 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3205
3206 /// Expand the SCEV expression held by this recipe.
3207 void execute(VPTransformState &State) override;
3208
3209 /// Return the cost of this VPExpandSCEVRecipe.
3211 VPCostContext &Ctx) const override {
3212 // TODO: Compute accurate cost after retiring the legacy cost model.
3213 return 0;
3214 }
3215
3216#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3217 /// Print the recipe.
3218 void print(raw_ostream &O, const Twine &Indent,
3219 VPSlotTracker &SlotTracker) const override;
3220#endif
3221
3222 const SCEV *getSCEV() const { return Expr; }
3223};
3224
3225/// Canonical scalar induction phi of the vector loop. Starting at the specified
3226/// start value (either 0 or the resume value when vectorizing the epilogue
3227/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3228/// canonical induction variable.
3230public:
3232 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3233
3234 ~VPCanonicalIVPHIRecipe() override = default;
3235
3237 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3238 R->addOperand(getBackedgeValue());
3239 return R;
3240 }
3241
3242 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3243
3245 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3246 }
3247
3248 void execute(VPTransformState &State) override {
3250 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3251 }
3252
3253#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3254 /// Print the recipe.
3255 void print(raw_ostream &O, const Twine &Indent,
3256 VPSlotTracker &SlotTracker) const override;
3257#endif
3258
3259 /// Returns the scalar type of the induction.
3261 return getStartValue()->getLiveInIRValue()->getType();
3262 }
3263
3264 /// Returns true if the recipe only uses the first lane of operand \p Op.
3265 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3267 "Op must be an operand of the recipe");
3268 return true;
3269 }
3270
3271 /// Returns true if the recipe only uses the first part of operand \p Op.
3272 bool onlyFirstPartUsed(const VPValue *Op) const override {
3274 "Op must be an operand of the recipe");
3275 return true;
3276 }
3277
3278 /// Return the cost of this VPCanonicalIVPHIRecipe.
3280 VPCostContext &Ctx) const override {
3281 // For now, match the behavior of the legacy cost model.
3282 return 0;
3283 }
3284};
3285
3286/// A recipe for generating the active lane mask for the vector loop that is
3287/// used to predicate the vector operations.
3288/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3289/// remove VPActiveLaneMaskPHIRecipe.
3291public:
3293 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3294 DL) {}
3295
3296 ~VPActiveLaneMaskPHIRecipe() override = default;
3297
3300 if (getNumOperands() == 2)
3301 R->addOperand(getOperand(1));
3302 return R;
3303 }
3304
3305 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3306
3308 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3309 }
3310
3311 /// Generate the active lane mask phi of the vector loop.
3312 void execute(VPTransformState &State) override;
3313
3314#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3315 /// Print the recipe.
3316 void print(raw_ostream &O, const Twine &Indent,
3317 VPSlotTracker &SlotTracker) const override;
3318#endif
3319};
3320
3321/// A recipe for generating the phi node for the current index of elements,
3322/// adjusted in accordance with EVL value. It starts at the start value of the
3323/// canonical induction and gets incremented by EVL in each iteration of the
3324/// vector loop.
3326public:
3328 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3329
3330 ~VPEVLBasedIVPHIRecipe() override = default;
3331
3333 llvm_unreachable("cloning not implemented yet");
3334 }
3335
3336 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3337
3339 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3340 }
3341
3342 void execute(VPTransformState &State) override {
3344 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3345 }
3346
3347 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3349 VPCostContext &Ctx) const override {
3350 // For now, match the behavior of the legacy cost model.
3351 return 0;
3352 }
3353
3354 /// Returns true if the recipe only uses the first lane of operand \p Op.
3355 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3357 "Op must be an operand of the recipe");
3358 return true;
3359 }
3360
3361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3362 /// Print the recipe.
3363 void print(raw_ostream &O, const Twine &Indent,
3364 VPSlotTracker &SlotTracker) const override;
3365#endif
3366};
3367
3368/// A Recipe for widening the canonical induction variable of the vector loop.
3370 public VPUnrollPartAccessor<1> {
3371public:
3373 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3374
3375 ~VPWidenCanonicalIVRecipe() override = default;
3376
3378 return new VPWidenCanonicalIVRecipe(
3379 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3380 }
3381
3382 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3383
3384 /// Generate a canonical vector induction variable of the vector loop, with
3385 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3386 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3387 void execute(VPTransformState &State) override;
3388
3389 /// Return the cost of this VPWidenCanonicalIVRecipe.
3391 VPCostContext &Ctx) const override {
3392 // TODO: Compute accurate cost after retiring the legacy cost model.
3393 return 0;
3394 }
3395
3396#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3397 /// Print the recipe.
3398 void print(raw_ostream &O, const Twine &Indent,
3399 VPSlotTracker &SlotTracker) const override;
3400#endif
3401};
3402
3403/// A recipe for converting the input value \p IV value to the corresponding
3404/// value of an IV with different start and step values, using Start + IV *
3405/// Step.
3407 /// Kind of the induction.
3409 /// If not nullptr, the floating point induction binary operator. Must be set
3410 /// for floating point inductions.
3411 const FPMathOperator *FPBinOp;
3412
3413 /// Name to use for the generated IR instruction for the derived IV.
3414 std::string Name;
3415
3416public:
3418 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3419 const Twine &Name = "")
3421 IndDesc.getKind(),
3422 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3423 Start, CanonicalIV, Step, Name) {}
3424
3426 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3427 VPValue *Step, const Twine &Name = "")
3428 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3429 FPBinOp(FPBinOp), Name(Name.str()) {}
3430
3431 ~VPDerivedIVRecipe() override = default;
3432
3434 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3435 getStepValue());
3436 }
3437
3438 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3439
3440 /// Generate the transformed value of the induction at offset StartValue (1.
3441 /// operand) + IV (2. operand) * StepValue (3. operand).
3442 void execute(VPTransformState &State) override;
3443
3444 /// Return the cost of this VPDerivedIVRecipe.
3446 VPCostContext &Ctx) const override {
3447 // TODO: Compute accurate cost after retiring the legacy cost model.
3448 return 0;
3449 }
3450
3451#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3452 /// Print the recipe.
3453 void print(raw_ostream &O, const Twine &Indent,
3454 VPSlotTracker &SlotTracker) const override;
3455#endif
3456
3458 return getStartValue()->getLiveInIRValue()->getType();
3459 }
3460
3461 VPValue *getStartValue() const { return getOperand(0); }
3462 VPValue *getStepValue() const { return getOperand(2); }
3463
3464 /// Returns true if the recipe only uses the first lane of operand \p Op.
3465 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3467 "Op must be an operand of the recipe");
3468 return true;
3469 }
3470};
3471
3472/// A recipe for handling phi nodes of integer and floating-point inductions,
3473/// producing their scalar values.
3475 public VPUnrollPartAccessor<2> {
3476 Instruction::BinaryOps InductionOpcode;
3477
3478public:
3481 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3482 ArrayRef<VPValue *>({IV, Step}), FMFs),
3483 InductionOpcode(Opcode) {}
3484
3486 VPValue *Step)
3488 IV, Step, IndDesc.getInductionOpcode(),
3489 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3490 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3491 : FastMathFlags()) {}
3492
3493 ~VPScalarIVStepsRecipe() override = default;
3494
3496 return new VPScalarIVStepsRecipe(
3497 getOperand(0), getOperand(1), InductionOpcode,
3499 }
3500
3501 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3502
3503 /// Generate the scalarized versions of the phi node as needed by their users.
3504 void execute(VPTransformState &State) override;
3505
3506 /// Return the cost of this VPScalarIVStepsRecipe.
3508 VPCostContext &Ctx) const override {
3509 // TODO: Compute accurate cost after retiring the legacy cost model.
3510 return 0;
3511 }
3512
3513#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3514 /// Print the recipe.
3515 void print(raw_ostream &O, const Twine &Indent,
3516 VPSlotTracker &SlotTracker) const override;
3517#endif
3518
3519 VPValue *getStepValue() const { return getOperand(1); }
3520
3521 /// Returns true if the recipe only uses the first lane of operand \p Op.
3522 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3524 "Op must be an operand of the recipe");
3525 return true;
3526 }
3527};
3528
3529/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3530/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3531/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3533 friend class VPlan;
3534
3535 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3536 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3537 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3538 if (Recipe)
3539 appendRecipe(Recipe);
3540 }
3541
3542public:
3544
3545protected:
3546 /// The VPRecipes held in the order of output instructions to generate.
3548
3549 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3550 : VPBlockBase(BlockSC, Name.str()) {}
3551
3552public:
3553 ~VPBasicBlock() override {
3554 while (!Recipes.empty())
3555 Recipes.pop_back();
3556 }
3557
3558 /// Instruction iterators...
3563
3564 //===--------------------------------------------------------------------===//
3565 /// Recipe iterator methods
3566 ///
3567 inline iterator begin() { return Recipes.begin(); }
3568 inline const_iterator begin() const { return Recipes.begin(); }
3569 inline iterator end() { return Recipes.end(); }
3570 inline const_iterator end() const { return Recipes.end(); }
3571
3572 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3573 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3574 inline reverse_iterator rend() { return Recipes.rend(); }
3575 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3576
3577 inline size_t size() const { return Recipes.size(); }
3578 inline bool empty() const { return Recipes.empty(); }
3579 inline const VPRecipeBase &front() const { return Recipes.front(); }
3580 inline VPRecipeBase &front() { return Recipes.front(); }
3581 inline const VPRecipeBase &back() const { return Recipes.back(); }
3582 inline VPRecipeBase &back() { return Recipes.back(); }
3583
3584 /// Returns a reference to the list of recipes.
3586
3587 /// Returns a pointer to a member of the recipe list.
3589 return &VPBasicBlock::Recipes;
3590 }
3591
3592 /// Method to support type inquiry through isa, cast, and dyn_cast.
3593 static inline bool classof(const VPBlockBase *V) {
3594 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3595 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3596 }
3597
3598 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3599 assert(Recipe && "No recipe to append.");
3600 assert(!Recipe->Parent && "Recipe already in VPlan");
3601 Recipe->Parent = this;
3602 Recipes.insert(InsertPt, Recipe);
3603 }
3604
3605 /// Augment the existing recipes of a VPBasicBlock with an additional
3606 /// \p Recipe as the last recipe.
3607 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3608
3609 /// The method which generates the output IR instructions that correspond to
3610 /// this VPBasicBlock, thereby "executing" the VPlan.
3611 void execute(VPTransformState *State) override;
3612
3613 /// Return the cost of this VPBasicBlock.
3615
3616 /// Return the position of the first non-phi node recipe in the block.
3618
3619 /// Returns an iterator range over the PHI-like recipes in the block.
3621 return make_range(begin(), getFirstNonPhi());
3622 }
3623
3624 /// Split current block at \p SplitAt by inserting a new block between the
3625 /// current block and its successors and moving all recipes starting at
3626 /// SplitAt to the new block. Returns the new block.
3627 VPBasicBlock *splitAt(iterator SplitAt);
3628
3631
3632#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3633 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3634 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3635 ///
3636 /// Note that the numbering is applied to the whole VPlan, so printing
3637 /// individual blocks is consistent with the whole VPlan printing.
3638 void print(raw_ostream &O, const Twine &Indent,
3639 VPSlotTracker &SlotTracker) const override;
3640 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3641#endif
3642
3643 /// If the block has multiple successors, return the branch recipe terminating
3644 /// the block. If there is no successor or only a single one, return nullptr.
3646 const VPRecipeBase *getTerminator() const;
3647
3648 /// Returns true if the block is exiting its parent region.
3649 bool isExiting() const;
3650
3651 /// Clone the current block and its recipes, without updating the operands of
3652 /// the cloned recipes.
3653 VPBasicBlock *clone() override;
3654
3655protected:
3656 /// Execute the recipes in the IR basic block \p BB.
3657 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3658
3659 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3660 /// generated for this VPBB.
3662
3663private:
3664 /// Create an IR BasicBlock to hold the output instructions generated by this
3665 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3666 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3667};
3668
3669/// A special type of VPBasicBlock that wraps an existing IR basic block.
3670/// Recipes of the block get added before the first non-phi instruction in the
3671/// wrapped block.
3672/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3673/// preheader block.
3675 friend class VPlan;
3676
3677 BasicBlock *IRBB;
3678
3679 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
3681 : VPBasicBlock(VPIRBasicBlockSC,
3682 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3683 IRBB(IRBB) {}
3684
3685public:
3686 ~VPIRBasicBlock() override {}
3687
3688 static inline bool classof(const VPBlockBase *V) {
3689 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3690 }
3691
3692 /// The method which generates the output IR instructions that correspond to
3693 /// this VPBasicBlock, thereby "executing" the VPlan.
3694 void execute(VPTransformState *State) override;
3695
3696 VPIRBasicBlock *clone() override;
3697
3698 BasicBlock *getIRBasicBlock() const { return IRBB; }
3699};
3700
3701/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3702/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3703/// A VPRegionBlock may indicate that its contents are to be replicated several
3704/// times. This is designed to support predicated scalarization, in which a
3705/// scalar if-then code structure needs to be generated VF * UF times. Having
3706/// this replication indicator helps to keep a single model for multiple
3707/// candidate VF's. The actual replication takes place only once the desired VF
3708/// and UF have been determined.
3710 friend class VPlan;
3711
3712 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3713 VPBlockBase *Entry;
3714
3715 /// Hold the Single Exiting block of the SESE region modelled by the
3716 /// VPRegionBlock.
3717 VPBlockBase *Exiting;
3718
3719 /// An indicator whether this region is to generate multiple replicated
3720 /// instances of output IR corresponding to its VPBlockBases.
3721 bool IsReplicator;
3722
3723 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
3724 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
3725 const std::string &Name = "", bool IsReplicator = false)
3726 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3727 IsReplicator(IsReplicator) {
3728 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3729 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3730 Entry->setParent(this);
3731 Exiting->setParent(this);
3732 }
3733 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3734 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3735 IsReplicator(IsReplicator) {}
3736
3737public:
3738 ~VPRegionBlock() override {}
3739
3740 /// Method to support type inquiry through isa, cast, and dyn_cast.
3741 static inline bool classof(const VPBlockBase *V) {
3742 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3743 }
3744
3745 const VPBlockBase *getEntry() const { return Entry; }
3746 VPBlockBase *getEntry() { return Entry; }
3747
3748 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3749 /// EntryBlock must have no predecessors.
3750 void setEntry(VPBlockBase *EntryBlock) {
3751 assert(EntryBlock->getPredecessors().empty() &&
3752 "Entry block cannot have predecessors.");
3753 Entry = EntryBlock;
3754 EntryBlock->setParent(this);
3755 }
3756
3757 const VPBlockBase *getExiting() const { return Exiting; }
3758 VPBlockBase *getExiting() { return Exiting; }
3759
3760 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3761 /// ExitingBlock must have no successors.
3762 void setExiting(VPBlockBase *ExitingBlock) {
3763 assert(ExitingBlock->getSuccessors().empty() &&
3764 "Exit block cannot have successors.");
3765 Exiting = ExitingBlock;
3766 ExitingBlock->setParent(this);
3767 }
3768
3769 /// Returns the pre-header VPBasicBlock of the loop region.
3771 assert(!isReplicator() && "should only get pre-header of loop regions");
3773 }
3774
3775 /// An indicator whether this region is to generate multiple replicated
3776 /// instances of output IR corresponding to its VPBlockBases.
3777 bool isReplicator() const { return IsReplicator; }
3778
3779 /// The method which generates the output IR instructions that correspond to
3780 /// this VPRegionBlock, thereby "executing" the VPlan.
3781 void execute(VPTransformState *State) override;
3782
3783 /// Return the cost of this region.
3785
3786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3787 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3788 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3789 /// consecutive numbers.
3790 ///
3791 /// Note that the numbering is applied to the whole VPlan, so printing
3792 /// individual regions is consistent with the whole VPlan printing.
3793 void print(raw_ostream &O, const Twine &Indent,
3794 VPSlotTracker &SlotTracker) const override;
3795 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3796#endif
3797
3798 /// Clone all blocks in the single-entry single-exit region of the block and
3799 /// their recipes without updating the operands of the cloned recipes.
3800 VPRegionBlock *clone() override;
3801};
3802
3803 /// VPlan models a candidate for vectorization, encoding various decisions taken
3804/// to produce efficient output IR, including which branches, basic-blocks and
3805/// output IR instructions to generate, and their cost. VPlan holds a
3806/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3807/// VPBasicBlock.
3808class VPlan {
3809 friend class VPlanPrinter;
3810 friend class VPSlotTracker;
3811
3812 /// VPBasicBlock corresponding to the original preheader. Used to place
3813 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3814 /// rest of VPlan execution.
3815 /// When this VPlan is used for the epilogue vector loop, the entry will be
3816 /// replaced by a new entry block created during skeleton creation.
3817 VPBasicBlock *Entry;
3818
3819 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3820 VPIRBasicBlock *ScalarHeader;
3821
3822 /// Holds the VFs applicable to this VPlan.
3824
3825 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3826 /// any UF.
3828
3829 /// Holds the name of the VPlan, for printing.
3830 std::string Name;
3831
3832 /// Represents the trip count of the original loop, for folding
3833 /// the tail.
3834 VPValue *TripCount = nullptr;
3835
3836 /// Represents the backedge taken count of the original loop, for folding
3837 /// the tail. It equals TripCount - 1.
3838 VPValue *BackedgeTakenCount = nullptr;
3839
3840 /// Represents the vector trip count.
3841 VPValue VectorTripCount;
3842
3843 /// Represents the vectorization factor of the loop.
3844 VPValue VF;
3845
3846 /// Represents the loop-invariant VF * UF of the vector loop region.
3847 VPValue VFxUF;
3848
3849 /// Holds a mapping between Values and their corresponding VPValue inside
3850 /// VPlan.
3851 Value2VPValueTy Value2VPValue;
3852
3853 /// Contains all the external definitions created for this VPlan. External
3854 /// definitions are VPValues that hold a pointer to their underlying IR.
3855 SmallVector<VPValue *, 16> VPLiveInsToFree;
3856
3857 /// Mapping from SCEVs to the VPValues representing their expansions.
3858 /// NOTE: This mapping is temporary and will be removed once all users have
3859 /// been modeled in VPlan directly.
3860 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3861
3862 /// Blocks allocated and owned by the VPlan. They will be deleted once the
3863 /// VPlan is destroyed.
3864 SmallVector<VPBlockBase *> CreatedBlocks;
3865
3866 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3867 /// wrapping the original header of the scalar loop.
3868 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3869 : Entry(Entry), ScalarHeader(ScalarHeader) {
3870 Entry->setPlan(this);
3871 assert(ScalarHeader->getNumSuccessors() == 0 &&
3872 "scalar header must be a leaf node");
3873 }
3874
3875public:
3876 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3877 /// original preheader and scalar header of \p L, to be used as entry and
3878 /// scalar header blocks of the new VPlan.
3879 VPlan(Loop *L);
3880
3881 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3882 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3883 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3884 setEntry(createVPBasicBlock("preheader"));
3885 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
3886 TripCount = TC;
3887 }
3888
3889 ~VPlan();
3890
3892 Entry = VPBB;
3893 VPBB->setPlan(this);
3894 }
3895
3896 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3897 /// original scalar pre-header) which contains SCEV expansions that need
3898 /// to happen before the CFG is modified (when executing a VPlan for the
3899 /// epilogue vector loop, the original entry needs to be replaced by a new
3900 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3901 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3902 /// to guard executing the scalar epilogue loop, it will be added to the
3903 /// middle block, together with VPBasicBlocks for the scalar preheader and
3904 /// exit blocks. \p InductionTy is the type of the canonical induction and
3905 /// used for related values, like the trip count expression.
3906 static VPlanPtr createInitialVPlan(Type *InductionTy,
3908 bool RequiresScalarEpilogueCheck,
3909 bool TailFolded, Loop *TheLoop);
3910
3911 /// Prepare the plan for execution, setting up the required live-in values.
3912 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3913 VPTransformState &State);
3914
3915 /// Generate the IR code for this VPlan.
3916 void execute(VPTransformState *State);
3917
3918 /// Return the cost of this plan.
3920
3921 VPBasicBlock *getEntry() { return Entry; }
3922 const VPBasicBlock *getEntry() const { return Entry; }
3923
3924 /// Returns the preheader of the vector loop region, if one exists, or null
3925 /// otherwise.
3927 VPRegionBlock *VectorRegion = getVectorLoopRegion();
3928 return VectorRegion
3929 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
3930 : nullptr;
3931 }
3932
3933 /// Returns the VPRegionBlock of the vector loop.
3935 const VPRegionBlock *getVectorLoopRegion() const;
3936
3937 /// Returns the 'middle' block of the plan, that is the block that selects
3938 /// whether to execute the scalar tail loop or the exit block from the loop
3939 /// latch.
3941 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3942 }
3944 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3945 }
3946
3947 /// Return the VPBasicBlock for the preheader of the scalar loop.
3949 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3950 }
3951
3952 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3953 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3954
3955 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3956 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3957 /// VPlanHCFG, as the definition of the type needs access to the definitions
3958 /// of VPBlockShallowTraversalWrapper.
3959 auto getExitBlocks();
3960
3961 /// The trip count of the original loop.
3963 assert(TripCount && "trip count needs to be set before accessing it");
3964 return TripCount;
3965 }
3966
3967 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3968 /// the original trip count have been replaced.
3969 void resetTripCount(VPValue *NewTripCount) {
3970 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3971 "TripCount always must be set");
3972 TripCount = NewTripCount;
3973 }
3974
3975 /// The backedge taken count of the original loop.
3977 if (!BackedgeTakenCount)
3978 BackedgeTakenCount = new VPValue();
3979 return BackedgeTakenCount;
3980 }
3981
3982 /// The vector trip count.
3983 VPValue &getVectorTripCount() { return VectorTripCount; }
3984
3985 /// Returns the VF of the vector loop region.
3986 VPValue &getVF() { return VF; };
3987
3988 /// Returns VF * UF of the vector loop region.
3989 VPValue &getVFxUF() { return VFxUF; }
3990
3991 void addVF(ElementCount VF) { VFs.insert(VF); }
3992
3994 assert(hasVF(VF) && "Cannot set VF not already in plan");
3995 VFs.clear();
3996 VFs.insert(VF);
3997 }
3998
3999 bool hasVF(ElementCount VF) { return VFs.count(VF); }
4001 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4002 }
4003
4004 /// Returns an iterator range over all VFs of the plan.
4007 return {VFs.begin(), VFs.end()};
4008 }
4009
4010 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
4011
4012 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4013
4014 unsigned getUF() const {
4015 assert(UFs.size() == 1 && "Expected a single UF");
4016 return UFs[0];
4017 }
4018
4019 void setUF(unsigned UF) {
4020 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4021 UFs.clear();
4022 UFs.insert(UF);
4023 }
4024
4025 /// Return a string with the name of the plan and the applicable VFs and UFs.
4026 std::string getName() const;
4027
4028 void setName(const Twine &newName) { Name = newName.str(); }
4029
4030 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4031 /// yet) for \p V.
4033 assert(V && "Trying to get or add the VPValue of a null Value");
4034 if (!Value2VPValue.count(V)) {
4035 VPValue *VPV = new VPValue(V);
4036 VPLiveInsToFree.push_back(VPV);
4037 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4038 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
4039 Value2VPValue[V] = VPV;
4040 }
4041
4042 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
4043 assert(Value2VPValue[V]->isLiveIn() &&
4044 "Only live-ins should be in mapping");
4045 return Value2VPValue[V];
4046 }
4047
4048 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4049 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4050
4051#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4052 /// Print the live-ins of this VPlan to \p O.
4053 void printLiveIns(raw_ostream &O) const;
4054
4055 /// Print this VPlan to \p O.
4056 void print(raw_ostream &O) const;
4057
4058 /// Print this VPlan in DOT format to \p O.
4059 void printDOT(raw_ostream &O) const;
4060
4061 /// Dump the plan to stderr (for debugging).
4062 LLVM_DUMP_METHOD void dump() const;
4063#endif
4064
4065 /// Returns the canonical induction recipe of the vector loop.
4068 if (EntryVPBB->empty()) {
4069 // VPlan native path.
4070 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4071 }
4072 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4073 }
4074
4075 VPValue *getSCEVExpansion(const SCEV *S) const {
4076 return SCEVToExpansion.lookup(S);
4077 }
4078
4079 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4080 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4081 SCEVToExpansion[S] = V;
4082 }
4083
4084 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4085 /// recipes to refer to the clones, and return it.
4086 VPlan *duplicate();
4087
4088 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4089 /// present. The returned block is owned by the VPlan and deleted once the
4090 /// VPlan is destroyed.
4092 VPRecipeBase *Recipe = nullptr) {
4093 auto *VPB = new VPBasicBlock(Name, Recipe);
4094 CreatedBlocks.push_back(VPB);
4095 return VPB;
4096 }
4097
4098 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4099 /// IsReplicator is true, the region is a replicate region. The returned block
4100 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4102 const std::string &Name = "",
4103 bool IsReplicator = false) {
4104 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4105 CreatedBlocks.push_back(VPB);
4106 return VPB;
4107 }
4108
4109 /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
4110 /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
4111 /// The returned block is owned by the VPlan and deleted once the VPlan is
4112 /// destroyed.
4113 VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
4114 bool IsReplicator = false) {
4115 auto *VPB = new VPRegionBlock(Name, IsReplicator);
4116 CreatedBlocks.push_back(VPB);
4117 return VPB;
4118 }
4119
4120 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4121 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4122 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4124
4125 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4126 /// instructions in \p IRBB, except its terminator which is managed by the
4127 /// successors of the block in VPlan. The returned block is owned by the VPlan
4128 /// and deleted once the VPlan is destroyed.
4130};
4131
4132#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4133/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4134/// indented and follows the dot format.
4136 raw_ostream &OS;
4137 const VPlan &Plan;
4138 unsigned Depth = 0;
4139 unsigned TabWidth = 2;
4140 std::string Indent;
4141 unsigned BID = 0;
4143
4145
4146 /// Handle indentation.
4147 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4148
4149 /// Print a given \p Block of the Plan.
4150 void dumpBlock(const VPBlockBase *Block);
4151
4152 /// Print the information related to the CFG edges going out of a given
4153 /// \p Block, followed by printing the successor blocks themselves.
4154 void dumpEdges(const VPBlockBase *Block);
4155
4156 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4157 /// its successor blocks.
4158 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4159
4160 /// Print a given \p Region of the Plan.
4161 void dumpRegion(const VPRegionBlock *Region);
4162
4163 unsigned getOrCreateBID(const VPBlockBase *Block) {
4164 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4165 }
4166
4167 Twine getOrCreateName(const VPBlockBase *Block);
4168
4169 Twine getUID(const VPBlockBase *Block);
4170
4171 /// Print the information related to a CFG edge between two VPBlockBases.
4172 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4173 const Twine &Label);
4174
4175public:
4177 : OS(O), Plan(P), SlotTracker(&P) {}
4178
4179 LLVM_DUMP_METHOD void dump();
4180};
4181
4183 const Value *V;
4184
4185 VPlanIngredient(const Value *V) : V(V) {}
4186
4187 void print(raw_ostream &O) const;
4188};
4189
4191 I.print(OS);
4192 return OS;
4193}
4194
4196 Plan.print(OS);
4197 return OS;
4198}
4199#endif
4200
4203 InterleaveGroupMap;
4204
4205 /// Type for mapping of instruction based interleave groups to VPInstruction
4206 /// interleave groups
4209
4210 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
4211 /// \p IAI.
4212 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4214 /// Recursively traverse \p Block and populate VPlan based interleave groups
4215 /// based on \p IAI.
4216 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4218
4219public:
4221
4224 // Avoid releasing a pointer twice.
4225 for (auto &I : InterleaveGroupMap)
4226 DelSet.insert(I.second);
4227 for (auto *Ptr : DelSet)
4228 delete Ptr;
4229 }
4230
4231 /// Get the interleave group that \p Instr belongs to.
4232 ///
4233 /// \returns nullptr if doesn't have such group.
4236 return InterleaveGroupMap.lookup(Instr);
4237 }
4238};
4239
4240/// Class that maps (parts of) an existing VPlan to trees of combined
4241/// VPInstructions.
4243 enum class OpMode { Failed, Load, Opcode };
4244
4245 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4246 /// DenseMap keys.
4247 struct BundleDenseMapInfo {
4248 static SmallVector<VPValue *, 4> getEmptyKey() {
4249 return {reinterpret_cast<VPValue *>(-1)};
4250 }
4251
4252 static SmallVector<VPValue *, 4> getTombstoneKey() {
4253 return {reinterpret_cast<VPValue *>(-2)};
4254 }
4255
4256 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4257 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4258 }
4259
4260 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4262 return LHS == RHS;
4263 }
4264 };
4265
4266 /// Mapping of values in the original VPlan to a combined VPInstruction.
4268 BundleToCombined;
4269
4271
4272 /// Basic block to operate on. For now, only instructions in a single BB are
4273 /// considered.
4274 const VPBasicBlock &BB;
4275
4276 /// Indicates whether we managed to combine all visited instructions or not.
4277 bool CompletelySLP = true;
4278
4279 /// Width of the widest combined bundle in bits.
4280 unsigned WidestBundleBits = 0;
4281
4282 using MultiNodeOpTy =
4283 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4284
4285 // Input operand bundles for the current multi node. Each multi node operand
4286 // bundle contains values not matching the multi node's opcode. They will
4287 // be reordered in reorderMultiNodeOps, once we completed building a
4288 // multi node.
4289 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4290
4291 /// Indicates whether we are building a multi node currently.
4292 bool MultiNodeActive = false;
4293
4294 /// Check if we can vectorize Operands together.
4295 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4296
4297 /// Add combined instruction \p New for the bundle \p Operands.
4298 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4299
4300 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4301 VPInstruction *markFailed();
4302
4303 /// Reorder operands in the multi node to maximize sequential memory access
4304 /// and commutative operations.
4305 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4306
4307 /// Choose the best candidate to use for the lane after \p Last. The set of
4308 /// candidates to choose from are values with an opcode matching \p Last's
4309 /// or loads consecutive to \p Last.
4310 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4311 SmallPtrSetImpl<VPValue *> &Candidates,
4313
4314#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4315 /// Print bundle \p Values to dbgs().
4316 void dumpBundle(ArrayRef<VPValue *> Values);
4317#endif
4318
4319public:
4320 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4321
4322 ~VPlanSlp() = default;
4323
4324 /// Tries to build an SLP tree rooted at \p Operands and returns a
4325 /// VPInstruction combining \p Operands, if they can be combined.
4327
4328 /// Return the width of the widest combined bundle in bits.
4329 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4330
4331 /// Return true if all visited instructions can be combined.
4332 bool isCompletelySLP() const { return CompletelySLP; }
4333};
4334} // end namespace llvm
4335
4336#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
Flatten the CFG
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:815
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:291
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:630
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:121
ElementCount operator*() const
Definition: VPlan.h:129
iterator & operator++()
Definition: VPlan.h:131
iterator(ElementCount VF)
Definition: VPlan.h:125
bool operator==(const iterator &Other) const
Definition: VPlan.h:127
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3290
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3298
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3307
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3292
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3532
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3560
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3607
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3562
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3559
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:414
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:479
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3585
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3549
iterator end()
Definition: VPlan.h:3569
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3567
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3561
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:516
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3620
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:757
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208
~VPBasicBlock() override
Definition: VPlan.h:3553
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:565
const_reverse_iterator rbegin() const
Definition: VPlan.h:3573
reverse_iterator rend()
Definition: VPlan.h:3574
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:535
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3547
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:523
VPRecipeBase & back()
Definition: VPlan.h:3582
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:630
const VPRecipeBase & front() const
Definition: VPlan.h:3579
const_iterator begin() const
Definition: VPlan.h:3568
VPRecipeBase & front()
Definition: VPlan.h:3580
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:613
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:601
const VPRecipeBase & back() const
Definition: VPlan.h:3581
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3598
bool empty() const
Definition: VPlan.h:3578
const_iterator end() const
Definition: VPlan.h:3570
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3593
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3588
reverse_iterator rbegin()
Definition: VPlan.h:3572
size_t size() const
Definition: VPlan.h:3577
const_reverse_iterator rend() const
Definition: VPlan.h:3575
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2490
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2496
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2543
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2519
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2524
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2514
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2501
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2510
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:391
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition: VPlan.h:607
VPRegionBlock * getParent()
Definition: VPlan.h:483
VPBlocksTy & getPredecessors()
Definition: VPlan.h:515
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:512
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:666
void setName(const Twine &newName)
Definition: VPlan.h:476
size_t getNumSuccessors() const
Definition: VPlan.h:529
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:511
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:622
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:618
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:635
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPlan to O.
Definition: VPlan.h:656
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:565
size_t getNumPredecessors() const
Definition: VPlan.h:530
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:598
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:514
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:468
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:484
const std::string & getName() const
Definition: VPlan.h:474
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:617
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:555
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:589
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:525
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:549
void clearPredecessors()
Remove all the predecessor of this block.
Definition: VPlan.h:614
unsigned getVPBlockID() const
Definition: VPlan.h:481
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:642
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:460
VPBlocksTy & getSuccessors()
Definition: VPlan.h:509
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:578
void setParent(VPRegionBlock *P)
Definition: VPlan.h:494
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:571
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:519
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:508
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlanUtils.h:79
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2850
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2886
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2874
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2852
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2858
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2893
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3229
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3272
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3244
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3236
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3231
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3265
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3260
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3248
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3279
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:298
unsigned getVPDefID() const
Definition: VPlanValue.h:426
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3406
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1. operand) + IV (2. operand) * StepValue (3. operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3445
VPValue * getStepValue() const
Definition: VPlan.h:3462
Type * getScalarType() const
Definition: VPlan.h:3457
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3433
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3425
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3465
VPValue * getStartValue() const
Definition: VPlan.h:3461
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3417
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3325
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3338
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3332
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3342
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3348
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3327
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3355
Recipe to expand a SCEV expression.
Definition: VPlan.h:3190
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3195
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3210
const SCEV * getSCEV() const
Definition: VPlan.h:3222
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3200
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2027
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2044
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2029
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2075
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2064
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2072
VPValue * getStartValue() const
Definition: VPlan.h:2067
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2040
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2081
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1777
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1789
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1783
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1806
unsigned getOpcode() const
Definition: VPlan.h:1802
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3674
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:450
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3698
~VPIRBasicBlock() override
Definition: VPlan.h:3686
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3688
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:472
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1376
Instruction & getInstruction() const
Definition: VPlan.h:1400
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1414
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1420
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1387
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1408
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1380
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1188
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1288
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1263
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1299
bool hasResult() const
Definition: VPlan.h:1329
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1369
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1314
unsigned getOpcode() const
Definition: VPlan.h:1306
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1275
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1268
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1206
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1194
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1209
@ CalculateTripCountMinusVF
Definition: VPlan.h:1207
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1280
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition: VPlan.h:2557
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2640
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2598
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2569
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2604
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2590
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2611
Instruction * getInsertPos() const
Definition: VPlan.h:2646
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2631
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2635
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4235
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:176
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:210
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:213
VPLane(unsigned Lane)
Definition: VPlan.h:175
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:200
static VPLane getFirstLane()
Definition: VPlan.h:178
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:156
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:216
A recipe for forming partial reductions.
Definition: VPlan.h:2446
~VPPartialReductionRecipe() override=default
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2479
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, Instruction *ReductionInst=nullptr)
Definition: VPlan.h:2454
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1)
Definition: VPlan.h:2450
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2464
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2905
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2937
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2913
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2924
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition: VPlan.h:2909
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:710
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:799
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:735
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:804
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:777
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:721
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:736
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:782
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:726
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:788
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flags for a recipe along with it.
Definition: VPlan.h:921
ExactFlagsTy ExactFlags
Definition: VPlan.h:971
FastMathFlagsTy FMFs
Definition: VPlan.h:974
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:973
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:968
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1141
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1023
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1102
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1048
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1054
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1035
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1071
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1144
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:993
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:970
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1029
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1041
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:972
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:979
WrapFlagsTy WrapFlags
Definition: VPlan.h:969
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1148
bool isDisjoint() const
Definition: VPlan.h:1160
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1135
bool hasNoSignedWrap() const
Definition: VPlan.h:1154
static bool classof(const VPUser *U)
Definition: VPlan.h:1065
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:986
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2732
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2762
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2759
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2734
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2743
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2380
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition: VPlan.h:2436
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2408
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2397
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition: VPlan.h:2439
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2418
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2431
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2652
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2717
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2687
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL={})
Definition: VPlan.h:2672
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2660
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2721
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2711
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2723
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2715
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2719
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2681
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2692
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3709
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:701
const VPBlockBase * getEntry() const
Definition: VPlan.h:3745
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3777
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3762
VPBlockBase * getExiting()
Definition: VPlan.h:3758
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3750
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:764
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:802
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:710
const VPBlockBase * getExiting() const
Definition: VPlan.h:3757
VPBlockBase * getEntry()
Definition: VPlan.h:3746
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3770
~VPRegionBlock() override
Definition: VPlan.h:3738
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3741
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2773
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2822
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2829
bool isUniform() const
Definition: VPlan.h:2817
bool isPredicated() const
Definition: VPlan.h:2819
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2792
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2782
unsigned getOpcode() const
Definition: VPlan.h:2846
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2841
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexedTy in reverse order.
Definition: VPlan.h:1904
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReverseVectorPointerRecipe.
Definition: VPlan.h:1928
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1942
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1935
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1921
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1908
const VPValue * getVFValue() const
Definition: VPlan.h:1917
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1581
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1606
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1596
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1620
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: VPlan.h:1589
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1618
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3475
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3522
VPValue * getStepValue() const
Definition: VPlan.h:3519
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3507
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3485
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3495
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3479
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2263
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2267
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2286
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2276
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:837
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:843
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:907
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:852
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:910
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:840
static bool classof(const VPUser *U)
Definition: VPlan.h:899
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:848
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:447
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1173
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206
operand_range operands()
Definition: VPlanValue.h:263
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248
unsigned getNumOperands() const
Definition: VPlanValue.h:242
operand_iterator op_end()
Definition: VPlanValue.h:261
operand_iterator op_begin()
Definition: VPlanValue.h:259
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:243
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:237
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1416
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89
unsigned getNumUsers() const
Definition: VPlanValue.h:117
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173
friend class VPRecipeBase
Definition: VPlanValue.h:56
user_range users()
Definition: VPlanValue.h:138
A recipe to compute the pointers for widened memory accesses of IndexedTy.
Definition: VPlan.h:1957
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1961
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1978
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1971
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1991
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1985
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1721
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1761
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1740
Function * getCalledScalarFunction() const
Definition: VPlan.h:1754
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1758
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1728
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3370
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3390
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3377
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3372
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1529
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1537
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1574
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1577
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1545
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1551
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1482
const VPValue * getEVL() const
Definition: VPlan.h:1506
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1487
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1498
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1491
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1505
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1513
A recipe for handling GEP instructions.
Definition: VPlan.h:1855
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1888
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1877
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1872
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2089
static bool classof(const VPValue *V)
Definition: VPlan.h:2105
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2125
PHINode * getPHINode() const
Definition: VPlan.h:2120
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2093
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2117
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2123
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2132
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2100
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2110
const VPValue * getStepValue() const
Definition: VPlan.h:2118
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2142
const TruncInst * getTruncInst() const
Definition: VPlan.h:2196
const VPValue * getVFValue() const
Definition: VPlan.h:2185
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2155
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2166
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2195
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2146
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2211
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2204
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1629
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1670
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1694
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1703
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1655
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1709
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1678
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1706
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1697
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1646
A common base class for widening memory operations.
Definition: VPlan.h:2946
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2957
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2954
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2993
static bool classof(const VPUser *U)
Definition: VPlan.h:2987
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:3013
Instruction & Ingredient
Definition: VPlan.h:2948
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2976
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:3021
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2951
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2980
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2967
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3007
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:3003
void setMask(VPValue *Mask)
Definition: VPlan.h:2959
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:3000
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2997
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2302
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2333
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL={})
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition: VPlan.h:2309
VPValue * getIncomingValue(unsigned I)
Returns the I-th incoming VPValue.
Definition: VPlan.h:2342
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2315
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I-th incoming VPBasicBlock.
Definition: VPlan.h:2339
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2232
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2223
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2249
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1431
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1447
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1442
unsigned getOpcode() const
Definition: VPlan.h:1471
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1436
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1453
static bool classof(const VPUser *U)
Definition: VPlan.h:1458
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4135
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4176
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1273
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4242
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:4332
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4320
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4329
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3808
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1144
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1120
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:923
bool hasScalableVF()
Definition: VPlan.h:4000
VPBasicBlock * getEntry()
Definition: VPlan.h:3921
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4101
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3983
void setName(const Twine &newName)
Definition: VPlan.h:4028
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3989
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3986
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3962
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3976
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:4006
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:3883
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3922
unsigned getUF() const
Definition: VPlan.h:4014
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:844
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition: VPlan.cpp:1245
bool hasVF(ElementCount VF)
Definition: VPlan.h:3999
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4079
bool hasUF(unsigned UF) const
Definition: VPlan.h:4012
void setVF(ElementCount VF)
Definition: VPlan.h:3993
VPRegionBlock * createVPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Name and entry and exiting blocks set to nullptr.
Definition: VPlan.h:4113
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:310
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1051
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1045
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3940
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3969
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3943
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3891
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4091
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1251
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4032
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1150
bool hasScalarVFOnly() const
Definition: VPlan.h:4010
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3948
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:955
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4066
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1103
void addVF(ElementCount VF)
Definition: VPlan.h:3991
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3953
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:4049
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4075
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1067
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3926
void setUF(unsigned UF)
Definition: VPlan.h:4019
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Other
Any other memory.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:92
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:97
iterator end()
Definition: VPlan.h:138
const ElementCount Start
Definition: VPlan.h:99
ElementCount End
Definition: VPlan.h:102
iterator begin()
Definition: VPlan.h:137
bool isEmpty() const
Definition: VPlan.h:104
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:108
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:676
LLVMContext & LLVMCtx
Definition: VPlan.h:680
LoopVectorizationCostModel & CM
Definition: VPlan.h:681
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM, TargetTransformInfo::TargetCostKind CostKind)
Definition: VPlan.h:685
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1664
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlan.h:683
VPTypeAnalysis Types
Definition: VPlan.h:679
const TargetLibraryInfo & TLI
Definition: VPlan.h:678
const TargetTransformInfo & TTI
Definition: VPlan.h:677
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:682
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2348
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2358
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2349
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2354
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:938
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:332
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:338
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:346
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:334
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:342
CFGState(DominatorTree *DT)
Definition: VPlan.h:351
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:347
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:349
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:249
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:251
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:230
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:263
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:261
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:360
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:383
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:386
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:360
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:393
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:379
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:352
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:300
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:244
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:290
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:363
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:236
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:369
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:366
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:284
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:239
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:371
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:372
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:273
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
Definition: VPlan.h:3065
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3077
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3093
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3066
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:3026
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3027
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3053
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3035
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1818
bool isInvariantCond() const
Definition: VPlan.h:1849
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1825
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1820
VPValue * getCond() const
Definition: VPlan.h:1845
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
Definition: VPlan.h:3145
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3156
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3175
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3146
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3159
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition: VPlan.h:3104
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3133
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3105
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3121
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3112
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4185
const Value * V
Definition: VPlan.h:4183
void print(raw_ostream &O) const
Definition: VPlan.cpp:1389