/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
- /// Try to vectorize the interleaved access group that \p Instr belongs to,
- /// optionally masking the vector operations if \p BlockInMask is non-null.
- void vectorizeInterleaveGroup(Instruction *Instr,
- VectorParts *BlockInMask = nullptr);
-
- /// Vectorize Load and Store instructions, optionally masking the vector
- /// operations if \p BlockInMask is non-null.
- void vectorizeMemoryInstruction(Instruction *Instr,
- VectorParts *BlockInMask = nullptr);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to
+ /// with the base address given in \p Addr, optionally masking the vector
+ /// operations if \p BlockInMask is non-null. Use \p State to translate given
+ /// VPValues to IR values in the vectorized loop.
+ void vectorizeInterleaveGroup(Instruction *Instr, VPTransformState &State,
+ VPValue *Addr, VPValue *BlockInMask = nullptr);
+
+ /// Vectorize Load and Store instructions with the base address given in \p
+ /// Addr, optionally masking the vector operations if \p BlockInMask is
+ /// non-null. Use \p State to translate given VPValues to IR values in the
+ /// vectorized loop.
+ void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
+ VPValue *Addr,
+ VPValue *BlockInMask = nullptr);
/// Set the debug location in the builder using the debug location in
/// the instruction.
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
-                                                  VectorParts *BlockInMask) {
+                                                  VPTransformState &State,
+                                                  VPValue *Addr,
+                                                  VPValue *BlockInMask) {
  const InterleaveGroup<Instruction> *Group =
      Cost->getInterleavedAccessGroup(Instr);
  assert(Group && "Fail to get an interleaved access group.");
    return;
  const DataLayout &DL = Instr->getModule()->getDataLayout();
-  Value *Ptr = getLoadStorePointerOperand(Instr);
  // Prepare for the vector type of the interleaved load/store.
  Type *ScalarTy = getMemInstValueType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
-  Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
  // Prepare for the new pointers.
-  setDebugLocFromInst(Builder, Ptr);
-  SmallVector<Value *, 2> NewPtrs;
+  // One address per unroll part, each obtained from the Addr VPValue below.
+  SmallVector<Value *, 2> AddrParts;
  unsigned Index = Group->getIndex(Instr);
-  VectorParts Mask;
-  bool IsMaskForCondRequired = BlockInMask;
-  if (IsMaskForCondRequired) {
-    Mask = *BlockInMask;
-    // TODO: extend the masked interleaved-group support to reversed access.
-    assert(!Group->isReverse() && "Reversed masked interleave-group "
-                                  "not supported.");
-  }
+  // TODO: extend the masked interleaved-group support to reversed access.
+  assert((!BlockInMask || !Group->isReverse()) &&
+         "Reversed masked interleave-group not supported.");
  // If the group is reverse, adjust the index to refer to the last vector lane
  // instead of the first. We adjust the index from the first vector lane,
  if (Group->isReverse())
    Index += (VF - 1) * Group->getFactor();
-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-    InBounds = gep->isInBounds();
-
  for (unsigned Part = 0; Part < UF; Part++) {
-    Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});
+    // Translate the address VPValue to the scalar IR value for lane 0 of
+    // this unroll part.
+    Value *AddrPart = State.get(Addr, {Part, 0});
+    setDebugLocFromInst(Builder, AddrPart);
    // Notice current instruction could be any index. Need to adjust the address
    // to the member of index 0.
    // A[i]   = b;  // Member of index 0
    // A[i+2] = c;  // Member of index 2 (Current instruction)
    // Current pointer is pointed to A[i+2], adjust it to A[i].
-    NewPtr = Builder.CreateGEP(ScalarTy, NewPtr, Builder.getInt32(-Index));
-    if (InBounds)
-      cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
+
+    // Propagate inbounds from the (possibly cast) source GEP, if any, onto
+    // the index-adjusting GEP created here.
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+    AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Builder.getInt32(-Index));
+    cast<GetElementPtrInst>(AddrPart)->setIsInBounds(InBounds);
    // Cast to the vector pointer type.
-    NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
+    unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();
+    Type *PtrTy = VecTy->getPointerTo(AddressSpace);
+    AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));
  }
  setDebugLocFromInst(Builder, Instr);
  SmallVector<Value *, 2> NewLoads;
  for (unsigned Part = 0; Part < UF; Part++) {
    Instruction *NewLoad;
-    if (IsMaskForCondRequired || MaskForGaps) {
+    if (BlockInMask || MaskForGaps) {
      assert(useMaskedInterleavedAccesses(*TTI) &&
             "masked interleaved groups are not allowed.");
      Value *GroupMask = MaskForGaps;
-      if (IsMaskForCondRequired) {
-        auto *Undefs = UndefValue::get(Mask[Part]->getType());
+      if (BlockInMask) {
+        Value *BlockInMaskPart = State.get(BlockInMask, Part);
+        auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
        auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
        Value *ShuffledMask = Builder.CreateShuffleVector(
-            Mask[Part], Undefs, RepMask, "interleaved.mask");
+            BlockInMaskPart, Undefs, RepMask, "interleaved.mask");
        GroupMask = MaskForGaps
                        ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
                                              MaskForGaps)
                        : ShuffledMask;
      }
      NewLoad =
-          Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+          Builder.CreateMaskedLoad(AddrParts[Part], Group->getAlignment(),
                                   GroupMask, UndefVec, "wide.masked.vec");
    }
    else
-      NewLoad = Builder.CreateAlignedLoad(VecTy, NewPtrs[Part],
+      NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part],
                                          Group->getAlignment(), "wide.vec");
    Group->addMetadata(NewLoad);
    NewLoads.push_back(NewLoad);
                                   "interleaved.vec");
    Instruction *NewStoreInstr;
-    if (IsMaskForCondRequired) {
-      auto *Undefs = UndefValue::get(Mask[Part]->getType());
+    if (BlockInMask) {
+      Value *BlockInMaskPart = State.get(BlockInMask, Part);
+      auto *Undefs = UndefValue::get(BlockInMaskPart->getType());
      auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
      Value *ShuffledMask = Builder.CreateShuffleVector(
-          Mask[Part], Undefs, RepMask, "interleaved.mask");
+          BlockInMaskPart, Undefs, RepMask, "interleaved.mask");
      NewStoreInstr = Builder.CreateMaskedStore(
-          IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
+          IVec, AddrParts[Part], Group->getAlignment(), ShuffledMask);
    }
    else
-      NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
-                                                 Group->getAlignment());
+      NewStoreInstr = Builder.CreateAlignedStore(IVec, AddrParts[Part],
+                                                 Group->getAlignment());
    Group->addMetadata(NewStoreInstr);
  }
}
void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
-                                                    VectorParts *BlockInMask) {
+                                                    VPTransformState &State,
+                                                    VPValue *Addr,
+                                                    VPValue *BlockInMask) {
  // Attempt to issue a wide load.
  LoadInst *LI = dyn_cast<LoadInst>(Instr);
  StoreInst *SI = dyn_cast<StoreInst>(Instr);
  assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
         "CM decision should be taken at this point");
  if (Decision == LoopVectorizationCostModel::CM_Interleave)
-    return vectorizeInterleaveGroup(Instr);
+    return vectorizeInterleaveGroup(Instr, State, Addr, BlockInMask);
  Type *ScalarDataTy = getMemInstValueType(Instr);
  Type *DataTy = VectorType::get(ScalarDataTy, VF);
-  Value *Ptr = getLoadStorePointerOperand(Instr);
  // An alignment of 0 means target abi alignment. We need to use the scalar's
  // target abi alignment in such a case.
  const DataLayout &DL = Instr->getModule()->getDataLayout();
  const Align Alignment =
      DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
-  unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
  // Determine if the pointer operand of the access is either consecutive or
  // reverse consecutive.
  assert((ConsecutiveStride || CreateGatherScatter) &&
         "The instruction should be scalarized");
-  // Handle consecutive loads/stores.
-  if (ConsecutiveStride)
-    Ptr = getOrCreateScalarValue(Ptr, {0, 0});
-
-  VectorParts Mask;
+  // Materialize the mask VPValue into one IR value per unroll part up front.
+  VectorParts BlockInMaskParts(UF);
  bool isMaskRequired = BlockInMask;
  if (isMaskRequired)
-    Mask = *BlockInMask;
-
-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(
-          getLoadStorePointerOperand(Instr)->stripPointerCasts()))
-    InBounds = gep->isInBounds();
+    for (unsigned Part = 0; Part < UF; ++Part)
+      BlockInMaskParts[Part] = State.get(BlockInMask, Part);
  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
    // Calculate the pointer for the specific unroll-part.
    GetElementPtrInst *PartPtr = nullptr;
+    // Derive inbounds from the incoming (translated) pointer rather than
+    // from the original scalar IR pointer operand.
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+
    if (Reverse) {
      // If the address is consecutive but reversed, then the
      // wide store needs to start at the last vector element.
          Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
      PartPtr->setIsInBounds(InBounds);
      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
-        Mask[Part] = reverseVector(Mask[Part]);
+        BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
    } else {
      PartPtr = cast<GetElementPtrInst>(
          Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
      PartPtr->setIsInBounds(InBounds);
    }
+    // Address space is now taken from the pointer itself, not the original
+    // load/store instruction.
+    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
    return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
  };
    Instruction *NewSI = nullptr;
    Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
    if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      // Gather/scatter: the address translates to a vector of pointers.
+      Value *VectorGep = State.get(Addr, Part);
      NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
                                          Alignment.value(), MaskPart);
    } else {
      // We don't want to update the value in the map as it might be used in
      // another expression. So don't call resetVectorValue(StoredVal).
    }
-      auto *VecPtr = CreateVecPtr(Part, Ptr);
+      // Consecutive accesses all start from the lane-0 address of part 0.
+      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
      if (isMaskRequired)
-        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
-                                          Alignment.value(), Mask[Part]);
+        NewSI = Builder.CreateMaskedStore(
+            StoredVal, VecPtr, Alignment.value(), BlockInMaskParts[Part]);
      else
        NewSI =
            Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value());
  for (unsigned Part = 0; Part < UF; ++Part) {
    Value *NewLI;
    if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *VectorGep = State.get(Addr, Part);
      NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
                                         nullptr, "wide.masked.gather");
      addMetadata(NewLI, LI);
    } else {
-      auto *VecPtr = CreateVecPtr(Part, Ptr);
+      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
-                                         UndefValue::get(DataTy),
-                                         "wide.masked.load");
+        NewLI = Builder.CreateMaskedLoad(
+            VecPtr, Alignment.value(), BlockInMaskParts[Part],
+            UndefValue::get(DataTy), "wide.masked.load");
      else
        NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(),
                                          "wide.load");
  if (Legal->isMaskRequired(I))
    Mask = createBlockInMask(I->getParent(), Plan);
-  return new VPWidenMemoryInstructionRecipe(*I, Mask);
+  // Wrap the load/store pointer operand in a VPValue so the recipe carries
+  // the address explicitly as an operand.
+  VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I));
+  return new VPWidenMemoryInstructionRecipe(*I, Addr, Mask);
}
VPWidenIntOrFpInductionRecipe *
  for (auto IG : InterleaveGroups) {
    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
        RecipeBuilder.getRecipe(IG->getInsertPos()));
-    (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
+    // Reuse the widen-memory recipe's address and mask for the group recipe.
+    (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask()))
+        ->insertBefore(Recipe);
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *Member = IG->getMember(i)) {
return ILV.getOrCreateVectorValue(V, Part);
}
+// VPCallback hook used by VPTransformState::get for per-lane values: simply
+// forwards the request to ILV's getOrCreateScalarValue.
+Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue(
+    Value *V, const VPIteration &Instance) {
+  return ILV.getOrCreateScalarValue(V, Instance);
+}
+
void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
  O << " +\n"
    << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
-  if (User) {
+  // The address operand is always present and printed unconditionally; the
+  // mask is printed only when one is set.
+  O << ", ";
+  getAddr()->printAsOperand(O);
+  VPValue *Mask = getMask();
+  if (Mask) {
    O << ", ";
-    User->getOperand(0)->printAsOperand(O);
+    Mask->printAsOperand(O);
  }
  O << "\\l\"";
  for (unsigned i = 0; i < IG->getFactor(); ++i)
void VPInterleaveRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Interleave group being replicated.");
-  if (!User)
-    return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
-
-  // Last (and currently only) operand is a mask.
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
+  // Address and optional mask are passed as VPValues; translating them to IR
+  // values per part now happens inside vectorizeInterleaveGroup via State.
+  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), State, getAddr(),
+                                      getMask());
}
void VPReplicateRecipe::execute(VPTransformState &State) {
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *Mask = getMask();
-  if (!Mask)
-    return State.ILV->vectorizeMemoryInstruction(&Instr);
-
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
+  // Per-part mask materialization moved into vectorizeMemoryInstruction;
+  // getMask() may return nullptr, which represents an all-true mask.
+  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
struct VPCallback {
  virtual ~VPCallback() {}
  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
+  /// Return the per-lane scalar Value generated for \p V at \p Instance.
+  virtual Value *getOrCreateScalarValue(Value *V,
+                                        const VPIteration &Instance) = 0;
};
/// VPTransformState holds information passed down when "executing" a VPlan,
return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
}
+  /// Get the generated Value for a given VPValue and given Part and Lane. Note
+  /// that as per-lane Defs are still created by ILV and managed in its ValueMap
+  /// this method currently just delegates the call to ILV.
+  Value *get(VPValue *Def, const VPIteration &Instance) {
+    // NOTE(review): assumes Def was registered in VPValue2Value; operator[]
+    // would otherwise default-insert and forward a null Value — verify callers.
+    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+  }
+
/// Set the generated Value for a given VPValue and a given Part.
void set(VPValue *Def, Value *V, unsigned Part) {
if (!Data.PerPartOutput.count(Def)) {
class VPInterleaveRecipe : public VPRecipeBase {
private:
  const InterleaveGroup<Instruction> *IG;
-  std::unique_ptr<VPUser> User;
+  // Operand bundle: operand 0 is the address, operand 1 (optional) the mask.
+  VPUser User;
public:
-  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
-      : VPRecipeBase(VPInterleaveSC), IG(IG) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
+                     VPValue *Mask)
+      : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
  }
  ~VPInterleaveRecipe() override = default;
    return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
  }
+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    return User.getOperand(0); // Address is the 1st, mandatory operand.
+  }
+
+  /// Return the mask used by this recipe. Note that a full mask is represented
+  /// by a nullptr.
+  VPValue *getMask() const {
+    // Mask is optional and therefore the last, currently 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+  }
+
  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
private:
  Instruction &Instr;
-  std::unique_ptr<VPUser> User;
+  // Operand bundle: operand 0 is the address, operand 1 (optional) the mask.
+  VPUser User;
public:
-  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
+                                 VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
  }
  /// Method to support type inquiry through isa, cast, and dyn_cast.
    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }
+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    return User.getOperand(0); // Address is the 1st, mandatory operand.
+  }
+
  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
-    // Mask is the last operand.
-    return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
+    // Mask is optional and therefore the last, currently 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
  }
  /// Generate the wide load/store.
return Value2VPValue[V];
}
+  /// Return the VPValue wrapping IR Value \p V, creating and registering a
+  /// new one if \p V has not been added to this plan yet.
+  VPValue *getOrAddVPValue(Value *V) {
+    assert(V && "Trying to get or add the VPValue of a null Value");
+    if (!Value2VPValue.count(V))
+      addVPValue(V);
+    return getVPValue(V);
+  }
+
/// Return the VPLoopInfo analysis for this VPlan.
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }