From c8eb535aed0368c20b25fe05bca563ab38dd91e9 Mon Sep 17 00:00:00 2001
From: eopXD
Date: Thu, 23 Mar 2023 01:51:39 -0700
Subject: [PATCH] [1/11][IR] Permit load/store/alloca for struct of the same
 scalable vector type

This patch-set aims to simplify the existing RVV segment load/store
intrinsics to use a type that represents a tuple of vectors instead.

To achieve this, first we need to relax the current limitation so that an
aggregate type can be a target of load/store/alloca when the aggregate type
contains only homogeneous scalable vector types. Then we adjust the prologue
of an LLVM function when lowering from Clang. Finally, we re-define the RVV
segment load/store intrinsics to use the tuple types.

The pull request under the RVV intrinsic specification is
riscv-non-isa/rvv-intrinsic-doc#198

---

This is the 1st patch of the patch-set. This patch originated from D98169.

This patch allows an aggregate type (StructType) that contains homogeneous
scalable vector types to be a target of load/store/alloca. The RFC of this
patch was posted on LLVM Discourse.

https://discourse.llvm.org/t/rfc-ir-permit-load-store-alloca-for-struct-of-the-same-scalable-vector-type/69527

The main changes in this patch are:

1. Extend `StructLayout::StructSize` from `uint64_t` to `TypeSize` to
   accommodate an expression of scalable size.
2. Allow `StructType::isSized` to also return true for homogeneous scalable
   vector types.
3. Let `Type::isScalableTy` return true when `Type` is a `StructType` that
   contains scalable vectors.

Extra description of this relaxation is added to the LLVM Language Reference
Manual.

Authored-by: Hsiangkai Wang
Co-Authored-by: eop Chen

Reviewed By: craig.topper, nikic

Differential Revision: https://reviews.llvm.org/D146872
---
 llvm/docs/LangRef.rst                              | 15 +++++-
 llvm/include/llvm/CodeGen/Analysis.h               | 22 +++++++-
 llvm/include/llvm/IR/DataLayout.h                  | 28 +++++-----
 llvm/include/llvm/IR/DerivedTypes.h                | 15 +++++-
 llvm/include/llvm/IR/Type.h                        |  4 +-
 llvm/lib/Analysis/ScalarEvolution.cpp              |  6 ++-
 llvm/lib/AsmParser/LLParser.cpp                    |  5 ++
 llvm/lib/CodeGen/Analysis.cpp                      | 63 +++++++++++++++++++---
 .../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  |  7 +--
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   | 12 ++---
 llvm/lib/IR/DataLayout.cpp                         | 50 +++++++++++------
 llvm/lib/IR/Type.cpp                               | 62 ++++++++++++++++++---
 llvm/lib/IR/Verifier.cpp                           | 14 ++++-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp     | 10 ++++
 .../InstCombine/InstructionCombining.cpp           |  5 ++
 llvm/lib/Transforms/Scalar/SROA.cpp                |  4 ++
 .../Transforms/Utils/ScalarEvolutionExpander.cpp   |  3 ++
 llvm/test/Assembler/scalable-vector-struct.ll      | 30 +++++++++++
 llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll   | 20 +++----
 .../RISCV/rvv/alloca-load-store-scalable-struct.ll | 52 ++++++++++++++++++
 .../InstCombine/scalable-vector-struct.ll          | 30 +++++++++++
 .../test/Transforms/SROA/scalable-vector-struct.ll | 22 ++++++++
 llvm/test/Verifier/scalable-vector-struct-gep.ll   |  9 ++++
 23 files changed, 415 insertions(+), 73 deletions(-)
 create mode 100644 llvm/test/Assembler/scalable-vector-struct.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
 create mode 100644 llvm/test/Transforms/InstCombine/scalable-vector-struct.ll
 create mode 100644 llvm/test/Transforms/SROA/scalable-vector-struct.ll
 create mode 100644 llvm/test/Verifier/scalable-vector-struct-gep.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 96454e8..83f0e35 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -744,8 +744,14 @@ Variables and aliases can have a
 :ref:`Scalable vectors <t_vector>` cannot be global variables or members of
 arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Structs containing
-scalable vectors cannot be used in loads, stores, allocas, or GEPs.
+structs to facilitate intrinsics returning multiple values. Generally, structs
+containing scalable vectors are not considered "sized" and cannot be used in
+loads, stores, allocas, or GEPs. The only exception to this rule is for structs
+that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
+<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
+<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
+homogeneous scalable vector structs) are considered sized and can be used in
+loads, stores, and allocas, but not GEPs.
 
 Syntax::
 
@@ -10287,6 +10293,11 @@ allocation on any convenient boundary compatible with the type.
 
 '``type``' may be any sized type.
 
+Structs containing scalable vectors cannot be used in allocas unless all
+fields are the same scalable vector type (e.g. ``{<vscale x 2 x i32>,
+<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
+<vscale x 2 x i64>}`` doesn't).
+
 Semantics:
 """"""""""
 
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index 1a09820..1c67fe2 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -64,15 +64,33 @@ inline unsigned ComputeLinearIndex(Type *Ty,
 ///
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *FixedOffsets,
+                     uint64_t StartingOffset);
 
 /// Variant of ComputeValueVTs that also produces the memory VTs.
 void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<EVT> &ValueVTs,
                      SmallVectorImpl<EVT> *MemVTs,
-                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     SmallVectorImpl<TypeSize> *Offsets,
+                     TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<TypeSize> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<uint64_t> *FixedOffsets,
+                     uint64_t StartingOffset);
 
 /// computeValueLLTs - Given an LLVM IR type, compute a sequence of
 /// LLTs that represent all the individual underlying
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index fb25e5e..0b2346e 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -620,16 +620,16 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
 /// Used to lazily calculate structure layout information for a target machine,
 /// based on the DataLayout structure.
-class StructLayout final : public TrailingObjects { - uint64_t StructSize; +class StructLayout final : public TrailingObjects { + TypeSize StructSize; Align StructAlignment; unsigned IsPadded : 1; unsigned NumElements : 31; public: - uint64_t getSizeInBytes() const { return StructSize; } + TypeSize getSizeInBytes() const { return StructSize; } - uint64_t getSizeInBits() const { return 8 * StructSize; } + TypeSize getSizeInBits() const { return 8 * StructSize; } Align getAlignment() const { return StructAlignment; } @@ -639,23 +639,22 @@ public: /// Given a valid byte offset into the structure, returns the structure /// index that contains it. - unsigned getElementContainingOffset(uint64_t Offset) const; + unsigned getElementContainingOffset(uint64_t FixedOffset) const; - MutableArrayRef getMemberOffsets() { - return llvm::MutableArrayRef(getTrailingObjects(), - NumElements); + MutableArrayRef getMemberOffsets() { + return llvm::MutableArrayRef(getTrailingObjects(), NumElements); } - ArrayRef getMemberOffsets() const { - return llvm::ArrayRef(getTrailingObjects(), NumElements); + ArrayRef getMemberOffsets() const { + return llvm::ArrayRef(getTrailingObjects(), NumElements); } - uint64_t getElementOffset(unsigned Idx) const { + TypeSize getElementOffset(unsigned Idx) const { assert(Idx < NumElements && "Invalid element idx!"); return getMemberOffsets()[Idx]; } - uint64_t getElementOffsetInBits(unsigned Idx) const { + TypeSize getElementOffsetInBits(unsigned Idx) const { return getElementOffset(Idx) * 8; } @@ -664,7 +663,7 @@ private: StructLayout(StructType *ST, const DataLayout &DL); - size_t numTrailingObjects(OverloadToken) const { + size_t numTrailingObjects(OverloadToken) const { return NumElements; } }; @@ -685,8 +684,7 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { } case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. - return TypeSize::Fixed( - getStructLayout(cast(Ty))->getSizeInBits()); + return getStructLayout(cast(Ty))->getSizeInBits(); case Type::IntegerTyID: return TypeSize::Fixed(Ty->getIntegerBitWidth()); case Type::HalfTyID: diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 85a41c8..4f8c9e6 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -218,7 +218,9 @@ class StructType : public Type { SCDB_HasBody = 1, SCDB_Packed = 2, SCDB_IsLiteral = 4, - SCDB_IsSized = 8 + SCDB_IsSized = 8, + SCDB_ContainsScalableVector = 16, + SCDB_NotContainsScalableVector = 32 }; /// For a named struct that actually has a name, this is a pointer to the @@ -284,7 +286,16 @@ public: bool isSized(SmallPtrSetImpl *Visited = nullptr) const; /// Returns true if this struct contains a scalable vector. - bool containsScalableVectorType() const; + bool + containsScalableVectorType(SmallPtrSetImpl *Visited = nullptr) const; + + /// Returns true if this struct contains homogeneous scalable vector types. + /// Note that the definition of homogeneous scalable vector type is not + /// recursive here. That means the following structure will return false + /// when calling this function. + /// {{, }, + /// {, }} + bool containsHomogeneousScalableVectorTypes() const; /// Return true if this is a named struct that has a non-empty name. 
bool hasName() const { return SymbolTableEntry != nullptr; } diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index c516f37..57d1773 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -211,9 +211,7 @@ public: /// Return true if this is a scalable vector type or a target extension type /// with a scalable layout. - bool isScalableTy() const { - return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy(); - } + bool isScalableTy() const; /// Return true if this is a FP type or a vector of FP. bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 504c94f..aec1e34 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4345,8 +4345,10 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, // We can bypass creating a target-independent constant expression and then // folding it back into a ConstantInt. This is just a compile-time // optimization. - return getConstant( - IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); + const StructLayout *SL = getDataLayout().getStructLayout(STy); + assert(!SL->getSizeInBits().isScalable() && + "Cannot get offset for structure containing scalable vector types"); + return getConstant(IntTy, SL->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index b92da25..7f887bb 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7990,6 +7990,11 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (!Indices.empty() && !Ty->isSized(&Visited)) return error(Loc, "base element of getelementptr must be sized"); + auto *STy = dyn_cast(Ty); + if (STy && STy->containsScalableVectorType()) + return error(Loc, "getelementptr cannot target structure that contains " + "scalable vector type"); + if (!GetElementPtrInst::getIndexedType(Ty, Indices)) return error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ty, Ptr, Indices); diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index b957944..2065bfb 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -79,8 +79,8 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl &ValueVTs, SmallVectorImpl *MemVTs, - SmallVectorImpl *Offsets, - uint64_t StartingOffset) { + SmallVectorImpl *Offsets, + TypeSize StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast(Ty)) { // If the Offsets aren't needed, don't query the struct layout. This allows @@ -92,7 +92,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, EE = STy->element_end(); EI != EE; ++EI) { // Don't compute the element offset if we didn't get a StructLayout above. - uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0; + TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB) + : TypeSize::get(0, StartingOffset.isScalable()); ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets, StartingOffset + EltOffset); } @@ -101,7 +102,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, // Given an array type, recursively traverse the elements. 
if (ArrayType *ATy = dyn_cast(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue(); + TypeSize EltSize = DL.getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets, StartingOffset + i * EltSize); @@ -120,12 +121,62 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl &ValueVTs, - SmallVectorImpl *Offsets, - uint64_t StartingOffset) { + SmallVectorImpl *Offsets, + TypeSize StartingOffset) { return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets, StartingOffset); } +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets, + uint64_t StartingOffset) { + TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy()); + return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset); +} + +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *FixedOffsets, + uint64_t StartingOffset) { + TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy()); + SmallVector Offsets; + if (FixedOffsets) + ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset); + else + ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset); + + if (FixedOffsets) + for (TypeSize Offset : Offsets) + FixedOffsets->push_back(Offset.getKnownMinValue()); +} + +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *MemVTs, + SmallVectorImpl *Offsets, + uint64_t StartingOffset) { + TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy()); + return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset); +} + +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *MemVTs, + SmallVectorImpl *FixedOffsets, + uint64_t StartingOffset) { + TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy()); + SmallVector Offsets; + if (FixedOffsets) + ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset); + else + ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset); + + if (FixedOffsets) + for (TypeSize Offset : Offsets) + FixedOffsets->push_back(Offset.getKnownMinValue()); +} + void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty, SmallVectorImpl &ValueTys, SmallVectorImpl *Offsets, diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 1d15dbb..1d0a03c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -152,9 +152,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, false, AI); } - // Scalable vectors may need a special StackID to distinguish - // them from other (fixed size) stack objects. - if (isa(Ty)) + // Scalable vectors and structures that contain scalable vectors may + // need a special StackID to distinguish them from other (fixed size) + // stack objects. 
+ if (Ty->isScalableTy()) MF->getFrameInfo().setStackID(FrameIndex, TFI->getStackIDForScalableVectors()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6785ef0..19fc413 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2025,7 +2025,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector ValueVTs, MemVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs, - &Offsets); + &Offsets, 0); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); @@ -4161,7 +4161,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); SmallVector ValueVTs, MemVTs; SmallVector Offsets; - ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -4260,7 +4260,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SmallVector Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &Offsets); + SrcV->getType(), ValueVTs, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4296,7 +4296,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, - ValueVTs, &Offsets); + ValueVTs, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4333,7 +4333,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector ValueVTs, MemVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &MemVTs, &Offsets); + SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -9903,7 +9903,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector RetTys; SmallVector Offsets; auto &DL = CLI.DAG.getDataLayout(); - ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0); if (CLI.IsPostTypeLegalization) { // If we are lowering a libcall after legalization, split the return type. diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d4094c0..99e722c 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -45,21 +45,30 @@ using namespace llvm; // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { +StructLayout::StructLayout(StructType *ST, const DataLayout &DL) + : StructSize(TypeSize::Fixed(0)) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); - StructSize = 0; IsPadded = false; NumElements = ST->getNumElements(); // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { Type *Ty = ST->getElementType(i); + if (i == 0 && Ty->isScalableTy()) + StructSize = TypeSize::Scalable(0); + const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty); // Add padding if necessary to align the data element properly. 
- if (!isAligned(TyAlign, StructSize)) { + // Currently the only structure with scalable size will be the homogeneous + // scalable vector types. Homogeneous scalable vector types have members of + // the same data type so no alignment issue will happen. The condition here + // assumes so and needs to be adjusted if this assumption changes (e.g. we + // support structures with arbitrary scalable data type, or structure that + // contains both fixed size and scalable size data type members). + if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) { IsPadded = true; - StructSize = alignTo(StructSize, TyAlign); + StructSize = TypeSize::Fixed(alignTo(StructSize, TyAlign)); } // Keep track of maximum alignment constraint. @@ -67,28 +76,39 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { getMemberOffsets()[i] = StructSize; // Consume space for this data item - StructSize += DL.getTypeAllocSize(Ty).getFixedValue(); + StructSize += DL.getTypeAllocSize(Ty); } // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. - if (!isAligned(StructAlignment, StructSize)) { + if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) { IsPadded = true; - StructSize = alignTo(StructSize, StructAlignment); + StructSize = TypeSize::Fixed(alignTo(StructSize, StructAlignment)); } } /// getElementContainingOffset - Given a valid offset into the structure, /// return the structure index that contains it. -unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { - ArrayRef MemberOffsets = getMemberOffsets(); - auto SI = llvm::upper_bound(MemberOffsets, Offset); +unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const { + assert(!StructSize.isScalable() && + "Cannot get element at offset for structure containing scalable " + "vector types"); + TypeSize Offset = TypeSize::Fixed(FixedOffset); + ArrayRef MemberOffsets = getMemberOffsets(); + + const auto *SI = + std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset, + [](TypeSize LHS, TypeSize RHS) -> bool { + return TypeSize::isKnownLT(LHS, RHS); + }); assert(SI != MemberOffsets.begin() && "Offset not in structure type!"); --SI; - assert(*SI <= Offset && "upper_bound didn't work"); - assert((SI == MemberOffsets.begin() || *(SI - 1) <= Offset) && - (SI + 1 == MemberOffsets.end() || *(SI + 1) > Offset) && - "Upper bound didn't work!"); + assert(TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work"); + assert( + (SI == MemberOffsets.begin() || TypeSize::isKnownLE(*(SI - 1), Offset)) && + (SI + 1 == MemberOffsets.end() || + TypeSize::isKnownGT(*(SI + 1), Offset)) && + "Upper bound didn't work!"); // Multiple fields can have the same offset if any of them are zero sized. // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop @@ -706,7 +726,7 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. StructLayout *L = (StructLayout *)safe_malloc( - StructLayout::totalSizeToAlloc(Ty->getNumElements())); + StructLayout::totalSizeToAlloc(Ty->getNumElements())); // Set SL before calling StructLayout's ctor. The ctor could cause other // entries to be added to TheMap, invalidating our reference. 
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 3b8fc12..7d21666b 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -63,6 +63,14 @@ bool Type::isOpaquePointerTy() const { return false; } +bool Type::isScalableTy() const { + if (const auto *STy = dyn_cast(this)) { + SmallPtrSet Visited; + return STy->containsScalableVectorType(&Visited); + } + return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy(); +} + const fltSemantics &Type::getFltSemantics() const { switch (getTypeID()) { case HalfTyID: return APFloat::IEEEhalf(); @@ -450,18 +458,51 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef ETypes, return ST; } -bool StructType::containsScalableVectorType() const { +bool StructType::containsScalableVectorType( + SmallPtrSetImpl *Visited) const { + if ((getSubclassData() & SCDB_ContainsScalableVector) != 0) + return true; + + if ((getSubclassData() & SCDB_NotContainsScalableVector) != 0) + return false; + + if (Visited && !Visited->insert(const_cast(this)).second) + return false; + for (Type *Ty : elements()) { - if (isa(Ty)) + if (isa(Ty)) { + const_cast(this)->setSubclassData( + getSubclassData() | SCDB_ContainsScalableVector); return true; - if (auto *STy = dyn_cast(Ty)) - if (STy->containsScalableVectorType()) + } + if (auto *STy = dyn_cast(Ty)) { + if (STy->containsScalableVectorType(Visited)) { + const_cast(this)->setSubclassData( + getSubclassData() | SCDB_ContainsScalableVector); return true; + } + } } + // For structures that are opaque, return false but do not set the + // SCDB_NotContainsScalableVector flag since it may gain scalable vector type + // when it becomes non-opaque. + if (!isOpaque()) + const_cast(this)->setSubclassData( + getSubclassData() | SCDB_NotContainsScalableVector); return false; } +bool StructType::containsHomogeneousScalableVectorTypes() const { + Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr; + if (!FirstTy || !isa(FirstTy)) + return false; + for (Type *Ty : elements()) + if (Ty != FirstTy) + return false; + return true; +} + void StructType::setBody(ArrayRef Elements, bool isPacked) { assert(isOpaque() && "Struct body already set!"); @@ -581,10 +622,19 @@ bool StructType::isSized(SmallPtrSetImpl *Visited) const { // Okay, our struct is sized if all of the elements are, but if one of the // elements is opaque, the struct isn't sized *yet*, but may become sized in // the future, so just bail out without caching. + // The ONLY special case inside a struct that is considered sized is when the + // elements are homogeneous of a scalable vector type. + if (containsHomogeneousScalableVectorTypes()) { + const_cast(this)->setSubclassData(getSubclassData() | + SCDB_IsSized); + return true; + } for (Type *Ty : elements()) { // If the struct contains a scalable vector type, don't consider it sized. - // This prevents it from being used in loads/stores/allocas/GEPs. - if (isa(Ty)) + // This prevents it from being used in loads/stores/allocas/GEPs. The ONLY + // special case right now is a structure of homogenous scalable vector + // types and is handled by the if-statement before this for-loop. 
+ if (Ty->isScalableTy()) return false; if (!Ty->isSized(Visited)) return false; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ef3ba22..24333dd 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -828,9 +828,11 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { Check(!isa(GV.getValueType()), "Globals cannot contain scalable vectors", &GV); - if (auto *STy = dyn_cast(GV.getValueType())) - Check(!STy->containsScalableVectorType(), + if (auto *STy = dyn_cast(GV.getValueType())) { + SmallPtrSet Visited; + Check(!STy->containsScalableVectorType(&Visited), "Globals cannot contain scalable vectors", &GV); + } // Check if it's a target extension type that disallows being used as a // global. @@ -3836,6 +3838,14 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { "GEP base pointer is not a vector or a vector of pointers", &GEP); Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); + if (auto *STy = dyn_cast(GEP.getSourceElementType())) { + SmallPtrSet Visited; + Check(!STy->containsScalableVectorType(&Visited), + "getelementptr cannot target structure that contains scalable vector" + "type", + &GEP); + } + SmallVector Idxs(GEP.indices()); Check( all_of(Idxs, [](Value *V) { return V->getType()->isIntOrIntVectorTy(); }), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e30e8d0..f62a8ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -768,6 +768,11 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) { // the knowledge that padding exists for the rest of the pipeline. const DataLayout &DL = IC.getDataLayout(); auto *SL = DL.getStructLayout(ST); + + // Don't unpack for structure with scalable vector. + if (SL->getSizeInBits().isScalable()) + return nullptr; + if (SL->hasPadding()) return nullptr; @@ -1291,6 +1296,11 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) { // the knowledge that padding exists for the rest of the pipeline. const DataLayout &DL = IC.getDataLayout(); auto *SL = DL.getStructLayout(ST); + + // Don't unpack for structure with scalable vector. + if (SL->getSizeInBits().isScalable()) + return false; + if (SL->hasPadding()) return false; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 41cbcb1..b1f4e9f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2787,6 +2787,11 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { return R; if (LoadInst *L = dyn_cast(Agg)) { + // Bail out if the aggregate contains scalable vector type + if (auto *STy = dyn_cast(Agg->getType()); + STy && STy->containsScalableVectorType()) + return nullptr; + // If the (non-volatile) load only has one use, we can rewrite this to a // load from a GEP. This reduces the size of the load. 
If a load is used // only by extractvalue instructions then this either must have been diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index cecd4ac..f6848d1 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3977,6 +3977,10 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, return nullptr; const StructLayout *SL = DL.getStructLayout(STy); + + if (SL->getSizeInBits().isScalable()) + return nullptr; + if (Offset >= SL->getSizeInBytes()) return nullptr; uint64_t EndOffset = Offset + Size; diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 14604df..003e7d3 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -522,6 +522,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // the struct fields. if (Ops.empty()) break; + assert( + !STy->containsScalableVectorType() && + "GEPs are not supported on structures containing scalable vectors"); if (const SCEVConstant *C = dyn_cast(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { const StructLayout &SL = *DL.getStructLayout(STy); diff --git a/llvm/test/Assembler/scalable-vector-struct.ll b/llvm/test/Assembler/scalable-vector-struct.ll new file mode 100644 index 0000000..c40210f --- /dev/null +++ b/llvm/test/Assembler/scalable-vector-struct.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define @load(%struct.test* %x) { +; CHECK-LABEL: define @load +; CHECK-SAME: (ptr [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 +; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 +; CHECK-NEXT: ret [[B]] +; + %a = load %struct.test, %struct.test* %x + %b = extractvalue %struct.test %a, 1 + ret %b +} + +define void @store(%struct.test* %x, %y, %z) { +; CHECK-LABEL: define void @store +; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 +; CHECK-NEXT: [[B:%.*]] = insertvalue [[STRUCT_TEST]] [[A]], [[Z]], 1 +; CHECK-NEXT: store [[STRUCT_TEST]] [[B]], ptr [[X]], align 4 +; CHECK-NEXT: ret void +; + %a = insertvalue %struct.test undef, %y, 0 + %b = insertvalue %struct.test %a, %z, 1 + store %struct.test %b, %struct.test* %x + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll index 69ccc30..47a72a8 100644 --- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll @@ -42,19 +42,21 @@ define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcou ; CHECKO3-NEXT: ret ; CHECK-O0-LABEL: test_alloca_store_reload: ; CHECK-O0: // %bb.0: -; CHECK-O0-NEXT: sub sp, sp, #16 -; CHECK-O0-NEXT: add x8, sp, #14 -; CHECK-O0-NEXT: str p0, [x8] -; CHECK-O0-NEXT: ldr p0, [x8] -; CHECK-O0-NEXT: add sp, sp, #16 +; CHECK-O0-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-O0-NEXT: addvl sp, sp, #-1 +; CHECK-O0-NEXT: str p0, [sp, #7, mul vl] +; CHECK-O0-NEXT: ldr p0, [sp, #7, mul vl] +; CHECK-O0-NEXT: addvl sp, sp, #1 +; CHECK-O0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-O0-NEXT: ret ; ; CHECK-O3-LABEL: test_alloca_store_reload: ; CHECK-O3: // %bb.0: -; CHECK-O3-NEXT: sub sp, sp, #16 -; CHECK-O3-NEXT: add x8, sp, #14 -; CHECK-O3-NEXT: str p0, [x8] -; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-O3-NEXT: addvl sp, sp, #-1 +; CHECK-O3-NEXT: str p0, [sp, #7, mul vl] +; CHECK-O3-NEXT: addvl sp, sp, #1 +; CHECK-O3-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-O3-NEXT: ret %ptr = alloca target("aarch64.svcount"), align 1 store target("aarch64.svcount") %val, ptr %ptr diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll new file mode 100644 index 0000000..80de207 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s + +target triple = "riscv64-unknown-unknown-elf" + +%struct.test = type { , } + +define @test(%struct.test* %addr, i64 %vl) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrrs a2, vlenb, zero +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, a0, 8 +; CHECK-NEXT: vl1re64.v v8, (a2) +; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: addi a0, sp, 24 +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v9, (a2) +; CHECK-NEXT: vl1re64.v v8, (a0) +; CHECK-NEXT: vl1re64.v v9, (a2) +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: csrrs a0, vlenb, zero +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %ret = alloca %struct.test, align 8 + %val = load %struct.test, %struct.test* %addr + store %struct.test %val, %struct.test* %ret, align 8 + %0 = load %struct.test, %struct.test* %ret, align 8 + %1 = extractvalue %struct.test %0, 0 + %2 = extractvalue %struct.test %0, 1 + %3 = call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + poison, + %1, + %2, i64 %vl) + ret %3 +} + +declare @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( + , + , + , + i64) diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll new file mode 100644 index 0000000..b560993 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define @load(%struct.test* %x) { +; CHECK-LABEL: define @load +; CHECK-SAME: (ptr [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 +; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 +; CHECK-NEXT: ret [[B]] +; + %a = load %struct.test, %struct.test* %x + %b 
= extractvalue %struct.test %a, 1 + ret %b +} + +define void @store(%struct.test* %x, %y, %z) { +; CHECK-LABEL: define void @store +; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 +; CHECK-NEXT: [[B:%.*]] = insertvalue [[STRUCT_TEST]] [[A]], [[Z]], 1 +; CHECK-NEXT: store [[STRUCT_TEST]] [[B]], ptr [[X]], align 4 +; CHECK-NEXT: ret void +; + %a = insertvalue %struct.test undef, %y, 0 + %b = insertvalue %struct.test %a, %z, 1 + store %struct.test %b, %struct.test* %x + ret void +} diff --git a/llvm/test/Transforms/SROA/scalable-vector-struct.ll b/llvm/test/Transforms/SROA/scalable-vector-struct.ll new file mode 100644 index 0000000..92cd44d --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vector-struct.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK + +; This test checks that SROA runs mem2reg on structure that contains +; homogeneous scalable vectors. + +%struct.test = type { , } + +define %struct.test @alloca( %x, %y) { +; CHECK-LABEL: @alloca( +; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[X:%.*]], 0 +; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [[STRUCT_TEST]] [[AGG0]], [[Y:%.*]], 1 +; CHECK-NEXT: ret [[STRUCT_TEST]] [[AGG1]] +; + %addr = alloca %struct.test, align 4 + %agg0 = insertvalue %struct.test undef, %x, 0 + %agg1 = insertvalue %struct.test %agg0, %y, 1 + store %struct.test %agg1, %struct.test* %addr, align 4 + %val = load %struct.test, %struct.test* %addr, align 4 + ret %struct.test %val +} diff --git a/llvm/test/Verifier/scalable-vector-struct-gep.ll b/llvm/test/Verifier/scalable-vector-struct-gep.ll new file mode 100644 index 0000000..c8c8c74 --- /dev/null +++ b/llvm/test/Verifier/scalable-vector-struct-gep.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s + +%struct.test = type { , } + +define void @gep(ptr %a) { +; CHECK: error: getelementptr cannot target structure that contains scalable vector type + %a.addr = getelementptr %struct.test, ptr %a, i32 0 + ret void +} -- 2.7.4
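
For illustration only (not part of the patch): a minimal IR sketch of the relaxation this patch introduces, mirroring the added tests. The struct name %tuple, the function name, and the element type <vscale x 2 x i32> are arbitrary choices for this example.

%tuple = type { <vscale x 2 x i32>, <vscale x 2 x i32> }  ; homogeneous: both fields are the same scalable vector type

define <vscale x 2 x i32> @roundtrip(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
  ; alloca, store, and load of a homogeneous scalable-vector struct are now accepted.
  %slot = alloca %tuple
  %v0 = insertvalue %tuple undef, <vscale x 2 x i32> %a, 0
  %v1 = insertvalue %tuple %v0, <vscale x 2 x i32> %b, 1
  store %tuple %v1, ptr %slot
  %ld = load %tuple, ptr %slot
  %f1 = extractvalue %tuple %ld, 1
  ; A GEP into such a struct, e.g. getelementptr %tuple, ptr %slot, i32 0, i32 1,
  ; is still rejected by the LLParser and the Verifier.
  ret <vscale x 2 x i32> %f1
}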