From 9d010fffe139750b71427e5d7921c8c7fcd68e1e Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 3 Jul 2014 00:23:43 +0000 Subject: [PATCH] [codegen,aarch64] Add a target hook to the code generator to control vector type legalization strategies in a more fine grained manner, and change the legalization of several v1iN types and v1f32 to be widening rather than scalarization on AArch64. This fixes an assertion failure caused by scalarizing nodes like "v1i32 trunc v1i64". As v1i64 is legal it will fail to scalarize v1i32. This also provides a foundation for other targets to have more granular control over how vector types are legalized. Patch by Hao Liu, reviewed by Tim Northover. I'm committing it to allow some work to start taking place on top of this patch as it adds some really important hooks to the backend that I'd like to immediately start using. =] http://reviews.llvm.org/D4322 llvm-svn: 212242 --- llvm/include/llvm/Target/TargetLowering.h | 13 ++-- llvm/lib/CodeGen/TargetLoweringBase.cpp | 88 ++++++++++++----------- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 12 ++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 + llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 8 ++- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 3 +- llvm/lib/Target/R600/SIISelLowering.cpp | 8 ++- llvm/lib/Target/R600/SIISelLowering.h | 4 +- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 16 ++--- llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll | 4 +- 10 files changed, 99 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 61c3bd9..2695bc5 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -185,10 +185,15 @@ public: /// Return true if the target has BitExtract instructions. bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } - /// Return true if a vector of the given type should be split - /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type - /// legalization. - virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; } + /// Return the preferred vector type legalization action. + virtual TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const { + // The default action for one element vectors is to scalarize + if (VT.getVectorNumElements() == 1) + return TypeScalarizeVector; + // The default action for other vectors is to promote + return TypePromoteInteger; + } // There are two general methods for expanding a BUILD_VECTOR node: // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index f530485..cc4445f 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1084,24 +1084,25 @@ void TargetLoweringBase::computeRegisterProperties() { // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; + MVT VT = (MVT::SimpleValueType) i; + if (isTypeLegal(VT)) + continue; - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorType(VT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + bool IsLegalWiderType = false; + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); + switch (PreferredAction) { + case TypePromoteInteger: { + // Try to promote the elements of integer vectors. If no legal + // promotion was found, fall through to the widen-vector method. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) + && SVT.getScalarType().isInteger()) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1110,15 +1111,15 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - - if (IsLegalWiderType) continue; - + if (IsLegalWiderType) + break; + } + case TypeWidenVector: { // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1127,27 +1128,34 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - if (IsLegalWiderType) continue; + if (IsLegalWiderType) + break; } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); + case TypeSplitVector: + case TypeScalarizeVector: { + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + NumIntermediates, RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + if (PreferredAction == TypeScalarizeVector) + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); + else + ValueTypeActions.setTypeAction(VT, TypeSplitVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + break; + } + default: + llvm_unreachable("Unknown vector legalization action!"); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e30ebdd..1c1be65 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7886,6 +7886,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { return Inst->getType()->getPrimitiveSizeInBits() <= 128; } +TargetLoweringBase::LegalizeTypeAction +AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { + MVT SVT = VT.getSimpleVT(); + // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8, + // v4i16, v2i32 instead of to promote. + if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 + || SVT == MVT::v1f32) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); +} + Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 139217d..cb0b9ef 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -324,6 +324,9 @@ public: bool shouldExpandAtomicInIR(Instruction *Inst) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; + private: /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 82c6a77..cb452ff 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -473,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const { - return VT.getScalarType() == MVT::i1; +TargetLoweringBase::LegalizeTypeAction +NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) + return TypeSplitVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); } SDValue diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index d311f68..7b4026d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -242,7 +242,8 @@ public: // PTX always uses 32-bit shift amounts MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } - bool shouldSplitVectorType(EVT VT) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index f491116..05903c5 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -270,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return VT.bitsGT(MVT::i32); } -bool SITargetLowering::shouldSplitVectorType(EVT VT) const { - return VT.getScalarType().bitsLE(MVT::i16); +TargetLoweringBase::LegalizeTypeAction +SITargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) + return TypeSplitVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); } bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h index 2f97a9a..e25323a 100644 --- a/llvm/lib/Target/R600/SIISelLowering.h +++ b/llvm/lib/Target/R600/SIISelLowering.h @@ -50,7 +50,9 @@ public: SITargetLowering(TargetMachine &tm); bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, bool *IsFast) const override; - bool shouldSplitVectorType(EVT VT) const override; + + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index cfc2ebf..1cfba82 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { ; CHECK-LABEL: testDUP.v1i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +; CHECK: dup v0.8b, v0.b[0] %b = extractelement <1 x i8> %a, i32 0 %c = insertelement <8 x i8> undef, i8 %b, i32 0 %d = insertelement <8 x i8> %c, i8 %b, i32 1 @@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { ; CHECK-LABEL: testDUP.v1i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +; CHECK: dup v0.8h, v0.h[0] %b = extractelement <1 x i16> %a, i32 0 %c = insertelement <8 x i16> undef, i16 %b, i32 0 %d = insertelement <8 x i16> %c, i16 %b, i32 1 @@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { ; CHECK-LABEL: testDUP.v1i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +; CHECK: dup v0.4s, v0.s[0] %b = extractelement <1 x i32> %a, i32 0 %c = insertelement <4 x i32> undef, i32 %b, i32 0 %d = insertelement <4 x i32> %c, i32 %b, i32 1 @@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() { define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { ; CHECK-LABEL: concat_vector_v4i16: -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +; CHECK: dup v0.4h, v0.h[0] %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer ret <4 x i16> %r } define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { ; CHECK-LABEL: concat_vector_v4i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +; CHECK: dup v0.4s, v0.s[0] %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %r } define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { ; CHECK-LABEL: concat_vector_v8i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +; CHECK: dup v0.8b, v0.b[0] %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer ret <8 x i8> %r } define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { ; CHECK-LABEL: concat_vector_v8i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +; CHECK: dup v0.8h, v0.h[0] %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %r } define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { ; CHECK-LABEL: concat_vector_v16i8: -; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +; CHECK: dup v0.16b, v0.b[0] %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %r } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll index 255b90d..95c582a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll @@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { ; CHECK-LABEL: test_select_cc_v1f32: -; CHECK: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s2, s3, eq +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s +; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d ret <1 x float> %e -- 2.7.4