From 37f4ccb27545ca28a52a1a1c21cbccee03044d04 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto@arm.com>
Date: Fri, 6 Nov 2020 15:53:59 +0000
Subject: [PATCH] [AArch64]Add memory op cost model for SVE

This patch adds/fixes memory op cost model for SVE with fixed-width vector.

Differential Revision: https://reviews.llvm.org/D90950
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    | 11 ++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h      |  5 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp       |  5 ++
 llvm/lib/Target/AArch64/AArch64Subtarget.h         |  1 +
 .../Target/AArch64/AArch64TargetTransformInfo.cpp  |  6 +-
 .../Target/AArch64/AArch64TargetTransformInfo.h    |  1 +
 .../CostModel/AArch64/mem-op-cost-model.ll         | 88 ++++++++++++++++++++++
 .../AArch64/scalable-mem-op-cost-model.ll          | 51 +++++++++++++
 8 files changed, 157 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5d30b5f..15c67b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,7 +269,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
   }
 
-  if (useSVEForFixedLengthVectors()) {
+  if (Subtarget->useSVEForFixedLengthVectors()) {
     for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
       if (useSVEForFixedLengthVectorVT(VT))
         addRegisterClass(VT, &AArch64::ZPRRegClass);
@@ -1085,7 +1085,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // NOTE: Currently this has to happen after computeRegisterProperties rather
   // than the preferred option of combining it with the addRegisterClass call.
-  if (useSVEForFixedLengthVectors()) {
+  if (Subtarget->useSVEForFixedLengthVectors()) {
     for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
       if (useSVEForFixedLengthVectorVT(VT))
         addTypeForFixedLengthSVE(VT);
@@ -4140,14 +4140,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   }
 }
 
-bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
-  // Prefer NEON unless larger SVE registers are available.
-  return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+  return !Subtarget->useSVEForFixedLengthVectors();
 }
 
 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
     EVT VT, bool OverrideNEON) const {
-  if (!useSVEForFixedLengthVectors())
+  if (!Subtarget->useSVEForFixedLengthVectors())
     return false;
 
   if (!VT.isFixedLengthVector())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bfc83a9..47248b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -773,9 +773,7 @@ public:
   /// illegal as the original, thus leading to an infinite legalisation loop.
   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
   /// vector types this override can be removed.
-  bool mergeStoresAfterLegalization(EVT VT) const override {
-    return !useSVEForFixedLengthVectors();
-  }
+  bool mergeStoresAfterLegalization(EVT VT) const override;
 
 private:
   /// Keep a pointer to the AArch64Subtarget around so that we can
@@ -1008,7 +1006,6 @@ private:
   bool shouldLocalize(const MachineInstr &MI,
                       const TargetTransformInfo *TTI) const override;
 
-  bool useSVEForFixedLengthVectors() const;
   // Normally SVE is only used for byte size vectors that do not fit within a
   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
   // used for 64bit and 128bit vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index fdf979b..b4d71ac 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -368,3 +368,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
     return (SVEVectorBitsMin / 128) * 128;
   return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
 }
+
+bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
+  // Prefer NEON unless larger SVE registers are available.
+  return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 67c682c..4eb4843 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -555,6 +555,7 @@ public:
   // implied by the architecture.
   unsigned getMaxSVEVectorSizeInBits() const;
   unsigned getMinSVEVectorSizeInBits() const;
+  bool useSVEForFixedLengthVectors() const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 595f403..4f7ebff 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -751,6 +751,10 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   return Options;
 }
 
+bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
+  return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
+}
+
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                     MaybeAlign Alignment, unsigned AddressSpace,
                                     TTI::TargetCostKind CostKind,
@@ -778,7 +782,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
     return LT.first * 2 * AmortizationCost;
   }
 
-  if (Ty->isVectorTy() &&
+  if (useNeonVector(Ty) &&
       cast<FixedVectorType>(Ty)->getElementType()->isIntegerTy(8)) {
     unsigned ProfitableNumElements;
     if (Opcode == Instruction::Store)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a624f8b..baf11cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -147,6 +147,7 @@ public:
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
 
+  bool useNeonVector(const Type *Ty) const;
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
new file mode 100644
index 0000000..3a4e0f0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -0,0 +1,88 @@
+; Check memory cost model action for fixed vector SVE and Neon
+; Vector bits size lower than 256 bits end up assuming Neon cost model
+; CHECK-NEON has same performance as CHECK-SVE-128
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s --check-prefix=CHECK-SVE-128
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s --check-prefix=CHECK-SVE-256
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefix=CHECK-SVE-512
+
+define <16 x i8> @load16(<16 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <16 x i8>, <16 x i8>* %ptr
+  ret <16 x i8> %out
+}
+
+define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <16 x i8> %val, <16 x i8>* %ptr
+  ret void
+}
+
+define <8 x i8> @load8(<8 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <8 x i8>, <8 x i8>* %ptr
+  ret <8 x i8> %out
+}
+
+define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <8 x i8> %val, <8 x i8>* %ptr
+  ret void
+}
+
+define <4 x i8> @load4(<4 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <4 x i8>, <4 x i8>* %ptr
+  ret <4 x i8> %out
+}
+
+define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <4 x i8> %val, <4 x i8>* %ptr
+  ret void
+}
+
+define <16 x i16> @load_256(<16 x i16>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_256':
+; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <16 x i16>, <16 x i16>* %ptr
+  ret <16 x i16> %out
+}
+
+define <8 x i64> @load_512(<8 x i64>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_512':
+; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <8 x i64>, <8 x i64>* %ptr
+  ret <8 x i64> %out
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
new file mode 100644
index 0000000..1a7b262
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
@@ -0,0 +1,51 @@
+; Checks if the memory cost model does not break when using scalable vectors
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 8 x i8> @load-sve-8(<vscale x 8 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-8':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 8 x i8>, <vscale x 8 x i8>* %ptr
+  ret <vscale x 8 x i8> %retval
+}
+
+define void @store-sve-8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i8> %val) {
+; CHECK-LABEL: 'store-sve-8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 8 x i8> %val, <vscale x 8 x i8>* %ptr
+  ret void
+}
+
+define <vscale x 16 x i8> @load-sve-16(<vscale x 16 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-16':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr
+  ret <vscale x 16 x i8> %retval
+}
+
+define void @store-sve-16(<vscale x 16 x i8>* %ptr, <vscale x 16 x i8> %val) {
+; CHECK-LABEL: 'store-sve-16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 16 x i8> %val, <vscale x 16 x i8>* %ptr
+  ret void
+}
+
+define <vscale x 32 x i8> @load-sve-32(<vscale x 32 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-32':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 32 x i8>, <vscale x 32 x i8>* %ptr
+  ret <vscale x 32 x i8> %retval
+}
+
+define void @store-sve-32(<vscale x 32 x i8>* %ptr, <vscale x 32 x i8> %val) {
+; CHECK-LABEL: 'store-sve-32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 32 x i8> %val, <vscale x 32 x i8>* %ptr
+  ret void
+}
-- 
2.7.4