From fa7057a415d5de8bec0063b2e9c96836c08468ab Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 29 Mar 2016 01:36:01 +0000 Subject: [PATCH] [PowerPC] Refactor popcnt[dw] target features Instead of using two feature bits, one to indicate the availability of the popcnt[dw] instructions, and another to indicate whether or not they're fast, use a single enum. This allows more consistent control via target attribute strings, and via Clang's command line. llvm-svn: 264690 --- llvm/lib/Target/PowerPC/PPC.td | 20 +++++++++++--------- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 4 ++-- llvm/lib/Target/PowerPC/PPCSubtarget.h | 15 +++++++++++---- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 5 +++-- llvm/test/CodeGen/PowerPC/popcnt.ll | 2 ++ 6 files changed, 30 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index fc516d9..d05226e 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -86,8 +86,6 @@ def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; -def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", - "Enable the popcnt[dw] instructions">; def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", "Enable the bpermd instruction">; def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", @@ -152,14 +150,18 @@ def FeatureFloat128 : "Enable the __float128 data type for IEEE-754R Binary128.", [FeatureVSX]>; -def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", - "Treat vector data stream cache control instructions as deprecated">; - +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", + "POPCNTD_Fast", + "Enable the popcnt[dw] instructions">; // Note that for the a2/a2q processor models we should not use popcnt[dw] by // default. These processors do support the instructions, but they're // microcoded, and the software emulation is about twice as fast. -def SlowPOPCNTD : SubtargetFeature<"slow-popcntd","SlowPOPCNTD", "true", - "The popcnt[dw] instructions are slow">; +def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD", + "POPCNTD_Slow", + "Has slow popcnt[dw] instructions">; + +def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", + "Treat vector data stream cache control instructions as deprecated">; /* Since new processors generally contain a superset of features of those that came before them, the idea is to make implementations of new processors @@ -343,7 +345,7 @@ def : ProcessorModel<"a2", PPCA2Model, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, SlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, @@ -351,7 +353,7 @@ def : ProcessorModel<"a2q", PPCA2Model, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, SlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>; def : ProcessorModel<"pwr3", G5Model, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 6a16e01..f3251ba 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -214,7 +214,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - if (Subtarget.hasPOPCNTD() && !Subtarget.isPOPCNTDSlow()) { + if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 6ab79c1..556db16 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -84,7 +84,6 @@ void PPCSubtarget::initializeEnvironment() { HasFPRND = false; HasFPCVT = false; HasISEL = false; - HasPOPCNTD = false; HasBPERMD = false; HasExtDiv = false; HasCMPB = false; @@ -105,7 +104,8 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFusion = false; HasFloat128 = false; - SlowPOPCNTD = false; + + HasPOPCNTD = POPCNTD_Unavailable; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index fc4ba3f..a91cdd0 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -64,6 +64,13 @@ class GlobalValue; class TargetMachine; class PPCSubtarget : public PPCGenSubtargetInfo { +public: + enum POPCNTDKind { + POPCNTD_Unavailable, + POPCNTD_Slow, + POPCNTD_Fast + }; + protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -103,7 +110,6 @@ protected: bool HasFPRND; bool HasFPCVT; bool HasISEL; - bool HasPOPCNTD; bool HasBPERMD; bool HasExtDiv; bool HasCMPB; @@ -124,7 +130,8 @@ protected: bool HasHTM; bool HasFusion; bool HasFloat128; - bool SlowPOPCNTD; + + POPCNTDKind HasPOPCNTD; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -237,7 +244,6 @@ public: bool hasP9Altivec() const { return HasP9Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } - bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasBPERMD() const { return HasBPERMD; } bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } @@ -249,7 +255,6 @@ public: bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } - bool isPOPCNTDSlow() const { return SlowPOPCNTD; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; @@ -268,6 +273,8 @@ public: bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } + POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; } + const Triple &getTargetTriple() const { return TargetTriple; } /// isDarwin - True if this is any darwin platform. diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 879671b..a1f6528 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -42,8 +42,9 @@ PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300), TargetTransformInfo::PopcntSupportKind PPCTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - if (ST->hasPOPCNTD() && TyWidth <= 64) - return ST->isPOPCNTDSlow() ? TTI::PSK_SlowHardware : TTI::PSK_FastHardware; + if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64) + return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ? + TTI::PSK_SlowHardware : TTI::PSK_FastHardware; return TTI::PSK_Software; } diff --git a/llvm/test/CodeGen/PowerPC/popcnt.ll b/llvm/test/CodeGen/PowerPC/popcnt.ll index 79fc40e..5acaa29 100644 --- a/llvm/test/CodeGen/PowerPC/popcnt.ll +++ b/llvm/test/CodeGen/PowerPC/popcnt.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s +; RUN: llc -march=ppc64 -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOWPC ; RUN: llc -march=ppc64 -mcpu=pwr7 < %s | FileCheck %s ; RUN: llc -march=ppc64 -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC +; RUN: llc -march=ppc64 -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s define i8 @cnt8(i8 %x) nounwind readnone { %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) -- 2.7.4