[X86] Disable shouldFoldConstantShiftPairToMask for scalar shifts on AMD targets...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td

index a799c1fda49ebf132d5865baf447c6e8c46efd90..8f6d201bbb4059b1ccb0ae74f03ecd2973282ba4 100644 (file)
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -427,6 +427,11 @@ def FeatureFastHorizontalOps
          "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
          "normal vector instructions with shuffles", [FeatureSSE3]>;
  
+def FeatureFastScalarShiftMasks
+    : SubtargetFeature<
+        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
+        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
+
  def FeatureFastVectorShiftMasks
      : SubtargetFeature<
          "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
@@ -784,6 +789,7 @@ def ProcessorFeatures {
                                                        FeatureSlowSHLD,
                                                        FeatureLAHFSAHF,
                                                        FeatureFast15ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
                                                        FeatureFastVectorShiftMasks];
    list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
  
@@ -825,6 +831,7 @@ def ProcessorFeatures {
                                                        FeatureSlowSHLD,
                                                        FeatureLAHFSAHF,
                                                        FeatureFast11ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
                                                        FeatureBranchFusion];
    list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
  
@@ -876,6 +883,7 @@ def ProcessorFeatures {
                                         FeatureFastBEXTR,
                                         FeatureFast15ByteNOP,
                                         FeatureBranchFusion,
+                                       FeatureFastScalarShiftMasks,
                                         FeatureMMX,
                                         FeatureMOVBE,
                                         FeatureMWAITX,
@@ -1092,20 +1100,22 @@ foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
    def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                   FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
-                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV]>;
+                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
+                 FeatureFastScalarShiftMasks]>;
  }
  
  foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
    def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
                   Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
-                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit]>;
+                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
+                 FeatureFastScalarShiftMasks]>;
  }
  
  foreach P = ["amdfam10", "barcelona"] in {
    def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
                   FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
                   FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
-                 Feature64Bit]>;
+                 Feature64Bit, FeatureFastScalarShiftMasks]>;
  }
  
  // Bobcat
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 28bd08f57c103341d85ad499625f131f82446258..43911a1b0165b56ab48fb0c6b21a5171b46de382 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5021,11 +5021,12 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
            (N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getOpcode() == ISD::SHL)) &&
           "Expected shift-shift mask");
-
-  if (Subtarget.hasFastVectorShiftMasks() && N->getValueType(0).isVector()) {
+  EVT VT = N->getValueType(0);
+  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
+      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
      // Only fold if the shift values are equal - so it folds to AND.
-    // TODO - we should fold if either is non-uniform but we don't do the
-    // fold for non-splats yet.
+    // TODO - we should fold if either is a non-uniform vector but we don't do
+    // the fold for non-splats yet.
      return N->getOperand(1) == N->getOperand(0).getOperand(1);
    }
    return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h

index 3b11bb12f6270d8d4d189a4ca4ef551cc4c9b44d..43d4ab713181dc25f07eb51c3381491bf5f3eb64 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -396,6 +396,9 @@ protected:
    /// Try harder to combine to horizontal vector ops if they are fast.
    bool HasFastHorizontalOps = false;
  
+  /// Prefer a left/right scalar logical shift pair over a shift+and pair.
+  bool HasFastScalarShiftMasks = false;
+
    /// Prefer a left/right vector logical shifts pair over a shift+and pair.
    bool HasFastVectorShiftMasks = false;
  
@@ -650,6 +653,7 @@ public:
    bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
    bool hasFastBEXTR() const { return HasFastBEXTR; }
    bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+  bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
    bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
    bool hasMacroFusion() const { return HasMacroFusion; }
    bool hasBranchFusion() const { return HasBranchFusion; }
diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll

index 9bff5687f0e67536a820e5cb0ec07a01d5ee04ea..bfad23a127b554e74944b9f0460412fae0fd3c51 100644 (file)
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -43,13 +43,21 @@ define i8 @test_i8_shl_lshr_1(i8 %a0) {
  ; X86-NEXT:    andb $-32, %al
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i8_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andb $-32, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andb $-32, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shrb $3, %al
+; X64-SHIFT-NEXT:    shlb $5, %al
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i8 %a0, 3
    %2 = shl i8 %1, 5
    ret i8 %2
@@ -63,13 +71,21 @@ define i8 @test_i8_shl_lshr_2(i8 %a0) {
  ; X86-NEXT:    andb $56, %al
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i8_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrb $2, %al
-; X64-NEXT:    andb $56, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i8_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrb $2, %al
+; X64-MASK-NEXT:    andb $56, %al
+; X64-MASK-NEXT:    # kill: def $al killed $al killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i8_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    shrb $5, %dil
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    # kill: def $al killed $al killed $eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i8 %a0, 5
    %2 = shl i8 %1, 3
    ret i8 %2
@@ -103,13 +119,21 @@ define i16 @test_i16_shl_lshr_1(i16 %a0) {
  ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i16_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andl $65504, %eax # imm = 0xFFE0
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i16_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andl $65504, %eax # imm = 0xFFE0
+; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i16_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movzwl %di, %eax
+; X64-SHIFT-NEXT:    shrl $3, %eax
+; X64-SHIFT-NEXT:    shll $5, %eax
+; X64-SHIFT-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i16 %a0, 3
    %2 = shl i16 %1, 5
    ret i16 %2
@@ -124,13 +148,21 @@ define i16 @test_i16_shl_lshr_2(i16 %a0) {
  ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i16_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $16376, %eax # imm = 0x3FF8
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i16_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrl $2, %eax
+; X64-MASK-NEXT:    andl $16376, %eax # imm = 0x3FF8
+; X64-MASK-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i16_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movzwl %di, %eax
+; X64-SHIFT-NEXT:    shrl $5, %eax
+; X64-SHIFT-NEXT:    shll $3, %eax
+; X64-SHIFT-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i16 %a0, 5
    %2 = shl i16 %1, 3
    ret i16 %2
@@ -161,12 +193,19 @@ define i32 @test_i32_shl_lshr_1(i32 %a0) {
  ; X86-NEXT:    andl $-32, %eax
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i32_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    andl $-32, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT:    leal (,%rdi,4), %eax
+; X64-MASK-NEXT:    andl $-32, %eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movl %edi, %eax
+; X64-SHIFT-NEXT:    shrl $3, %eax
+; X64-SHIFT-NEXT:    shll $5, %eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i32 %a0, 3
    %2 = shl i32 %1, 5
    ret i32 %2
@@ -180,12 +219,19 @@ define i32 @test_i32_shl_lshr_2(i32 %a0) {
  ; X86-NEXT:    andl $-8, %eax
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i32_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $-8, %eax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i32_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movl %edi, %eax
+; X64-MASK-NEXT:    shrl $2, %eax
+; X64-MASK-NEXT:    andl $-8, %eax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i32_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SHIFT-NEXT:    shrl $5, %edi
+; X64-SHIFT-NEXT:    leal (,%rdi,8), %eax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i32 %a0, 5
    %2 = shl i32 %1, 3
    ret i32 %2
@@ -219,11 +265,18 @@ define i64 @test_i64_shl_lshr_1(i64 %a0) {
  ; X86-NEXT:    shldl $2, %ecx, %edx
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i64_shl_lshr_1:
-; X64:       # %bb.0:
-; X64-NEXT:    leaq (,%rdi,4), %rax
-; X64-NEXT:    andq $-32, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_shl_lshr_1:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    leaq (,%rdi,4), %rax
+; X64-MASK-NEXT:    andq $-32, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_shl_lshr_1:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    movq %rdi, %rax
+; X64-SHIFT-NEXT:    shrq $3, %rax
+; X64-SHIFT-NEXT:    shlq $5, %rax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i64 %a0, 3
    %2 = shl i64 %1, 5
    ret i64 %2
@@ -239,12 +292,18 @@ define i64 @test_i64_shl_lshr_2(i64 %a0) {
  ; X86-NEXT:    shrl $2, %edx
  ; X86-NEXT:    retl
  ;
-; X64-LABEL: test_i64_shl_lshr_2:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $2, %rax
-; X64-NEXT:    andq $-8, %rax
-; X64-NEXT:    retq
+; X64-MASK-LABEL: test_i64_shl_lshr_2:
+; X64-MASK:       # %bb.0:
+; X64-MASK-NEXT:    movq %rdi, %rax
+; X64-MASK-NEXT:    shrq $2, %rax
+; X64-MASK-NEXT:    andq $-8, %rax
+; X64-MASK-NEXT:    retq
+;
+; X64-SHIFT-LABEL: test_i64_shl_lshr_2:
+; X64-SHIFT:       # %bb.0:
+; X64-SHIFT-NEXT:    shrq $5, %rdi
+; X64-SHIFT-NEXT:    leaq (,%rdi,8), %rax
+; X64-SHIFT-NEXT:    retq
    %1 = lshr i64 %a0, 5
    %2 = shl i64 %1, 3
    ret i64 %2
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 14 May 2019 15:21:28 +0000 (15:21 +0000)
llvm/lib/Target/X86/X86.td		patch | blob | history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/lib/Target/X86/X86Subtarget.h		patch | blob | history
llvm/test/CodeGen/X86/shift-mask.ll		patch | blob | history