From de565fc73e9047a3044e8680c88aa6a3530a16e6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 16 Feb 2018 18:51:09 +0000 Subject: [PATCH] [X86] Only reorder srl/and on last DAG combiner run This seems to interfere with a target independent brcond combine that looks for the (srl (and X, C1), C2) pattern to enable TEST instructions. Once we flip, that combine doesn't fire and we end up exposing it to the X86 specific BT combine which causes us to emit a BT instruction. BT has lower throughput than TEST. We could try to make the brcond combine aware of the alternate pattern, but since the flip was just a code size reduction and not likely to enable other combines, it seemed easier to just delay it until after lowering. Differential Revision: https://reviews.llvm.org/D43201 llvm-svn: 325371 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++-- llvm/test/CodeGen/X86/live-out-reg-info.ll | 4 +-- llvm/test/CodeGen/X86/test-shrink.ll | 36 +++++++++++----------- llvm/test/CodeGen/X86/test-vs-bittest.ll | 17 ++++++---- llvm/test/CodeGen/X86/xor-icmp.ll | 4 +-- 5 files changed, 41 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7fd196ef24b7..70b04376e1bd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32941,11 +32941,17 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) { +static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + // Only do this on the last DAG combine as it can interfere with other + // combines. + if (!DCI.isAfterLegalizeVectorOps()) + return SDValue(); + // Try to improve a sequence of srl (and X, C1), C2 by inverting the order. 
// TODO: This is a generic DAG combine that became an x86-only combine to // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and @@ -32996,7 +33002,7 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG, return V; if (N->getOpcode() == ISD::SRL) - if (SDValue V = combineShiftRightLogical(N, DAG)) + if (SDValue V = combineShiftRightLogical(N, DAG, DCI)) return V; return SDValue(); diff --git a/llvm/test/CodeGen/X86/live-out-reg-info.ll b/llvm/test/CodeGen/X86/live-out-reg-info.ll index e4644665d65f..882e17e12441 100644 --- a/llvm/test/CodeGen/X86/live-out-reg-info.ll +++ b/llvm/test/CodeGen/X86/live-out-reg-info.ll @@ -12,8 +12,8 @@ define void @foo(i32 %a) { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: shrl $23, %edi -; CHECK-NEXT: btl $8, %edi -; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: testl $256, %edi # imm = 0x100 +; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %true ; CHECK-NEXT: callq qux ; CHECK-NEXT: .LBB0_2: # %false diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll index 0cc7849e8e46..e44233fdd943 100644 --- a/llvm/test/CodeGen/X86/test-shrink.ll +++ b/llvm/test/CodeGen/X86/test-shrink.ll @@ -6,8 +6,8 @@ define void @g64xh(i64 inreg %x) nounwind { ; CHECK-LINUX64-LABEL: g64xh: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: btl $11, %edi -; CHECK-LINUX64-NEXT: jb .LBB0_2 +; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-LINUX64-NEXT: jne .LBB0_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax ; CHECK-LINUX64-NEXT: callq bar @@ -18,8 +18,8 @@ define void @g64xh(i64 inreg %x) nounwind { ; CHECK-WIN32-64-LABEL: g64xh: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: btl $11, %ecx -; CHECK-WIN32-64-NEXT: jb .LBB0_2 +; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800 +; CHECK-WIN32-64-NEXT: jne .LBB0_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar ; 
CHECK-WIN32-64-NEXT: .LBB0_2: # %no @@ -28,8 +28,8 @@ define void @g64xh(i64 inreg %x) nounwind { ; ; CHECK-X86-LABEL: g64xh: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: btl $11, %eax -; CHECK-X86-NEXT: jb .LBB0_2 +; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800 +; CHECK-X86-NEXT: jne .LBB0_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar ; CHECK-X86-NEXT: .LBB0_2: # %no @@ -90,8 +90,8 @@ no: define void @g32xh(i32 inreg %x) nounwind { ; CHECK-LINUX64-LABEL: g32xh: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: btl $11, %edi -; CHECK-LINUX64-NEXT: jb .LBB2_2 +; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-LINUX64-NEXT: jne .LBB2_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax ; CHECK-LINUX64-NEXT: callq bar @@ -102,8 +102,8 @@ define void @g32xh(i32 inreg %x) nounwind { ; CHECK-WIN32-64-LABEL: g32xh: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: btl $11, %ecx -; CHECK-WIN32-64-NEXT: jb .LBB2_2 +; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800 +; CHECK-WIN32-64-NEXT: jne .LBB2_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar ; CHECK-WIN32-64-NEXT: .LBB2_2: # %no @@ -112,8 +112,8 @@ define void @g32xh(i32 inreg %x) nounwind { ; ; CHECK-X86-LABEL: g32xh: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: btl $11, %eax -; CHECK-X86-NEXT: jb .LBB2_2 +; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800 +; CHECK-X86-NEXT: jne .LBB2_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar ; CHECK-X86-NEXT: .LBB2_2: # %no @@ -174,8 +174,8 @@ no: define void @g16xh(i16 inreg %x) nounwind { ; CHECK-LINUX64-LABEL: g16xh: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: btl $11, %edi -; CHECK-LINUX64-NEXT: jb .LBB4_2 +; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-LINUX64-NEXT: jne .LBB4_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax ; CHECK-LINUX64-NEXT: callq bar @@ -186,8 +186,8 @@ define void @g16xh(i16 inreg 
%x) nounwind { ; CHECK-WIN32-64-LABEL: g16xh: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: btl $11, %ecx -; CHECK-WIN32-64-NEXT: jb .LBB4_2 +; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800 +; CHECK-WIN32-64-NEXT: jne .LBB4_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar ; CHECK-WIN32-64-NEXT: .LBB4_2: # %no @@ -196,8 +196,8 @@ define void @g16xh(i16 inreg %x) nounwind { ; ; CHECK-X86-LABEL: g16xh: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: btl $11, %eax -; CHECK-X86-NEXT: jb .LBB4_2 +; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800 +; CHECK-X86-NEXT: jne .LBB4_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar ; CHECK-X86-NEXT: .LBB4_2: # %no diff --git a/llvm/test/CodeGen/X86/test-vs-bittest.ll b/llvm/test/CodeGen/X86/test-vs-bittest.ll index 44f77e8b7ce5..d20a75790923 100644 --- a/llvm/test/CodeGen/X86/test-vs-bittest.ll +++ b/llvm/test/CodeGen/X86/test-vs-bittest.ll @@ -6,8 +6,8 @@ define void @test64(i64 inreg %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: btl $11, %edi -; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %yes ; CHECK-NEXT: callq bar ; CHECK-NEXT: .LBB0_2: # %no @@ -47,6 +47,11 @@ no: ret void } +; This test is identical to test64 above with only the destination of the br +; reversed. This somehow causes the two functions to get slightly different +; initial IR. One has an extra invert of the setcc. This previously caused one +; of the functions to use a BT while the other used a TEST due to another DAG +; combine messing with an expected canonical form. 
define void @test64_2(i64 inreg %x) { ; CHECK-LABEL: test64_2: ; CHECK: # %bb.0: @@ -190,8 +195,8 @@ define void @test32(i32 inreg %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: btl $11, %edi -; CHECK-NEXT: jb .LBB8_2 +; CHECK-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-NEXT: jne .LBB8_2 ; CHECK-NEXT: # %bb.1: # %yes ; CHECK-NEXT: callq bar ; CHECK-NEXT: .LBB8_2: # %no @@ -282,8 +287,8 @@ define void @test16(i16 inreg %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: btl $11, %edi -; CHECK-NEXT: jb .LBB12_2 +; CHECK-NEXT: testl $2048, %edi # imm = 0x800 +; CHECK-NEXT: jne .LBB12_2 ; CHECK-NEXT: # %bb.1: # %yes ; CHECK-NEXT: callq bar ; CHECK-NEXT: .LBB12_2: # %no diff --git a/llvm/test/CodeGen/X86/xor-icmp.ll b/llvm/test/CodeGen/X86/xor-icmp.ll index 6cdc3186cd4c..1b5acce51d55 100644 --- a/llvm/test/CodeGen/X86/xor-icmp.ll +++ b/llvm/test/CodeGen/X86/xor-icmp.ll @@ -19,8 +19,8 @@ define i32 @t(i32 %a, i32 %b) nounwind ssp { ; X64: # %bb.0: # %entry ; X64-NEXT: xorl %esi, %edi ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: btl $14, %edi -; X64-NEXT: jae .LBB0_1 +; X64-NEXT: testl $16384, %edi # imm = 0x4000 +; X64-NEXT: je .LBB0_1 ; X64-NEXT: # %bb.2: # %bb1 ; X64-NEXT: jmp bar # TAILCALL ; X64-NEXT: .LBB0_1: # %bb -- 2.34.1