[X86] Add combine to shrink 64-bit ands when one input is an any_extend and the other input guarantees upper 32 bits are 0.

author Craig Topper <craig.topper@intel.com>

Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)

committer Craig Topper <craig.topper@intel.com>

Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)
author Craig Topper <craig.topper@intel.com>
Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)
committer Craig Topper <craig.topper@intel.com>
Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 6ee29417c545742ee8794e5d3dbea647dd122669..ce158a358c088e43a29a01a734a03bab0eeb7fff 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33592,6 +33592,20 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                                  DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
    }
  
+  // Use a 32-bit and+zext if either operand's upper 32 bits are known zero.
+  if (VT == MVT::i64 && Subtarget.is64Bit() &&
+      !isa<ConstantSDNode>(N->getOperand(1))) {
+    APInt HiMask = APInt::getHighBitsSet(64, 32);
+    if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) ||
+        DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) {
+      SDLoc dl(N);
+      SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0));
+      SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1));
+      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64,
+                         DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
+    }
+  }
+
    if (DCI.isBeforeLegalizeOps())
      return SDValue();
  
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll

index ad436435af23c881fe7125e6521aa7ae62338dd0..a9f7ec5b3a3276fd55f4f79d9d653e8c08a72250 100644 (file)
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -822,13 +822,13 @@ define i64 @blsr_disguised_constant(i64 %x) {
    ret i64 %r
  }
  
-; The add here gets shrunk, but the and does not thus hiding the blsr pattern.
+; The add here used to get shrunk, but the and did not, thus hiding the blsr pattern.
+; We now use the fact that the shift leaves zeros in its upper bits, so the and result has zero upper bits too, to shrink the and as well.
  define i64 @blsr_disguised_shrunk_add(i64 %x) {
  ; CHECK-LABEL: blsr_disguised_shrunk_add:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    shrq $48, %rdi
-; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    blsrl %edi, %eax
  ; CHECK-NEXT:    retq
    %a = lshr i64 %x, 48
    %b = add i64 %a, -1
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll

index 670fe7f52e126848f334496cef441d5d6c4bf0ec..1e9dd96def59e317874adb3a3fc8bd510c88f2bc 100644 (file)
--- a/llvm/test/CodeGen/X86/gather-addresses.ll
+++ b/llvm/test/CodeGen/X86/gather-addresses.ll
@@ -145,15 +145,15 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
  ; LIN-SSE2-NEXT:    movd %xmm1, %esi
  ; LIN-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
  ; LIN-SSE2-NEXT:    movd %xmm0, %edi
-; LIN-SSE2-NEXT:    andq %rcx, %rax
-; LIN-SSE2-NEXT:    andq %rcx, %rdx
-; LIN-SSE2-NEXT:    andq %rcx, %rsi
-; LIN-SSE2-NEXT:    andq %rcx, %rdi
-; LIN-SSE2-NEXT:    movq %rax, %xmm0
-; LIN-SSE2-NEXT:    movq %rdx, %xmm1
+; LIN-SSE2-NEXT:    andl %ecx, %eax
+; LIN-SSE2-NEXT:    andl %ecx, %edx
+; LIN-SSE2-NEXT:    andl %ecx, %esi
+; LIN-SSE2-NEXT:    andl %ecx, %edi
+; LIN-SSE2-NEXT:    movd %eax, %xmm0
+; LIN-SSE2-NEXT:    movd %edx, %xmm1
  ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE2-NEXT:    movq %rdi, %xmm2
-; LIN-SSE2-NEXT:    movq %rsi, %xmm1
+; LIN-SSE2-NEXT:    movd %edi, %xmm2
+; LIN-SSE2-NEXT:    movd %esi, %xmm1
  ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
  ; LIN-SSE2-NEXT:    retq
  ;
@@ -165,15 +165,15 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
  ; LIN-SSE4-NEXT:    pextrd $1, %xmm0, %edx
  ; LIN-SSE4-NEXT:    pextrd $2, %xmm0, %esi
  ; LIN-SSE4-NEXT:    pextrd $3, %xmm0, %edi
-; LIN-SSE4-NEXT:    andq %rcx, %rax
-; LIN-SSE4-NEXT:    andq %rcx, %rdx
-; LIN-SSE4-NEXT:    andq %rcx, %rsi
-; LIN-SSE4-NEXT:    andq %rcx, %rdi
-; LIN-SSE4-NEXT:    movq %rdx, %xmm1
-; LIN-SSE4-NEXT:    movq %rax, %xmm0
+; LIN-SSE4-NEXT:    andl %ecx, %eax
+; LIN-SSE4-NEXT:    andl %ecx, %edx
+; LIN-SSE4-NEXT:    andl %ecx, %esi
+; LIN-SSE4-NEXT:    andl %ecx, %edi
+; LIN-SSE4-NEXT:    movd %edx, %xmm1
+; LIN-SSE4-NEXT:    movd %eax, %xmm0
  ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE4-NEXT:    movq %rdi, %xmm2
-; LIN-SSE4-NEXT:    movq %rsi, %xmm1
+; LIN-SSE4-NEXT:    movd %edi, %xmm2
+; LIN-SSE4-NEXT:    movd %esi, %xmm1
  ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
  ; LIN-SSE4-NEXT:    retq
  ;
@@ -188,15 +188,15 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
  ; WIN-SSE2-NEXT:    movd %xmm1, %r8d
  ; WIN-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
  ; WIN-SSE2-NEXT:    movd %xmm0, %edx
-; WIN-SSE2-NEXT:    andq %r9, %rax
-; WIN-SSE2-NEXT:    andq %r9, %rcx
-; WIN-SSE2-NEXT:    andq %r9, %r8
-; WIN-SSE2-NEXT:    andq %r9, %rdx
-; WIN-SSE2-NEXT:    movq %rax, %xmm0
-; WIN-SSE2-NEXT:    movq %rcx, %xmm1
+; WIN-SSE2-NEXT:    andl %r9d, %eax
+; WIN-SSE2-NEXT:    andl %r9d, %ecx
+; WIN-SSE2-NEXT:    andl %r9d, %r8d
+; WIN-SSE2-NEXT:    andl %r9d, %edx
+; WIN-SSE2-NEXT:    movd %eax, %xmm0
+; WIN-SSE2-NEXT:    movd %ecx, %xmm1
  ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE2-NEXT:    movq %rdx, %xmm2
-; WIN-SSE2-NEXT:    movq %r8, %xmm1
+; WIN-SSE2-NEXT:    movd %edx, %xmm2
+; WIN-SSE2-NEXT:    movd %r8d, %xmm1
  ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
  ; WIN-SSE2-NEXT:    retq
  ;
@@ -208,15 +208,15 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
  ; WIN-SSE4-NEXT:    pextrd $1, %xmm0, %ecx
  ; WIN-SSE4-NEXT:    pextrd $2, %xmm0, %r8d
  ; WIN-SSE4-NEXT:    pextrd $3, %xmm0, %edx
-; WIN-SSE4-NEXT:    andq %r9, %rax
-; WIN-SSE4-NEXT:    andq %r9, %rcx
-; WIN-SSE4-NEXT:    andq %r9, %r8
-; WIN-SSE4-NEXT:    andq %r9, %rdx
-; WIN-SSE4-NEXT:    movq %rcx, %xmm1
-; WIN-SSE4-NEXT:    movq %rax, %xmm0
+; WIN-SSE4-NEXT:    andl %r9d, %eax
+; WIN-SSE4-NEXT:    andl %r9d, %ecx
+; WIN-SSE4-NEXT:    andl %r9d, %r8d
+; WIN-SSE4-NEXT:    andl %r9d, %edx
+; WIN-SSE4-NEXT:    movd %ecx, %xmm1
+; WIN-SSE4-NEXT:    movd %eax, %xmm0
  ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE4-NEXT:    movq %rdx, %xmm2
-; WIN-SSE4-NEXT:    movq %r8, %xmm1
+; WIN-SSE4-NEXT:    movd %edx, %xmm2
+; WIN-SSE4-NEXT:    movd %r8d, %xmm1
  ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
  ; WIN-SSE4-NEXT:    retq
  ;
diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll

index 9368303a4e6827a0dac46688de0751644c4ac54e..62e7a66a0f1600030719d55853e59d0e783911fe 100644 (file)
--- a/llvm/test/CodeGen/X86/pr32284.ll
+++ b/llvm/test/CodeGen/X86/pr32284.ll
@@ -479,17 +479,16 @@ define void @f3() #0 {
  ; X64-LABEL: f3:
  ; X64:       # %bb.0: # %entry
  ; X64-NEXT:    movl {{.*}}(%rip), %eax
-; X64-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-NEXT:    xorq %rax, %rcx
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    testq %rax, %rax
-; X64-NEXT:    sete %dl
-; X64-NEXT:    movl {{.*}}(%rip), %eax
-; X64-NEXT:    xorl %ecx, %eax
-; X64-NEXT:    andq %rdx, %rax
-; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %ecx, {{.*}}(%rip)
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    notl %eax
+; X64-NEXT:    sete %cl
+; X64-NEXT:    movl {{.*}}(%rip), %edx
+; X64-NEXT:    xorl %eax, %edx
+; X64-NEXT:    andl %edx, %ecx
+; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %eax, {{.*}}(%rip)
  ; X64-NEXT:    retq
  ;
  ; 686-O0-LABEL: f3:
author	Craig Topper <craig.topper@intel.com>
	Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Tue, 13 Feb 2018 16:25:25 +0000 (16:25 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/bmi.ll		patch \| blob \| history
llvm/test/CodeGen/X86/gather-addresses.ll		patch \| blob \| history
llvm/test/CodeGen/X86/pr32284.ll		patch \| blob \| history