From af194e938008262bb8a99514d21705eb06b953b5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 24 Apr 2019 19:28:38 +0000
Subject: [PATCH] [X86] Prevent folding a load into an AND if that AND is
 really a ZEXT_INREG that should use movzx.

This can save a 32-bit immediate move.

We would shrink the load and fold it if it were non-volatile, but that's
trickier to check for.

llvm-svn: 359129
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp         | 9 +++++++++
 llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll | 4 ++--
 llvm/test/CodeGen/X86/fold-and-shift.ll         | 8 ++++----
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index df7f1fc..dd387e5e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -592,6 +592,15 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
           Imm->getAPIntValue().isIntN(32))
         return false;
 
+      // If this is really a zext_inreg that can be represented with a movzx
+      // instruction, prefer that.
+      // TODO: We could shrink the load and fold it if it is non-volatile.
+      if (U->getOpcode() == ISD::AND &&
+          (Imm->getAPIntValue() == UINT8_MAX ||
+           Imm->getAPIntValue() == UINT16_MAX ||
+           Imm->getAPIntValue() == UINT32_MAX))
+        return false;
+
       // ADD/SUB with can negate the immediate and use the opposite operation
       // to fit 128 into a sign extended 8 bit immediate.
       if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
diff --git a/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index ac18c2c..4c81df7 100644
--- a/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -8,8 +8,8 @@
 define i32 @main() nounwind {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $255, %eax
-; CHECK-NEXT:    andl g_407, %eax
+; CHECK-NEXT:    movl g_407, %eax
+; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll func_45
 ; CHECK-NEXT:    addl $4, %esp
diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll
index e14e337..6db4b25 100644
--- a/llvm/test/CodeGen/X86/fold-and-shift.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift.ll
@@ -5,8 +5,8 @@
 define i32 @t1(i8* %X, i32 %i) {
 ; CHECK-LABEL: t1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl $255, %ecx
-; CHECK-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movzbl %cl, %ecx
 ; CHECK-NEXT:    movl (%eax,%ecx,4), %eax
 ; CHECK-NEXT:    retl
@@ -23,8 +23,8 @@
 define i32 @t2(i16* %X, i32 %i) {
 ; CHECK-LABEL: t2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl $65535, %ecx # imm = 0xFFFF
-; CHECK-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movzwl %cx, %ecx
 ; CHECK-NEXT:    movl (%eax,%ecx,4), %eax
 ; CHECK-NEXT:    retl
-- 
2.7.4
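
For reference, a minimal reproducer for the pattern this patch changes. This
is a sketch, not part of the patch: the file, global, and function names are
hypothetical, and the expected assembly is extrapolated from the CHECK-line
updates above rather than taken from a compiler run.

    ; and-to-movzx.ll - hypothetical reproducer
    @g = global i32 0

    ; The volatile load cannot be shrunk to an i8 load, so the mask cannot be
    ; merged into the load. Before this patch, isel folded the load into the
    ; AND, which requires materializing the mask with a 32-bit immediate move:
    ;     movl $255, %eax
    ;     andl g, %eax
    ; With this patch, IsProfitableToFold rejects the fold, and the AND with
    ; 255 is selected as a zero-extending move instead:
    ;     movl g, %eax
    ;     movzbl %al, %eax
    define i32 @mask_low_byte() {
    entry:
      %v = load volatile i32, i32* @g
      %m = and i32 %v, 255
      ret i32 %m
    }

The same reasoning covers masks of 0xFFFF (movzwl) and, on 64-bit targets,
0xFFFFFFFF (a plain movl, which implicitly zero-extends the 32-bit result);
those correspond to the UINT16_MAX and UINT32_MAX cases in the new check.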