From 010ae8dcbbd8861f4e9f6883218b2e51c3163b9c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 20 Feb 2018 17:41:00 +0000
Subject: [PATCH] [X86] Promote 16-bit cmovs to 32-bits

This allows us to avoid an opsize prefix. And forcing some move immediates to i32 avoids a length changing prefix on those instructions.

This mostly replaces the existing combine we had for zext/sext+cmov of constants. I left in a case for sign extending a 32 bit cmov of constants to 64 bits.

Differential Revision: https://reviews.llvm.org/D43327

llvm-svn: 325601
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        | 57 ++++++++++++++++++++++++--
 llvm/test/CodeGen/X86/avx512-insert-extract.ll |  5 ++-
 llvm/test/CodeGen/X86/avx512-mask-op.ll        | 24 +++++------
 llvm/test/CodeGen/X86/avx512-schedule.ll       | 12 +++---
 llvm/test/CodeGen/X86/bool-simplify.ll         |  4 +-
 llvm/test/CodeGen/X86/select.ll                | 14 +++----
 llvm/test/CodeGen/X86/setcc-lowering.ll        |  4 +-
 7 files changed, 86 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6916fd..2a63c9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18713,6 +18713,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // Promote i16 cmovs if it won't prevent folding a load.
+  if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
+    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+    Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+    SDValue Ops[] = { Op2, Op1, CC, Cond };
+    SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
+    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+  }
+
   // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
   // condition is true.
   SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -35935,12 +35944,54 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget &Subtarget) {
+// Try to combine sext_in_reg of a cmov of constants by extending the constants.
+static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
-  if (!VT.isVector())
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+
+  if (ExtraVT != MVT::i16)
+    return SDValue();
+
+  // Look through single use any_extends.
+  if (N0.getOpcode() == ISD::ANY_EXTEND && N0.hasOneUse())
+    N0 = N0.getOperand(0);
+
+  // See if we have a single use cmov.
+  if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
+    return SDValue();
+
+  SDValue CMovOp0 = N0.getOperand(0);
+  SDValue CMovOp1 = N0.getOperand(1);
+
+  // Make sure both operands are constants.
+  if (!isa<ConstantSDNode>(CMovOp0.getNode()) ||
+      !isa<ConstantSDNode>(CMovOp1.getNode()))
     return SDValue();
 
+  SDLoc DL(N);
+
+  // If we looked through an any_extend above, add one to the constants.
+  if (N0.getValueType() != VT) {
+    CMovOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp0);
+    CMovOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp1);
+  }
+
+  CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp0, N1);
+  CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp1, N1);
+
+  return DAG.getNode(X86ISD::CMOV, DL, VT, CMovOp0, CMovOp1,
+                     N0.getOperand(2), N0.getOperand(3));
+}
+
+static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
+  if (SDValue V = combineSextInRegCmov(N, DAG))
+    return V;
+
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 205af79..2369855 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -287,8 +287,9 @@ define i16 @test15(i1 *%addr) {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    cmpb $0, (%rdi)
-; CHECK-NEXT:    movw $-1, %ax
-; CHECK-NEXT:    cmovew %cx, %ax
+; CHECK-NEXT:    movl $65535, %eax ## imm = 0xFFFF
+; CHECK-NEXT:    cmovel %ecx, %eax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %x = load i1 , i1 * %addr, align 1
   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 4a78632..71b17ff 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -818,9 +818,9 @@ define <16 x i1> @test15(i32 %x, i32 %y)  {
 ; KNL-LABEL: test15:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    cmpl %esi, %edi
-; KNL-NEXT:    movw $21845, %ax ## imm = 0x5555
-; KNL-NEXT:    movw $1, %cx
-; KNL-NEXT:    cmovgw %ax, %cx
+; KNL-NEXT:    movl $21845, %eax ## imm = 0x5555
+; KNL-NEXT:    movl $1, %ecx
+; KNL-NEXT:    cmovgl %eax, %ecx
 ; KNL-NEXT:    kmovw %ecx, %k1
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
@@ -830,9 +830,9 @@ define <16 x i1> @test15(i32 %x, i32 %y)  {
 ; SKX-LABEL: test15:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    cmpl %esi, %edi
-; SKX-NEXT:    movw $21845, %ax ## imm = 0x5555
-; SKX-NEXT:    movw $1, %cx
-; SKX-NEXT:    cmovgw %ax, %cx
+; SKX-NEXT:    movl $21845, %eax ## imm = 0x5555
+; SKX-NEXT:    movl $1, %ecx
+; SKX-NEXT:    cmovgl %eax, %ecx
 ; SKX-NEXT:    kmovd %ecx, %k0
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
 ; SKX-NEXT:    retq
@@ -840,9 +840,9 @@ define <16 x i1> @test15(i32 %x, i32 %y)  {
 ; AVX512BW-LABEL: test15:
 ; AVX512BW:       ## %bb.0:
 ; AVX512BW-NEXT:    cmpl %esi, %edi
-; AVX512BW-NEXT:    movw $21845, %ax ## imm = 0x5555
-; AVX512BW-NEXT:    movw $1, %cx
-; AVX512BW-NEXT:    cmovgw %ax, %cx
+; AVX512BW-NEXT:    movl $21845, %eax ## imm = 0x5555
+; AVX512BW-NEXT:    movl $1, %ecx
+; AVX512BW-NEXT:    cmovgl %eax, %ecx
 ; AVX512BW-NEXT:    kmovd %ecx, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
@@ -852,9 +852,9 @@ define <16 x i1> @test15(i32 %x, i32 %y)  {
 ; AVX512DQ-LABEL: test15:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    cmpl %esi, %edi
-; AVX512DQ-NEXT:    movw $21845, %ax ## imm = 0x5555
-; AVX512DQ-NEXT:    movw $1, %cx
-; AVX512DQ-NEXT:    cmovgw %ax, %cx
+; AVX512DQ-NEXT:    movl $21845, %eax ## imm = 0x5555
+; AVX512DQ-NEXT:    movl $1, %ecx
+; AVX512DQ-NEXT:    cmovgl %eax, %ecx
 ; AVX512DQ-NEXT:    kmovw %ecx, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index b506789..fcb5869 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -7201,10 +7201,10 @@ define <16 x i1> @vmov_test15(i32 %x, i32 %y)  {
 ; GENERIC-LABEL: vmov_test15:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    movw $21845, %ax # imm = 0x5555
+; GENERIC-NEXT:    movl $21845, %eax # imm = 0x5555
 ; GENERIC-NEXT:    # sched: [1:0.33]
-; GENERIC-NEXT:    movw $1, %cx # sched: [1:0.33]
-; GENERIC-NEXT:    cmovgw %ax, %cx # sched: [2:0.67]
+; GENERIC-NEXT:    movl $1, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    cmovgl %eax, %ecx # sched: [2:0.67]
 ; GENERIC-NEXT:    kmovd %ecx, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7212,10 +7212,10 @@ define <16 x i1> @vmov_test15(i32 %x, i32 %y)  {
 ; SKX-LABEL: vmov_test15:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT:    movw $21845, %ax # imm = 0x5555
+; SKX-NEXT:    movl $21845, %eax # imm = 0x5555
 ; SKX-NEXT:    # sched: [1:0.25]
-; SKX-NEXT:    movw $1, %cx # sched: [1:0.25]
-; SKX-NEXT:    cmovgw %ax, %cx # sched: [1:0.50]
+; SKX-NEXT:    movl $1, %ecx # sched: [1:0.25]
+; SKX-NEXT:    cmovgl %eax, %ecx # sched: [1:0.50]
 ; SKX-NEXT:    kmovd %ecx, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
diff --git a/llvm/test/CodeGen/X86/bool-simplify.ll b/llvm/test/CodeGen/X86/bool-simplify.ll
index 60931a7..bbb7eb7 100644
--- a/llvm/test/CodeGen/X86/bool-simplify.ll
+++ b/llvm/test/CodeGen/X86/bool-simplify.ll
@@ -53,7 +53,7 @@ define i16 @rnd16(i16 %arg) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    rdrandw %cx
-; CHECK-NEXT:    cmovbw %di, %ax
+; CHECK-NEXT:    cmovbl %edi, %eax
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
@@ -105,7 +105,7 @@ define i16 @seed16(i16 %arg) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    rdseedw %cx
-; CHECK-NEXT:    cmovbw %di, %ax
+; CHECK-NEXT:    cmovbl %edi, %eax
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index e1c0703..4865d5b 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -938,8 +938,8 @@ define void @clamp(i32 %src, i16* %dst) {
 ; GENERIC-NEXT:    movl $32767, %eax ## imm = 0x7FFF
 ; GENERIC-NEXT:    cmovlel %edi, %eax
 ; GENERIC-NEXT:    cmpl $-32768, %eax ## imm = 0x8000
-; GENERIC-NEXT:    movw $-32768, %cx ## imm = 0x8000
-; GENERIC-NEXT:    cmovgew %ax, %cx
+; GENERIC-NEXT:    movl $32768, %ecx ## imm = 0x8000
+; GENERIC-NEXT:    cmovgel %eax, %ecx
 ; GENERIC-NEXT:    movw %cx, (%rsi)
 ; GENERIC-NEXT:    retq
 ;
@@ -948,9 +948,9 @@ define void @clamp(i32 %src, i16* %dst) {
 ; ATOM-NEXT:    cmpl $32767, %edi ## imm = 0x7FFF
 ; ATOM-NEXT:    movl $32767, %eax ## imm = 0x7FFF
 ; ATOM-NEXT:    cmovlel %edi, %eax
-; ATOM-NEXT:    movw $-32768, %cx ## imm = 0x8000
+; ATOM-NEXT:    movl $32768, %ecx ## imm = 0x8000
 ; ATOM-NEXT:    cmpl $-32768, %eax ## imm = 0x8000
-; ATOM-NEXT:    cmovgew %ax, %cx
+; ATOM-NEXT:    cmovgel %eax, %ecx
 ; ATOM-NEXT:    movw %cx, (%rsi)
 ; ATOM-NEXT:    retq
 ;
@@ -963,7 +963,7 @@ define void @clamp(i32 %src, i16* %dst) {
 ; MCU-NEXT:    movl %eax, %ecx
 ; MCU-NEXT:  .LBB23_2:
 ; MCU-NEXT:    cmpl $-32768, %ecx # imm = 0x8000
-; MCU-NEXT:    movw $-32768, %ax # imm = 0x8000
+; MCU-NEXT:    movl $32768, %eax # imm = 0x8000
 ; MCU-NEXT:    jl .LBB23_4
 ; MCU-NEXT:  # %bb.3:
 ; MCU-NEXT:    movl %ecx, %eax
@@ -1063,8 +1063,8 @@ define i16 @select_xor_1(i16 %A, i8 %cond) {
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    xorl $43, %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    cmovnew %ax, %di
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmovel %edi, %eax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 ;
 ; MCU-LABEL: select_xor_1:
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index a9f5b68..00fa427 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -73,7 +73,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
 ; KNL-32-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; KNL-32-NEXT:    movw $-1, %dx
+; KNL-32-NEXT:    movl $65535, %edx # imm = 0xFFFF
 ; KNL-32-NEXT:    .p2align 4, 0x90
 ; KNL-32-NEXT:  .LBB1_1: # %for_loop599
 ; KNL-32-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -81,7 +81,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
 ; KNL-32-NEXT:    movl %eax, %esi
 ; KNL-32-NEXT:    sbbl $0, %esi
 ; KNL-32-NEXT:    movl $0, %esi
-; KNL-32-NEXT:    cmovlw %dx, %si
+; KNL-32-NEXT:    cmovll %edx, %esi
 ; KNL-32-NEXT:    kmovw %esi, %k1
 ; KNL-32-NEXT:    kandw %k0, %k1, %k1
 ; KNL-32-NEXT:    kortestw %k1, %k1
-- 
2.7.4