From 010ae8dcbbd8861f4e9f6883218b2e51c3163b9c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 20 Feb 2018 17:41:00 +0000 Subject: [PATCH] [X86] Promote 16-bit cmovs to 32-bits This allows us to avoid an opsize prefix. And forcing some move immediates to i32 avoids a length changing prefix on those instructions. This mostly replaces the existing combine we had for zext/sext+cmov of constants. I left in a case for sign extending a 32 bit cmov of constants to 64 bits. Differential Revision: https://reviews.llvm.org/D43327 llvm-svn: 325601 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 57 ++++++++++++++++++++++++-- llvm/test/CodeGen/X86/avx512-insert-extract.ll | 5 ++- llvm/test/CodeGen/X86/avx512-mask-op.ll | 24 +++++------ llvm/test/CodeGen/X86/avx512-schedule.ll | 12 +++--- llvm/test/CodeGen/X86/bool-simplify.ll | 4 +- llvm/test/CodeGen/X86/select.ll | 14 +++---- llvm/test/CodeGen/X86/setcc-lowering.ll | 4 +- 7 files changed, 86 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c6916fd..2a63c9a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18713,6 +18713,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // Promote i16 cmovs if it won't prevent folding a load. + if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) { + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); + Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); + SDValue Ops[] = { Op2, Op1, CC, Cond }; + SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); + return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); + } + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
SDValue Ops[] = { Op2, Op1, CC, Cond }; @@ -35935,12 +35944,54 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +// Try to combine sext_in_reg of a cmov of constants by extending the constants. +static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - if (!VT.isVector()) + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); + + if (ExtraVT != MVT::i16) + return SDValue(); + + // Look through single use any_extends. + if (N0.getOpcode() == ISD::ANY_EXTEND && N0.hasOneUse()) + N0 = N0.getOperand(0); + + // See if we have a single use cmov. + if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse()) + return SDValue(); + + SDValue CMovOp0 = N0.getOperand(0); + SDValue CMovOp1 = N0.getOperand(1); + + // Make sure both operands are constants. + if (!isa<ConstantSDNode>(CMovOp0.getNode()) || + !isa<ConstantSDNode>(CMovOp1.getNode())) return SDValue(); + SDLoc DL(N); + + // If we looked through an any_extend above, add one to the constants. 
+ if (N0.getValueType() != VT) { + CMovOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp0); + CMovOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp1); + } + + CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp0, N1); + CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp1, N1); + + return DAG.getNode(X86ISD::CMOV, DL, VT, CMovOp0, CMovOp1, + N0.getOperand(2), N0.getOperand(3)); +} + +static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + if (SDValue V = combineSextInRegCmov(N, DAG)) + return V; + + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 205af79..2369855 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -287,8 +287,9 @@ define i16 @test15(i1 *%addr) { ; CHECK: ## %bb.0: ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: cmpb $0, (%rdi) -; CHECK-NEXT: movw $-1, %ax -; CHECK-NEXT: cmovew %cx, %ax +; CHECK-NEXT: movl $65535, %eax ## imm = 0xFFFF +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %x = load i1 , i1 * %addr, align 1 %x1 = insertelement <16 x i1> undef, i1 %x, i32 10 diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 4a78632..71b17ff 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -818,9 +818,9 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ; KNL-LABEL: test15: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: movw $21845, %ax ## imm = 0x5555 -; KNL-NEXT: movw $1, %cx -; KNL-NEXT: cmovgw %ax, %cx +; KNL-NEXT: movl $21845, %eax ## imm = 0x5555 +; KNL-NEXT: movl $1, %ecx +; KNL-NEXT: cmovgl %eax, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, 
%zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 @@ -830,9 +830,9 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ; SKX-LABEL: test15: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: movw $21845, %ax ## imm = 0x5555 -; SKX-NEXT: movw $1, %cx -; SKX-NEXT: cmovgw %ax, %cx +; SKX-NEXT: movl $21845, %eax ## imm = 0x5555 +; SKX-NEXT: movl $1, %ecx +; SKX-NEXT: cmovgl %eax, %ecx ; SKX-NEXT: kmovd %ecx, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq @@ -840,9 +840,9 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ; AVX512BW-LABEL: test15: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: movw $21845, %ax ## imm = 0x5555 -; AVX512BW-NEXT: movw $1, %cx -; AVX512BW-NEXT: cmovgw %ax, %cx +; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555 +; AVX512BW-NEXT: movl $1, %ecx +; AVX512BW-NEXT: cmovgl %eax, %ecx ; AVX512BW-NEXT: kmovd %ecx, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 @@ -852,9 +852,9 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ; AVX512DQ-LABEL: test15: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: movw $21845, %ax ## imm = 0x5555 -; AVX512DQ-NEXT: movw $1, %cx -; AVX512DQ-NEXT: cmovgw %ax, %cx +; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555 +; AVX512DQ-NEXT: movl $1, %ecx +; AVX512DQ-NEXT: cmovgl %eax, %ecx ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index b506789..fcb5869 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -7201,10 +7201,10 @@ define <16 x i1> @vmov_test15(i32 %x, i32 %y) { ; GENERIC-LABEL: vmov_test15: ; GENERIC: # %bb.0: ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: movw $21845, %ax # imm = 0x5555 +; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555 ; GENERIC-NEXT: # sched: [1:0.33] -; 
GENERIC-NEXT: movw $1, %cx # sched: [1:0.33] -; GENERIC-NEXT: cmovgw %ax, %cx # sched: [2:0.67] +; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33] +; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67] ; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7212,10 +7212,10 @@ define <16 x i1> @vmov_test15(i32 %x, i32 %y) { ; SKX-LABEL: vmov_test15: ; SKX: # %bb.0: ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: movw $21845, %ax # imm = 0x5555 +; SKX-NEXT: movl $21845, %eax # imm = 0x5555 ; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: movw $1, %cx # sched: [1:0.25] -; SKX-NEXT: cmovgw %ax, %cx # sched: [1:0.50] +; SKX-NEXT: movl $1, %ecx # sched: [1:0.25] +; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50] ; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/bool-simplify.ll b/llvm/test/CodeGen/X86/bool-simplify.ll index 60931a7..bbb7eb7 100644 --- a/llvm/test/CodeGen/X86/bool-simplify.ll +++ b/llvm/test/CodeGen/X86/bool-simplify.ll @@ -53,7 +53,7 @@ define i16 @rnd16(i16 %arg) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdrandw %cx -; CHECK-NEXT: cmovbw %di, %ax +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq @@ -105,7 +105,7 @@ define i16 @seed16(i16 %arg) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdseedw %cx -; CHECK-NEXT: cmovbw %di, %ax +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index e1c0703..4865d5b 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -938,8 +938,8 @@ define void @clamp(i32 %src, i16* %dst) { ; 
GENERIC-NEXT: movl $32767, %eax ## imm = 0x7FFF ; GENERIC-NEXT: cmovlel %edi, %eax ; GENERIC-NEXT: cmpl $-32768, %eax ## imm = 0x8000 -; GENERIC-NEXT: movw $-32768, %cx ## imm = 0x8000 -; GENERIC-NEXT: cmovgew %ax, %cx +; GENERIC-NEXT: movl $32768, %ecx ## imm = 0x8000 +; GENERIC-NEXT: cmovgel %eax, %ecx ; GENERIC-NEXT: movw %cx, (%rsi) ; GENERIC-NEXT: retq ; @@ -948,9 +948,9 @@ define void @clamp(i32 %src, i16* %dst) { ; ATOM-NEXT: cmpl $32767, %edi ## imm = 0x7FFF ; ATOM-NEXT: movl $32767, %eax ## imm = 0x7FFF ; ATOM-NEXT: cmovlel %edi, %eax -; ATOM-NEXT: movw $-32768, %cx ## imm = 0x8000 +; ATOM-NEXT: movl $32768, %ecx ## imm = 0x8000 ; ATOM-NEXT: cmpl $-32768, %eax ## imm = 0x8000 -; ATOM-NEXT: cmovgew %ax, %cx +; ATOM-NEXT: cmovgel %eax, %ecx ; ATOM-NEXT: movw %cx, (%rsi) ; ATOM-NEXT: retq ; @@ -963,7 +963,7 @@ define void @clamp(i32 %src, i16* %dst) { ; MCU-NEXT: movl %eax, %ecx ; MCU-NEXT: .LBB23_2: ; MCU-NEXT: cmpl $-32768, %ecx # imm = 0x8000 -; MCU-NEXT: movw $-32768, %ax # imm = 0x8000 +; MCU-NEXT: movl $32768, %eax # imm = 0x8000 ; MCU-NEXT: jl .LBB23_4 ; MCU-NEXT: # %bb.3: ; MCU-NEXT: movl %ecx, %eax @@ -1063,8 +1063,8 @@ define i16 @select_xor_1(i16 %A, i8 %cond) { ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl $43, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovnew %ax, %di -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmovel %edi, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_1: diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll index a9f5b68..00fa427 100644 --- a/llvm/test/CodeGen/X86/setcc-lowering.ll +++ b/llvm/test/CodeGen/X86/setcc-lowering.ll @@ -73,7 +73,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) { ; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; KNL-32-NEXT: movw $-1, %dx +; KNL-32-NEXT: movl $65535, %edx # imm = 0xFFFF ; KNL-32-NEXT: .p2align 4, 
0x90 ; KNL-32-NEXT: .LBB1_1: # %for_loop599 ; KNL-32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -81,7 +81,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) { ; KNL-32-NEXT: movl %eax, %esi ; KNL-32-NEXT: sbbl $0, %esi ; KNL-32-NEXT: movl $0, %esi -; KNL-32-NEXT: cmovlw %dx, %si +; KNL-32-NEXT: cmovll %edx, %esi ; KNL-32-NEXT: kmovw %esi, %k1 ; KNL-32-NEXT: kandw %k0, %k1, %k1 ; KNL-32-NEXT: kortestw %k1, %k1 -- 2.7.4