From 8d0c8c9be158d2c83864c3124f258f8790476602 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 8 Feb 2018 08:29:43 +0000 Subject: [PATCH] [X86] Support folding in a k-register OR when creating KORTEST from scalar compare of a bitcast from vXi1. This should allow us to remove the kortest intrinsic from IR and use compare+bitcast+or in IR instead. llvm-svn: 324580 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++++++- llvm/test/CodeGen/X86/avx512-mask-op.ll | 12 ++++-------- llvm/test/CodeGen/X86/avx512-schedule.ll | 14 +++++--------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 500a1d3..5aaf241 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18149,7 +18149,15 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, } else return SDValue(); - SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0); + // If the input is an OR, we can combine its operands into the KORTEST. 
+ SDValue LHS = Op0; + SDValue RHS = Op0; + if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) { + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1); + } + + SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); return getSETCC(X86CC, KORTEST, dl, DAG); } diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 4766c8b..6953cd9 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -583,8 +583,7 @@ define void @test7(<8 x i1> %mask) { ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: movb $85, %al ; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: kortestb %k1, %k0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test7: @@ -606,8 +605,7 @@ define void @test7(<8 x i1> %mask) { ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: movb $85, %al ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: korb %k1, %k0, %k0 -; AVX512DQ-NEXT: kortestb %k0, %k0 +; AVX512DQ-NEXT: kortestb %k1, %k0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq allocas: @@ -1787,8 +1785,7 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k1 -; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: kortestd %k0, %k0 +; SKX-NEXT: kortestd %k1, %k0 ; SKX-NEXT: je LBB43_2 ; SKX-NEXT: ## %bb.1: ## %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) @@ -1813,8 +1810,7 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1 ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1 -; AVX512BW-NEXT: kord %k1, %k0, %k0 -; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: kortestd %k1, %k0 ; AVX512BW-NEXT: je LBB43_2 ; AVX512BW-NEXT: ## %bb.1: ## %L1 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi) diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll 
b/llvm/test/CodeGen/X86/avx512-schedule.ll index 69d0a62..1055570 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -7030,8 +7030,7 @@ define void @vcmp_test7(<8 x i1> %mask) { ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: movb $85, %al # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestb %k1, %k0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test7: @@ -7040,8 +7039,7 @@ define void @vcmp_test7(<8 x i1> %mask) { ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: movb $85, %al # sched: [1:0.25] ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kortestb %k1, %k0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] allocas: %a= or <8 x i1> %mask, @@ -7683,8 +7681,7 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kortestd %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:1.00] ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] @@ -7703,14 +7700,13 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] +; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: 
[8:0.50] -; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] -; SKX-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kortestd %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00] ; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -- 2.7.4