From ac1b999e855ed9ab5908842be6c1e4cbd246d3bc Mon Sep 17 00:00:00 2001
From: "chenglin.bi"
Date: Mon, 21 Nov 2022 22:10:55 +0800
Subject: [PATCH] [DAGCombiner] fold or (and x, y), x --> x

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D138398
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  |  4 +++
 llvm/test/CodeGen/X86/avx512-select.ll         | 32 +++++---------
 llvm/test/CodeGen/X86/bswap_tree2.ll           | 22 ++++++--------
 llvm/test/CodeGen/X86/known-signbits-vector.ll | 42 ++++++--------------
 4 files changed, 31 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0457e1d..8989ea3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6988,6 +6988,10 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
     SDValue N00 = N0.getOperand(0);
     SDValue N01 = N0.getOperand(1);
 
+    // fold or (and x, y), x --> x
+    if (N00 == N1 || N01 == N1)
+      return N1;
+
     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
     // TODO: Set AllowUndefs = true.
     if (getBitwiseNotOperand(N01, N00,
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 4c7633f..536c667 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -742,12 +742,8 @@ define void @select_v1i1(ptr %w, ptr %x, ptr %y, i1 %z) nounwind {
 define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
 ; X86-AVX512F-LABEL: julia_issue36955:
 ; X86-AVX512F:         # %bb.0:
-; X86-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-; X86-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-; X86-AVX512F-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
-; X86-AVX512F-NEXT:    vcmplepd %zmm2, %zmm1, %k1
-; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
-; X86-AVX512F-NEXT:    korw %k0, %k1, %k0
+; X86-AVX512F-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; X86-AVX512F-NEXT:    vcmplepd %zmm0, %zmm1, %k0
 ; X86-AVX512F-NEXT:    kmovw %k0, %eax
 ; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-AVX512F-NEXT:    vzeroupper
@@ -755,12 +751,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
 ;
 ; X64-AVX512F-LABEL: julia_issue36955:
 ; X64-AVX512F:         # %bb.0:
-; X64-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-; X64-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-; X64-AVX512F-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
-; X64-AVX512F-NEXT:    vcmplepd %zmm2, %zmm1, %k1
-; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
-; X64-AVX512F-NEXT:    korw %k0, %k1, %k0
+; X64-AVX512F-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; X64-AVX512F-NEXT:    vcmplepd %zmm0, %zmm1, %k0
 ; X64-AVX512F-NEXT:    kmovw %k0, %eax
 ; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-AVX512F-NEXT:    vzeroupper
@@ -768,12 +760,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
 ;
 ; X86-AVX512BW-LABEL: julia_issue36955:
 ; X86-AVX512BW:         # %bb.0:
-; X86-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-; X86-AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X86-AVX512BW-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
-; X86-AVX512BW-NEXT:    vcmplepd %zmm3, %zmm1, %k1
-; X86-AVX512BW-NEXT:    vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
-; X86-AVX512BW-NEXT:    korw %k0, %k1, %k0
+; X86-AVX512BW-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; X86-AVX512BW-NEXT:    vcmplepd %zmm0, %zmm1, %k0
 ; X86-AVX512BW-NEXT:    kmovd %k0, %eax
 ; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-AVX512BW-NEXT:    vzeroupper
@@ -781,12 +769,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
 ;
 ; X64-AVX512BW-LABEL: julia_issue36955:
 ; X64-AVX512BW:         # %bb.0:
-; X64-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-; X64-AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X64-AVX512BW-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
-; X64-AVX512BW-NEXT:    vcmplepd %zmm3, %zmm1, %k1
-; X64-AVX512BW-NEXT:    vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
-; X64-AVX512BW-NEXT:    korw %k0, %k1, %k0
+; X64-AVX512BW-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; X64-AVX512BW-NEXT:    vcmplepd %zmm0, %zmm1, %k0
 ; X64-AVX512BW-NEXT:    kmovd %k0, %eax
 ; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-AVX512BW-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll
index ead7f4b..98b5146 100644
--- a/llvm/test/CodeGen/X86/bswap_tree2.ll
+++ b/llvm/test/CodeGen/X86/bswap_tree2.ll
@@ -10,23 +10,21 @@
 ; CHECK-LABEL: test1:
 ; CHECK:         # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movzwl %ax, %ecx
-; CHECK-NEXT:    orl %eax, %ecx
-; CHECK-NEXT:    orl $-16777216, %ecx # imm = 0xFF000000
-; CHECK-NEXT:    shrl $8, %ecx
-; CHECK-NEXT:    andl $16711935, %eax # imm = 0xFF00FF
-; CHECK-NEXT:    shll $8, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    andl $16711935, %ecx # imm = 0xFF00FF
+; CHECK-NEXT:    shll $8, %ecx
+; CHECK-NEXT:    orl $-16777216, %eax # imm = 0xFF000000
+; CHECK-NEXT:    shrl $8, %eax
 ; CHECK-NEXT:    orl %ecx, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:         # %bb.0:
-; CHECK64-NEXT:    movzwl %di, %eax
-; CHECK64-NEXT:    orl %edi, %eax
-; CHECK64-NEXT:    orl $-16777216, %eax # imm = 0xFF000000
-; CHECK64-NEXT:    shrl $8, %eax
-; CHECK64-NEXT:    andl $16711935, %edi # imm = 0xFF00FF
-; CHECK64-NEXT:    shll $8, %edi
+; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    andl $16711935, %eax # imm = 0xFF00FF
+; CHECK64-NEXT:    shll $8, %eax
+; CHECK64-NEXT:    orl $-16777216, %edi # imm = 0xFF000000
+; CHECK64-NEXT:    shrl $8, %edi
 ; CHECK64-NEXT:    orl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255 ; 0x000000ff
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 1eb1838..25d26372 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -371,14 +371,8 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
 ; X86-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
 ; X86:         # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vpsrlq $60, %xmm0, %xmm2
-; X86-NEXT:    vpsrlq $61, %xmm0, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,8,0]
-; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X86-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; X86-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; X86-NEXT:    vpxor %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
@@ -386,31 +380,13 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
 ; X86-NEXT:    popl %eax
 ; X86-NEXT:    retl
 ;
-; X64-AVX1-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
-; X64-AVX1:         # %bb.0:
-; X64-AVX1-NEXT:    vpsrlq $60, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpsrlq $61, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
-; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vpxor %xmm0, %xmm1, %xmm0
-; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
-; X64-AVX1-NEXT:    retq
-;
-; X64-AVX2-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
-; X64-AVX2:         # %bb.0:
-; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
-; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT:    vpor %xmm1, %xmm2, %xmm1
-; X64-AVX2-NEXT:    vpxor %xmm0, %xmm1, %xmm0
-; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
-; X64-AVX2-NEXT:    retq
+; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
+; X64:         # %bb.0:
+; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
   %3 = sext <2 x i32> %2 to <2 x i64>
-- 
2.7.4
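
Note (illustration only, not part of the patch): the new combine relies on the
absorption identity (x & y) | x == x, since every bit set in (x & y) is already
set in x; that is why visitORCommutative can return N1 directly once either
operand of the AND matches N1. A minimal standalone C++ sketch that
exhaustively checks the identity over all 8-bit pairs:

// check_absorption.cpp -- exhaustive check of the identity behind
// the DAGCombiner fold: or (and x, y), x --> x
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      auto a = static_cast<uint8_t>(x);
      auto b = static_cast<uint8_t>(y);
      // (a & b) can only contain bits already set in a, so OR-ing it
      // back into a must be a no-op.
      if (static_cast<uint8_t>((a & b) | a) != a) {
        std::printf("counterexample: x=%u y=%u\n", x, y);
        return 1;
      }
    }
  }
  std::puts("(x & y) | x == x holds for all 8-bit x, y");
  return 0;
}

The same argument is width-independent, so the fold is valid for any integer
or vector type the DAG node may have.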