From: Craig Topper Date: Sun, 28 Feb 2021 16:20:57 +0000 (-0800) Subject: [DAGCombiner] Don't skip no overflow check on UMULO if the first computeKnownBits... X-Git-Tag: llvmorg-14-init~13820 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ca5247bb1770a1dfa56b78303d99f1cc9a0a06ee;p=platform%2Fupstream%2Fllvm.git [DAGCombiner] Don't skip no overflow check on UMULO if the first computeKnownBits call doesn't return any 0 bits. Even if the first computeKnownBits call doesn't return any zero bits it is possible the other operand has bitwidth-1 leading zeros. In that case overflow is still impossible. So always call computeKnownBits for both operands. --- diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 686c7a4..01b9873 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4672,14 +4672,12 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); } else { KnownBits N1Known = DAG.computeKnownBits(N1); - if (N1Known.Zero.getBoolValue()) { - KnownBits N0Known = DAG.computeKnownBits(N0); - bool Overflow; - (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow); - if (!Overflow) - return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), - DAG.getConstant(0, DL, CarryVT)); - } + KnownBits N0Known = DAG.computeKnownBits(N0); + bool Overflow; + (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow); + if (!Overflow) + return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); } return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll index c84c76f..d703e76 100644 --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -291,23 +291,18 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> 
%a1, <4 x i1>* %p2) nounwind { ; CHECK-LABEL: umulo_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4h, #1 -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: mul v1.4h, v0.4h, v1.4h -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: umov w8, v0.h[0] ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w8, w9, #1, #1 -; CHECK-NEXT: umov w9, v1.h[2] +; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: ushr v0.4h, v1.4h, #1 ; CHECK-NEXT: bfi w8, w9, #2, #1 -; CHECK-NEXT: umov w9, v1.h[3] -; CHECK-NEXT: cmtst v0.4h, v0.4h, v0.4h +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: bfi w8, w9, #3, #29 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll index 7f9f394..5d29e20 100644 --- a/llvm/test/CodeGen/X86/vec_umulo.ll +++ b/llvm/test/CodeGen/X86/vec_umulo.ll @@ -3172,240 +3172,40 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind { ; SSE-LABEL: umulo_v4i1: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: pmaddwd %xmm1, %xmm0 -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: psrld $1, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm2 -; SSE-NEXT: pcmpeqd %xmm2, %xmm1 -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: pslld $31, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: movb %al, (%rdi) -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: 
umulo_v4i1: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vpsrld $1, %xmm1, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX1-NEXT: vmovmskps %xmm1, %eax -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: retq -; -; AVX2-LABEL: umulo_v4i1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpsrld $1, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX2-NEXT: vmovmskps %xmm1, %eax -; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: retq +; AVX-LABEL: umulo_v4i1: +; AVX: # %bb.0: +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-NEXT: vmovmskps %xmm0, %eax +; AVX-NEXT: movb %al, (%rdi) +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: umulo_v4i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbx +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k0 -; AVX512F-NEXT: kshiftrw $3, %k0, %k1 -; AVX512F-NEXT: kmovw %k1, %r8d -; AVX512F-NEXT: andb $1, %r8b -; AVX512F-NEXT: vpslld $31, %xmm1, %xmm0 -; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k1 -; AVX512F-NEXT: kshiftrw $3, %k1, %k2 -; AVX512F-NEXT: kmovw %k2, %r9d -; AVX512F-NEXT: andb $1, %r9b -; AVX512F-NEXT: kshiftrw $2, %k0, %k2 -; AVX512F-NEXT: kmovw %k2, %r10d -; AVX512F-NEXT: andb $1, %r10b -; AVX512F-NEXT: kshiftrw $2, %k1, %k2 -; AVX512F-NEXT: kmovw %k2, 
%r11d -; AVX512F-NEXT: andb $1, %r11b -; AVX512F-NEXT: kshiftrw $1, %k0, %k2 -; AVX512F-NEXT: kmovw %k2, %ecx -; AVX512F-NEXT: andb $1, %cl -; AVX512F-NEXT: kshiftrw $1, %k1, %k2 -; AVX512F-NEXT: kmovw %k2, %edx -; AVX512F-NEXT: andb $1, %dl -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andb $1, %al -; AVX512F-NEXT: kmovw %k1, %esi -; AVX512F-NEXT: andb $1, %sil -; AVX512F-NEXT: movw $-3, %bx -; AVX512F-NEXT: kmovw %ebx, %k0 -; AVX512F-NEXT: # kill: def $al killed $al killed $eax -; AVX512F-NEXT: mulb %sil -; AVX512F-NEXT: movl %eax, %esi -; AVX512F-NEXT: testb $2, %al -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k0, %k1, %k1 -; AVX512F-NEXT: movl %ecx, %eax -; AVX512F-NEXT: mulb %dl -; AVX512F-NEXT: movl %eax, %ecx -; AVX512F-NEXT: testb $2, %al -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: kshiftlw $15, %k2, %k2 -; AVX512F-NEXT: kshiftrw $14, %k2, %k2 -; AVX512F-NEXT: korw %k2, %k1, %k2 -; AVX512F-NEXT: movw $-5, %ax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kandw %k1, %k2, %k2 -; AVX512F-NEXT: movl %r10d, %eax -; AVX512F-NEXT: mulb %r11b -; AVX512F-NEXT: movl %eax, %edx -; AVX512F-NEXT: testb $2, %al -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: kmovw %eax, %k3 -; AVX512F-NEXT: kshiftlw $2, %k3, %k3 -; AVX512F-NEXT: korw %k3, %k2, %k2 -; AVX512F-NEXT: kshiftlw $13, %k2, %k2 -; AVX512F-NEXT: kshiftrw $13, %k2, %k2 -; AVX512F-NEXT: movl %r8d, %eax -; AVX512F-NEXT: mulb %r9b -; AVX512F-NEXT: # kill: def $al killed $al def $eax -; AVX512F-NEXT: testb $2, %al -; AVX512F-NEXT: setne %bl -; AVX512F-NEXT: kmovw %ebx, %k3 -; AVX512F-NEXT: kshiftlw $3, %k3, %k3 -; AVX512F-NEXT: korw %k3, %k2, %k2 -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z} -; AVX512F-NEXT: andl $1, %esi -; AVX512F-NEXT: kmovw %esi, %k2 -; AVX512F-NEXT: kandw %k0, %k2, %k0 -; AVX512F-NEXT: kmovw %ecx, %k2 -; AVX512F-NEXT: kshiftlw $15, %k2, %k2 -; AVX512F-NEXT: kshiftrw $14, 
%k2, %k2 -; AVX512F-NEXT: korw %k2, %k0, %k0 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %edx, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 -; AVX512F-NEXT: movw $-9, %cx -; AVX512F-NEXT: kmovw %ecx, %k1 -; AVX512F-NEXT: kandw %k1, %k0, %k0 -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $12, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: movb %al, (%rdi) -; AVX512F-NEXT: popq %rbx +; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: umulo_v4i1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: pushq %rbx +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: kmovd %k1, %r8d -; AVX512BW-NEXT: andb $1, %r8b -; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 -; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k1 -; AVX512BW-NEXT: kshiftrw $3, %k1, %k2 -; AVX512BW-NEXT: kmovd %k2, %r9d -; AVX512BW-NEXT: andb $1, %r9b -; AVX512BW-NEXT: kshiftrw $2, %k0, %k2 -; AVX512BW-NEXT: kmovd %k2, %r10d -; AVX512BW-NEXT: andb $1, %r10b -; AVX512BW-NEXT: kshiftrw $2, %k1, %k2 -; AVX512BW-NEXT: kmovd %k2, %r11d -; AVX512BW-NEXT: andb $1, %r11b -; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512BW-NEXT: kmovd %k2, %ecx -; AVX512BW-NEXT: andb $1, %cl -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kmovd %k2, %edx -; AVX512BW-NEXT: andb $1, %dl -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andb $1, %al -; AVX512BW-NEXT: kmovd %k1, %esi -; AVX512BW-NEXT: andb $1, %sil -; AVX512BW-NEXT: movw $-3, %bx -; AVX512BW-NEXT: kmovd %ebx, %k0 -; AVX512BW-NEXT: # kill: def $al killed $al killed $eax -; AVX512BW-NEXT: mulb %sil -; AVX512BW-NEXT: movl %eax, %esi -; AVX512BW-NEXT: testb $2, %al -; AVX512BW-NEXT: setne %al -; AVX512BW-NEXT: kmovd %eax, %k1 -; 
AVX512BW-NEXT: kandw %k0, %k1, %k1 -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: mulb %dl -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: testb $2, %al -; AVX512BW-NEXT: setne %al -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512BW-NEXT: kshiftrw $14, %k2, %k2 -; AVX512BW-NEXT: korw %k2, %k1, %k2 -; AVX512BW-NEXT: movw $-5, %ax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kandw %k1, %k2, %k2 -; AVX512BW-NEXT: movl %r10d, %eax -; AVX512BW-NEXT: mulb %r11b -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: testb $2, %al -; AVX512BW-NEXT: setne %al -; AVX512BW-NEXT: kmovd %eax, %k3 -; AVX512BW-NEXT: kshiftlw $2, %k3, %k3 -; AVX512BW-NEXT: korw %k3, %k2, %k2 -; AVX512BW-NEXT: kshiftlw $13, %k2, %k2 -; AVX512BW-NEXT: kshiftrw $13, %k2, %k2 -; AVX512BW-NEXT: movl %r8d, %eax -; AVX512BW-NEXT: mulb %r9b -; AVX512BW-NEXT: # kill: def $al killed $al def $eax -; AVX512BW-NEXT: testb $2, %al -; AVX512BW-NEXT: setne %bl -; AVX512BW-NEXT: kmovd %ebx, %k3 -; AVX512BW-NEXT: kshiftlw $3, %k3, %k3 -; AVX512BW-NEXT: korw %k3, %k2, %k2 -; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z} -; AVX512BW-NEXT: andl $1, %esi -; AVX512BW-NEXT: kmovw %esi, %k2 -; AVX512BW-NEXT: kandw %k0, %k2, %k0 -; AVX512BW-NEXT: kmovd %ecx, %k2 -; AVX512BW-NEXT: kshiftlw $15, %k2, %k2 -; AVX512BW-NEXT: kshiftrw $14, %k2, %k2 -; AVX512BW-NEXT: korw %k2, %k0, %k0 -; AVX512BW-NEXT: kandw %k1, %k0, %k0 -; AVX512BW-NEXT: kmovd %edx, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k0 -; AVX512BW-NEXT: movw $-9, %cx -; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: kandw %k1, %k0, %k0 -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) -; AVX512BW-NEXT: popq %rbx +; 
AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: retq %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) %val = extractvalue {<4 x i1>, <4 x i1>} %t, 0