From: David Bolvansky Date: Sat, 29 Sep 2018 21:00:37 +0000 (+0000) Subject: [DAGCombiner][NFC] Tests for X div/rem Y single bit fold X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=09fd8172df9c859d0a18a2a1e7361cd9c75135b1;p=platform%2Fupstream%2Fllvm.git [DAGCombiner][NFC] Tests for X div/rem Y single bit fold llvm-svn: 343392 --- diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 07a9dfa..72d458c 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -3284,3 +3284,327 @@ define <16 x i8> @pr38658(<16 x i8> %x) { %1 = sdiv <16 x i8> %x, ret <16 x i8> %1 } + +define i1 @bool_sdiv(i1 %x, i1 %y) { +; CHECK-LABEL: bool_sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: cbtw +; CHECK-NEXT: andb $1, %sil +; CHECK-NEXT: negb %sil +; CHECK-NEXT: idivb %sil +; CHECK-NEXT: retq + %r = sdiv i1 %x, %y + ret i1 %r +} + +define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) { +; SSE2-LABEL: boolvec_sdiv: +; SSE2: # %bb.0: +; SSE2-NEXT: pslld $31, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pslld $31, %xmm0 +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] +; SSE2-NEXT: movd %xmm2, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; SSE2-NEXT: movd %xmm2, %ecx +; SSE2-NEXT: cltd +; SSE2-NEXT: idivl %ecx +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] +; SSE2-NEXT: movd %xmm3, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; SSE2-NEXT: movd %xmm3, %ecx +; SSE2-NEXT: cltd +; SSE2-NEXT: idivl %ecx +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movd %xmm1, %ecx +; SSE2-NEXT: cltd +; SSE2-NEXT: idivl %ecx +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %ecx +; SSE2-NEXT: cltd +; SSE2-NEXT: idivl %ecx +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: boolvec_sdiv: +; SSE41: # %bb.0: +; SSE41-NEXT: pslld $31, %xmm1 +; SSE41-NEXT: psrad $31, %xmm1 +; SSE41-NEXT: pslld $31, %xmm0 +; SSE41-NEXT: psrad $31, %xmm0 +; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %ecx +; SSE41-NEXT: cltd +; SSE41-NEXT: idivl %ecx +; SSE41-NEXT: movl %eax, %ecx +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: movd %xmm1, %esi +; SSE41-NEXT: cltd +; SSE41-NEXT: idivl %esi +; SSE41-NEXT: movd %eax, %xmm2 +; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 +; SSE41-NEXT: pextrd $2, %xmm0, %eax +; SSE41-NEXT: pextrd $2, %xmm1, %ecx +; SSE41-NEXT: cltd +; SSE41-NEXT: idivl %ecx +; SSE41-NEXT: pinsrd $2, %eax, %xmm2 +; SSE41-NEXT: pextrd $3, %xmm0, %eax +; SSE41-NEXT: pextrd $3, %xmm1, %ecx +; SSE41-NEXT: cltd +; SSE41-NEXT: idivl %ecx +; SSE41-NEXT: pinsrd $3, %eax, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: boolvec_sdiv: +; AVX1: # %bb.0: +; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: vpextrd $1, %xmm1, %ecx 
+; AVX1-NEXT: cltd +; AVX1-NEXT: idivl %ecx +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: vmovd %xmm1, %esi +; AVX1-NEXT: cltd +; AVX1-NEXT: idivl %esi +; AVX1-NEXT: vmovd %eax, %xmm2 +; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: vpextrd $2, %xmm1, %ecx +; AVX1-NEXT: cltd +; AVX1-NEXT: idivl %ecx +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: vpextrd $3, %xmm1, %ecx +; AVX1-NEXT: cltd +; AVX1-NEXT: idivl %ecx +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: boolvec_sdiv: +; AVX2: # %bb.0: +; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX2-NEXT: vpextrd $1, %xmm0, %eax +; AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; AVX2-NEXT: cltd +; AVX2-NEXT: idivl %ecx +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vmovd %xmm1, %esi +; AVX2-NEXT: cltd +; AVX2-NEXT: idivl %esi +; AVX2-NEXT: vmovd %eax, %xmm2 +; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $2, %xmm0, %eax +; AVX2-NEXT: vpextrd $2, %xmm1, %ecx +; AVX2-NEXT: cltd +; AVX2-NEXT: idivl %ecx +; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $3, %xmm0, %eax +; AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; AVX2-NEXT: cltd +; AVX2-NEXT: idivl %ecx +; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: boolvec_sdiv: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3 +; AVX512F-NEXT: kshiftrw $3, %k3, %k0 +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4 +; AVX512F-NEXT: kshiftrw $3, %k4, %k1 +; AVX512F-NEXT: kshiftrw $2, %k3, %k2 +; AVX512F-NEXT: kshiftrw $2, %k4, %k5 +; AVX512F-NEXT: kmovw %k5, %ecx +; AVX512F-NEXT: kshiftrw $1, %k3, %k5 +; AVX512F-NEXT: kmovw %k3, %edi +; AVX512F-NEXT: kshiftrw $1, %k4, %k3 +; AVX512F-NEXT: kmovw %k4, %esi +; AVX512F-NEXT: kmovw %k5, %edx +; AVX512F-NEXT: kmovw %k3, %eax +; AVX512F-NEXT: andb $1, %al +; AVX512F-NEXT: negb %al +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: cbtw +; AVX512F-NEXT: andb $1, %dl +; AVX512F-NEXT: negb %dl +; AVX512F-NEXT: idivb %dl +; AVX512F-NEXT: movl %eax, %edx +; AVX512F-NEXT: andb $1, %sil +; AVX512F-NEXT: negb %sil +; AVX512F-NEXT: movl %esi, %eax +; AVX512F-NEXT: cbtw +; AVX512F-NEXT: andb $1, %dil +; AVX512F-NEXT: negb %dil +; AVX512F-NEXT: idivb %dil +; AVX512F-NEXT: movl %eax, %esi +; AVX512F-NEXT: andb $1, %cl +; AVX512F-NEXT: negb %cl +; AVX512F-NEXT: movl %ecx, %eax +; AVX512F-NEXT: cbtw +; AVX512F-NEXT: kmovw %k2, %ecx +; AVX512F-NEXT: andb $1, %cl +; AVX512F-NEXT: negb %cl +; AVX512F-NEXT: idivb %cl +; AVX512F-NEXT: movl %eax, %ecx +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: andb $1, %al +; AVX512F-NEXT: negb %al +; AVX512F-NEXT: # kill: def $al killed $al killed $eax +; AVX512F-NEXT: cbtw +; AVX512F-NEXT: kmovw %k0, %edi +; AVX512F-NEXT: andb $1, %dil +; AVX512F-NEXT: negb %dil +; AVX512F-NEXT: idivb %dil +; AVX512F-NEXT: # kill: def $al killed $al def $eax +; AVX512F-NEXT: kmovw %edx, %k0 +; AVX512F-NEXT: kmovw %esi, %k1 +; AVX512F-NEXT: kshiftrw $1, %k1, %k2 +; AVX512F-NEXT: kxorw %k0, %k2, %k0 +; AVX512F-NEXT: kshiftlw $15, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k0 +; AVX512F-NEXT: kxorw %k0, %k1, %k0 +; AVX512F-NEXT: kshiftrw $2, %k0, %k1 +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: 
kxorw %k2, %k1, %k1 +; AVX512F-NEXT: kshiftlw $15, %k1, %k1 +; AVX512F-NEXT: kshiftrw $13, %k1, %k1 +; AVX512F-NEXT: kxorw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftlw $13, %k0, %k0 +; AVX512F-NEXT: kshiftrw $13, %k0, %k0 +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kshiftlw $3, %k1, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: boolvec_sdiv: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX512BW-NEXT: vptestmd %xmm1, %xmm1, %k3 +; AVX512BW-NEXT: kshiftrw $3, %k3, %k0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k4 +; AVX512BW-NEXT: kshiftrw $3, %k4, %k1 +; AVX512BW-NEXT: kshiftrw $2, %k3, %k2 +; AVX512BW-NEXT: kshiftrw $2, %k4, %k5 +; AVX512BW-NEXT: kmovd %k5, %ecx +; AVX512BW-NEXT: kshiftrw $1, %k3, %k5 +; AVX512BW-NEXT: kmovd %k3, %edi +; AVX512BW-NEXT: kshiftrw $1, %k4, %k3 +; AVX512BW-NEXT: kmovd %k4, %esi +; AVX512BW-NEXT: kmovd %k5, %edx +; AVX512BW-NEXT: kmovd %k3, %eax +; AVX512BW-NEXT: andb $1, %al +; AVX512BW-NEXT: negb %al +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: cbtw +; AVX512BW-NEXT: andb $1, %dl +; AVX512BW-NEXT: negb %dl +; AVX512BW-NEXT: idivb %dl +; AVX512BW-NEXT: movl %eax, %edx +; AVX512BW-NEXT: andb $1, %sil +; AVX512BW-NEXT: negb %sil +; AVX512BW-NEXT: movl %esi, %eax +; AVX512BW-NEXT: cbtw +; AVX512BW-NEXT: andb $1, %dil +; AVX512BW-NEXT: negb %dil +; AVX512BW-NEXT: idivb %dil +; AVX512BW-NEXT: movl %eax, %esi +; AVX512BW-NEXT: andb $1, %cl +; AVX512BW-NEXT: negb %cl +; AVX512BW-NEXT: movl %ecx, %eax +; AVX512BW-NEXT: cbtw +; AVX512BW-NEXT: kmovd %k2, %ecx +; AVX512BW-NEXT: andb $1, %cl +; AVX512BW-NEXT: negb %cl +; AVX512BW-NEXT: idivb %cl +; AVX512BW-NEXT: movl %eax, %ecx +; AVX512BW-NEXT: kmovd %k1, %eax +; AVX512BW-NEXT: andb $1, %al +; AVX512BW-NEXT: negb %al +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: cbtw +; AVX512BW-NEXT: kmovd %k0, %edi +; AVX512BW-NEXT: andb $1, %dil +; AVX512BW-NEXT: negb %dil +; AVX512BW-NEXT: idivb %dil +; AVX512BW-NEXT: # kill: def $al killed $al def $eax +; AVX512BW-NEXT: kmovd %edx, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 +; AVX512BW-NEXT: kxorw %k0, %k2, %k0 +; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 +; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512BW-NEXT: kxorw %k0, %k1, %k0 +; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 +; AVX512BW-NEXT: kmovd %ecx, %k2 +; AVX512BW-NEXT: kxorw %k2, %k1, %k1 +; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 +; AVX512BW-NEXT: kxorw %k1, %k0, %k0 +; AVX512BW-NEXT: kshiftlw $13, %k0, %k0 +; AVX512BW-NEXT: kshiftrw $13, %k0, %k0 +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kshiftlw $3, %k1, %k1 +; AVX512BW-NEXT: korw %k1, %k0, %k1 +; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512BW-NEXT: retq +; +; XOP-LABEL: boolvec_sdiv: +; XOP: # %bb.0: +; XOP-NEXT: vpslld $31, %xmm1, %xmm1 +; XOP-NEXT: vpsrad $31, %xmm1, %xmm1 +; XOP-NEXT: vpslld $31, %xmm0, %xmm0 +; XOP-NEXT: vpsrad $31, %xmm0, %xmm0 +; XOP-NEXT: vpextrd $1, %xmm0, %eax +; XOP-NEXT: vpextrd $1, %xmm1, %ecx +; XOP-NEXT: cltd +; XOP-NEXT: idivl %ecx +; XOP-NEXT: movl %eax, %ecx +; XOP-NEXT: vmovd %xmm0, %eax +; XOP-NEXT: vmovd %xmm1, %esi +; XOP-NEXT: cltd +; XOP-NEXT: idivl %esi +; XOP-NEXT: vmovd %eax, %xmm2 +; 
XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; XOP-NEXT: vpextrd $2, %xmm0, %eax +; XOP-NEXT: vpextrd $2, %xmm1, %ecx +; XOP-NEXT: cltd +; XOP-NEXT: idivl %ecx +; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; XOP-NEXT: vpextrd $3, %xmm0, %eax +; XOP-NEXT: vpextrd $3, %xmm1, %ecx +; XOP-NEXT: cltd +; XOP-NEXT: idivl %ecx +; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; XOP-NEXT: retq + %r = sdiv <4 x i1> %x, %y + ret <4 x i1> %r +} diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll index 75ee775..7af33fe 100644 --- a/llvm/test/CodeGen/X86/combine-srem.ll +++ b/llvm/test/CodeGen/X86/combine-srem.ll @@ -458,3 +458,83 @@ define i32 @ossfuzz6883() { %B6 = and i32 %B16, %B10 ret i32 %B6 } + +define i1 @bool_srem(i1 %x, i1 %y) { +; CHECK-LABEL: bool_srem: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: cbtw +; CHECK-NEXT: andb $1, %sil +; CHECK-NEXT: negb %sil +; CHECK-NEXT: idivb %sil +; CHECK-NEXT: movsbl %ah, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %r = srem i1 %x, %y + ret i1 %r +} +define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) { +; SSE-LABEL: boolvec_srem: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: pextrd $1, %xmm0, %eax +; SSE-NEXT: pextrd $1, %xmm1, %ecx +; SSE-NEXT: cltd +; SSE-NEXT: idivl %ecx +; SSE-NEXT: movl %edx, %ecx +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: movd %xmm1, %esi +; SSE-NEXT: cltd +; SSE-NEXT: idivl %esi +; SSE-NEXT: movd %edx, %xmm2 +; SSE-NEXT: pinsrd $1, %ecx, %xmm2 +; SSE-NEXT: pextrd $2, %xmm0, %eax +; SSE-NEXT: pextrd $2, %xmm1, %ecx +; SSE-NEXT: cltd +; SSE-NEXT: idivl %ecx +; SSE-NEXT: pinsrd $2, %edx, %xmm2 +; SSE-NEXT: pextrd $3, %xmm0, %eax +; SSE-NEXT: pextrd $3, %xmm1, %ecx +; SSE-NEXT: cltd +; SSE-NEXT: idivl %ecx +; SSE-NEXT: pinsrd $3, %edx, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: boolvec_srem: +; AVX: # %bb.0: +; AVX-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-NEXT: vpextrd $1, %xmm0, %eax +; AVX-NEXT: vpextrd $1, %xmm1, %ecx +; AVX-NEXT: cltd +; AVX-NEXT: idivl %ecx +; AVX-NEXT: movl %edx, %ecx +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: vmovd %xmm1, %esi +; AVX-NEXT: cltd +; AVX-NEXT: idivl %esi +; AVX-NEXT: vmovd %edx, %xmm2 +; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX-NEXT: vpextrd $2, %xmm0, %eax +; AVX-NEXT: vpextrd $2, %xmm1, %ecx +; AVX-NEXT: cltd +; AVX-NEXT: idivl %ecx +; AVX-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 +; AVX-NEXT: vpextrd $3, %xmm0, %eax +; AVX-NEXT: vpextrd $3, %xmm1, %ecx +; AVX-NEXT: cltd +; AVX-NEXT: idivl %ecx +; AVX-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 +; AVX-NEXT: retq + %r = srem <4 x i1> %x, %y + ret <4 x i1> %r +} diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index cddf746..d31d250 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -907,3 +907,170 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { %1 = udiv <8 x i16> %a0, ret <8 x i16> %1 } + +define i1 @bool_udiv(i1 %x, i1 %y) { +; CHECK-LABEL: bool_udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: andb $1, %sil +; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: # kill: def $eax killed $eax def $ax +; CHECK-NEXT: divb 
%sil +; CHECK-NEXT: retq + %r = udiv i1 %x, %y + ret i1 %r +} + +define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) { +; SSE2-LABEL: boolvec_udiv: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] +; SSE2-NEXT: movd %xmm2, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; SSE2-NEXT: movd %xmm2, %ecx +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: divl %ecx +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] +; SSE2-NEXT: movd %xmm3, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; SSE2-NEXT: movd %xmm3, %ecx +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: divl %ecx +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movd %xmm1, %ecx +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: divl %ecx +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: movd %xmm0, %ecx +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: divl %ecx +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: boolvec_udiv: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %ecx +; SSE41-NEXT: xorl %edx, %edx +; SSE41-NEXT: divl %ecx +; SSE41-NEXT: movl %eax, %ecx +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: movd %xmm1, %esi +; SSE41-NEXT: xorl %edx, %edx +; SSE41-NEXT: divl %esi +; SSE41-NEXT: movd %eax, %xmm2 +; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 +; SSE41-NEXT: pextrd $2, %xmm0, %eax +; SSE41-NEXT: pextrd $2, %xmm1, %ecx +; SSE41-NEXT: xorl %edx, %edx +; SSE41-NEXT: divl %ecx +; SSE41-NEXT: pinsrd $2, %eax, %xmm2 +; SSE41-NEXT: pextrd $3, %xmm0, %eax +; SSE41-NEXT: pextrd $3, %xmm1, %ecx +; SSE41-NEXT: xorl %edx, %edx +; SSE41-NEXT: divl %ecx +; SSE41-NEXT: pinsrd $3, %eax, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: boolvec_udiv: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: vpextrd $1, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: vmovd %xmm1, %esi +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %esi +; AVX1-NEXT: vmovd %eax, %xmm2 +; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: vpextrd $2, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: vpextrd $3, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: boolvec_udiv: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpextrd $1, %xmm0, %eax +; AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl 
%ecx +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vmovd %xmm1, %esi +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %esi +; AVX2-NEXT: vmovd %eax, %xmm2 +; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $2, %xmm0, %eax +; AVX2-NEXT: vpextrd $2, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %ecx +; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $3, %xmm0, %eax +; AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %ecx +; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; XOP-LABEL: boolvec_udiv: +; XOP: # %bb.0: +; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] +; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1 +; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0 +; XOP-NEXT: vpextrd $1, %xmm0, %eax +; XOP-NEXT: vpextrd $1, %xmm1, %ecx +; XOP-NEXT: xorl %edx, %edx +; XOP-NEXT: divl %ecx +; XOP-NEXT: movl %eax, %ecx +; XOP-NEXT: vmovd %xmm0, %eax +; XOP-NEXT: vmovd %xmm1, %esi +; XOP-NEXT: xorl %edx, %edx +; XOP-NEXT: divl %esi +; XOP-NEXT: vmovd %eax, %xmm2 +; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; XOP-NEXT: vpextrd $2, %xmm0, %eax +; XOP-NEXT: vpextrd $2, %xmm1, %ecx +; XOP-NEXT: xorl %edx, %edx +; XOP-NEXT: divl %ecx +; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; XOP-NEXT: vpextrd $3, %xmm0, %eax +; XOP-NEXT: vpextrd $3, %xmm1, %ecx +; XOP-NEXT: xorl %edx, %edx +; XOP-NEXT: divl %ecx +; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; XOP-NEXT: retq + %r = udiv <4 x i1> %x, %y + ret <4 x i1> %r +} diff --git a/llvm/test/CodeGen/X86/combine-urem.ll b/llvm/test/CodeGen/X86/combine-urem.ll index 04c0abe..11505ed 100644 --- a/llvm/test/CodeGen/X86/combine-urem.ll +++ b/llvm/test/CodeGen/X86/combine-urem.ll @@ -379,3 +379,107 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) { %2 = urem <4 x i32> %x, %1 ret <4 x i32> %2 } + +define i1 @bool_urem(i1 %x, i1 %y) { +; CHECK-LABEL: bool_urem: +; CHECK: # %bb.0: +; CHECK-NEXT: andb $1, %sil +; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: # kill: def $eax killed $eax def $ax +; CHECK-NEXT: divb %sil +; CHECK-NEXT: movzbl %ah, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %r = urem i1 %x, %y + ret i1 %r +} + +define <4 x i1> @boolvec_urem(<4 x i1> %x, <4 x i1> %y) { +; SSE-LABEL: boolvec_urem: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: pextrd $1, %xmm0, %eax +; SSE-NEXT: pextrd $1, %xmm1, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: divl %ecx +; SSE-NEXT: movl %edx, %ecx +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: movd %xmm1, %esi +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: divl %esi +; SSE-NEXT: movd %edx, %xmm2 +; SSE-NEXT: pinsrd $1, %ecx, %xmm2 +; SSE-NEXT: pextrd $2, %xmm0, %eax +; SSE-NEXT: pextrd $2, %xmm1, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: divl %ecx +; SSE-NEXT: pinsrd $2, %edx, %xmm2 +; SSE-NEXT: pextrd $3, %xmm0, %eax +; SSE-NEXT: pextrd $3, %xmm1, %ecx +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: divl %ecx +; SSE-NEXT: pinsrd $3, %edx, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: boolvec_urem: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: vpextrd $1, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: movl %edx, %ecx +; AVX1-NEXT: vmovd 
%xmm0, %eax +; AVX1-NEXT: vmovd %xmm1, %esi +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %esi +; AVX1-NEXT: vmovd %edx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: vpextrd $2, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: vpextrd $3, %xmm1, %ecx +; AVX1-NEXT: xorl %edx, %edx +; AVX1-NEXT: divl %ecx +; AVX1-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: boolvec_urem: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpextrd $1, %xmm0, %eax +; AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %ecx +; AVX2-NEXT: movl %edx, %ecx +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vmovd %xmm1, %esi +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %esi +; AVX2-NEXT: vmovd %edx, %xmm2 +; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $2, %xmm0, %eax +; AVX2-NEXT: vpextrd $2, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %ecx +; AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 +; AVX2-NEXT: vpextrd $3, %xmm0, %eax +; AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: divl %ecx +; AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 +; AVX2-NEXT: retq + %r = urem <4 x i1> %x, %y + ret <4 x i1> %r +}
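
A note on the fold these baseline tests stage for (a hedged sketch of the expected reasoning, not the committed combine): a div/rem whose divisor is zero is undefined behavior, so the divisor of an i1 division may be assumed nonzero. The only nonzero i1 value is 1 (or -1 once sign-extended), and X udiv 1 == X, X sdiv -1 == -X == X in one bit, while both remainders are always 0. The same identities apply elementwise to the <4 x i1> vector cases. Once DAGCombiner learns this, the scalarized div/idiv sequences checked above should fold away entirely. Illustrative LLVM IR, with hypothetical function names:

; Expected single-bit identities (assumed, per the commit title):
;   udiv i1 %x, %y  ->  %x       (%y must be 1)
;   sdiv i1 %x, %y  ->  %x       (%y must be -1; -%x == %x for i1)
;   urem i1 %x, %y  ->  0
;   srem i1 %x, %y  ->  0

define i1 @bool_udiv_expected(i1 %x, i1 %y) {
  ; A zero %y would be UB, so %y == 1 and the quotient is just %x.
  %r = udiv i1 %x, %y
  ret i1 %r              ; should fold to: ret i1 %x
}

define i1 @bool_srem_expected(i1 %x, i1 %y) {
  ; %y must be -1 (the only nonzero i1), and any value srem -1 is 0.
  %r = srem i1 %x, %y
  ret i1 %r              ; should fold to: ret i1 false
}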