From cc41495eb809e064a976f93c49a5000c0c8c0a72 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 22 Mar 2016 20:10:49 +0000 Subject: [PATCH] [X86][AVX] Added AVX1 tests for 256-bit vector idiv-by-constant Prep work based on feedback for D18307 llvm-svn: 264086 --- llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 3244 ++++++++++++++++++------- llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 2980 ++++++++++++++++------- 2 files changed, 4482 insertions(+), 1742 deletions(-) diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll index 345874d..926a30c 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; @@ -6,435 +7,1066 @@ ; define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind { -; AVX-LABEL: test_div7_4i64: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrq $1, %xmm1, %rax -; AVX-NEXT: movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925 -; AVX-NEXT: imulq %rcx -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: imulq %rcx -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: imulq %rcx -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm2 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: imulq %rcx -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: vmovq %rdx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925 +; AVX1-NEXT: imulq %rcx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: imulq %rcx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: imulq %rcx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: imulq %rcx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: 
vpextrq $1, %xmm1, %rax +; AVX2-NEXT: movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925 +; AVX2-NEXT: imulq %rcx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: imulq %rcx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: imulq %rcx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: imulq %rcx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm0 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = sdiv <4 x i64> %a, ret <4 x i64> %res } define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind { -; AVX-LABEL: test_div7_8i32: -; AVX: # BB#0: -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpmuldq %ymm2, %ymm3, %ymm2 -; AVX-NEXT: vpmuldq %ymm1, %ymm0, %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vpsrld $31, %ymm0, %ymm1 -; AVX-NEXT: vpsrad $2, %ymm0, %ymm0 -; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_8i32: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movslq %ecx, %rcx +; AVX1-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, 
%eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movslq %ecx, %rcx +; AVX1-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: sarl $2, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_8i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpmuldq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsrld $31, %ymm0, %ymm1 +; AVX2-NEXT: vpsrad $2, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = sdiv <8 x i32> %a, ret <8 x i32> %res } define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind { -; AVX-LABEL: test_div7_16i16: -; AVX: # BB#0: -; AVX-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm0 -; AVX-NEXT: vpsrlw $15, %ymm0, %ymm1 -; AVX-NEXT: vpsraw $1, %ymm0, %ymm0 -; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_16i16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movswl %cx, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 
0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm1, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movswl %cx, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm0, %eax +; AVX1-NEXT: cwtl +; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: sarw %cx +; AVX1-NEXT: shrl $31, %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: 
test_div7_16i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm1 +; AVX2-NEXT: vpsraw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = sdiv <16 x i16> %a, ret <16 x i16> %res } define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { -; AVX-LABEL: test_div7_32i8: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm1, %ecx -; AVX-NEXT: movsbl %cl, %ecx -; AVX-NEXT: imull $-109, %ecx, %edx -; AVX-NEXT: shrl $8, %edx -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: movb %cl, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %cl -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: movzbl %cl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; 
AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm0, %ecx -; AVX-NEXT: movsbl %cl, %ecx -; AVX-NEXT: imull $-109, %ecx, %edx -; AVX-NEXT: shrl $8, %edx -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: movb %cl, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %cl -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: movzbl %cl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: 
movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx 
-; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm0, %eax -; AVX-NEXT: movsbl %al, %eax -; AVX-NEXT: imull $-109, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_32i8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: movsbl %cl, %ecx +; AVX1-NEXT: imull $-109, %ecx, %edx +; AVX1-NEXT: shrl $8, %edx +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: movb %cl, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %cl +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm1, %eax +; 
AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: movsbl %cl, %ecx +; AVX1-NEXT: imull $-109, %ecx, %edx +; AVX1-NEXT: shrl $8, %edx +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: movb %cl, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %cl +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx 
+; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; 
AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %eax +; AVX1-NEXT: imull $-109, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_32i8: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: movsbl %cl, %ecx +; AVX2-NEXT: imull $-109, %ecx, %edx +; AVX2-NEXT: shrl $8, %edx +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: movb %cl, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %cl +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: movzbl %cl, %ecx +; AVX2-NEXT: vmovd %ecx, %xmm2 +; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; 
AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: 
shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: movsbl %cl, %ecx +; AVX2-NEXT: imull $-109, %ecx, %edx +; AVX2-NEXT: shrl $8, %edx +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: movb %cl, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %cl +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: movzbl %cl, %ecx +; AVX2-NEXT: vmovd %ecx, %xmm2 +; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; 
AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %eax +; AVX2-NEXT: imull $-109, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = sdiv <32 x i8> %a, ret <32 x i8> %res } @@ -444,521 +1076,1305 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { -; AVX-LABEL: test_rem7_4i64: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrq $1, %xmm1, %rcx -; AVX-NEXT: movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925 -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: imulq %rsi -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm1, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: imulq %rsi -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: imulq %rsi -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: imulq %rsi -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $63, %rax -; AVX-NEXT: sarq %rdx -; AVX-NEXT: addq %rax, %rdx -; 
AVX-NEXT: leaq (,%rdx,8), %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: subq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rcx +; AVX1-NEXT: movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rsi +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (,%rdx,8), %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rsi +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (,%rdx,8), %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rsi +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (,%rdx,8), %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rsi +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (,%rdx,8), %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; AVX2-NEXT: movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rsi +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (,%rdx,8), %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rsi +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (,%rdx,8), %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rsi +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (,%rdx,8), %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rsi +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (,%rdx,8), %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vinserti128 $1, 
%xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = srem <4 x i64> %a, ret <4 x i64> %res } define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind { -; AVX-LABEL: test_rem7_8i32: -; AVX: # BB#0: -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpmuldq %ymm2, %ymm3, %ymm2 -; AVX-NEXT: vpmuldq %ymm1, %ymm0, %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm1 -; AVX-NEXT: vpsrld $31, %ymm1, %ymm2 -; AVX-NEXT: vpsrad $2, %ymm1, %ymm1 -; AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1 -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 -; AVX-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_8i32: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movslq %ecx, %rcx +; AVX1-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: movl %edx, %esi +; AVX1-NEXT: shrl $31, %esi +; AVX1-NEXT: sarl $2, %edx +; AVX1-NEXT: addl %esi, %edx +; AVX1-NEXT: leal (,%rdx,8), %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: subl %esi, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm1, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movslq %ecx, %rcx +; AVX1-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: movl %edx, %esi +; AVX1-NEXT: shrl $31, %esi +; AVX1-NEXT: sarl $2, %edx +; AVX1-NEXT: addl %esi, %edx +; AVX1-NEXT: leal (,%rdx,8), %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: 
subl %esi, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: cltq +; AVX1-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0xFFFFFFFF92492493 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: addl %eax, %ecx +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: sarl $2, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_8i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpmuldq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm1 +; AVX2-NEXT: vpsrld $31, %ymm1, %ymm2 +; AVX2-NEXT: vpsrad $2, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = srem <8 x i32> %a, ret <8 x i32> %res } define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind { -; AVX-LABEL: test_rem7_16i16: -; AVX: # BB#0: -; AVX-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1 -; AVX-NEXT: vpsrlw $15, %ymm1, %ymm2 -; AVX-NEXT: vpsraw $1, %ymm1, %ymm1 -; AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1 -; AVX-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 -; AVX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_16i16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movswl %cx, %edx +; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925 +; AVX1-NEXT: movl %edx, %esi +; AVX1-NEXT: shrl $16, %esi +; AVX1-NEXT: sarw %si +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: addl %esi, %edx +; AVX1-NEXT: leal (,%rdx,8), %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: subl %esi, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: 
vpextrw $3, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm1, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movswl %cx, %edx +; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925 +; AVX1-NEXT: movl %edx, %esi +; AVX1-NEXT: shrl $16, %esi +; AVX1-NEXT: sarw %si +; AVX1-NEXT: shrl $31, %edx +; AVX1-NEXT: addl %esi, %edx +; AVX1-NEXT: leal (,%rdx,8), %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: subl %esi, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, 
%eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm0, %eax +; AVX1-NEXT: movswl %ax, %ecx +; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925 +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: sarw %dx +; AVX1-NEXT: shrl $31, %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: leal (,%rcx,8), %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: subl %edx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_16i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm2 +; AVX2-NEXT: vpsraw $1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = srem <16 x i16> %a, ret <16 x i16> %res } define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { -; AVX-LABEL: test_rem7_32i8: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm1, %eax -; AVX-NEXT: movsbl %al, %edx -; AVX-NEXT: imull $-109, %edx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb $7, %dil -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %edx -; AVX-NEXT: vpextrb $0, %xmm1, %eax -; AVX-NEXT: movsbl %al, %esi -; AVX-NEXT: imull $-109, %esi, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %sil -; AVX-NEXT: movzbl %sil, %eax -; AVX-NEXT: vmovd %eax, %xmm2 -; AVX-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; 
AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; 
AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %esi -; AVX-NEXT: vpextrb $0, %xmm0, %eax -; AVX-NEXT: movsbl %al, %edx -; AVX-NEXT: imull $-109, %edx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: movb %al, %cl -; AVX-NEXT: shrb $7, %cl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vmovd %eax, %xmm2 -; AVX-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb 
$2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb 
%dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm0, %eax -; AVX-NEXT: movsbl %al, %ecx -; AVX-NEXT: imull $-109, %ecx, %eax -; AVX-NEXT: shrl $8, %eax -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: movb %al, %dl -; AVX-NEXT: shrb $7, %dl -; AVX-NEXT: sarb $2, %al -; AVX-NEXT: addb %dl, %al -; AVX-NEXT: mulb %dil -; AVX-NEXT: subb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_32i8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %edx +; AVX1-NEXT: imull $-109, %edx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb $7, %dil +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %dl +; AVX1-NEXT: movzbl %dl, %edx +; AVX1-NEXT: vpextrb $0, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %esi +; AVX1-NEXT: imull $-109, %esi, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %sil, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %sil +; AVX1-NEXT: movzbl %sil, %eax +; AVX1-NEXT: vmovd %eax, %xmm2 +; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; 
AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm1, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; 
AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %esi +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %edx +; AVX1-NEXT: imull $-109, %edx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: movb %al, %cl +; AVX1-NEXT: shrb $7, %cl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %dl +; AVX1-NEXT: movzbl %dl, %eax +; AVX1-NEXT: vmovd %eax, %xmm2 +; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; 
AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: movsbl %al, %ecx +; AVX1-NEXT: imull $-109, %ecx, %eax +; AVX1-NEXT: shrl $8, %eax +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: movb %al, %dl +; AVX1-NEXT: shrb $7, %dl +; AVX1-NEXT: sarb $2, %al +; AVX1-NEXT: addb %dl, %al +; AVX1-NEXT: mulb %dil +; AVX1-NEXT: subb %al, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_32i8: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %edx +; AVX2-NEXT: imull $-109, %edx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb $7, %dil +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %dl +; AVX2-NEXT: movzbl %dl, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %esi +; AVX2-NEXT: imull $-109, %esi, %eax +; AVX2-NEXT: shrl $8, %eax +; 
AVX2-NEXT: addb %sil, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %sil +; AVX2-NEXT: movzbl %sil, %eax +; AVX2-NEXT: vmovd %eax, %xmm2 +; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm1, %eax +; 
AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm1, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %esi +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %edx +; AVX2-NEXT: imull $-109, %edx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: movb %al, %cl +; AVX2-NEXT: shrb $7, %cl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %dl +; AVX2-NEXT: movzbl %dl, %eax +; AVX2-NEXT: vmovd %eax, %xmm2 +; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $2, %eax, 
%xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; 
AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: movsbl %al, %ecx +; AVX2-NEXT: imull $-109, %ecx, %eax +; AVX2-NEXT: shrl $8, %eax +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: movb %al, %dl +; AVX2-NEXT: shrb $7, %dl +; AVX2-NEXT: sarb $2, %al +; AVX2-NEXT: addb %dl, %al +; AVX2-NEXT: mulb %dil +; AVX2-NEXT: subb %al, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = srem <32 x i8> %a, ret <32 x i8> %res } diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll index 1cd1202..fc2ea63 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; @@ -6,376 +7,933 @@ ; define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind { -; AVX-LABEL: test_div7_4i64: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrq $1, %xmm1, %rcx -; AVX-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: shrq %rcx -; AVX-NEXT: addq %rdx, %rcx -; AVX-NEXT: shrq $2, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm1, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: shrq %rcx -; AVX-NEXT: addq %rdx, %rcx -; AVX-NEXT: shrq $2, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: shrq %rcx -; AVX-NEXT: addq %rdx, %rcx -; AVX-NEXT: shrq $2, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: shrq %rcx -; AVX-NEXT: addq %rdx, %rcx -; AVX-NEXT: shrq $2, %rcx -; AVX-NEXT: 
vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rcx +; AVX1-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: addq %rdx, %rcx +; AVX1-NEXT: shrq $2, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: addq %rdx, %rcx +; AVX1-NEXT: shrq $2, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: addq %rdx, %rcx +; AVX1-NEXT: shrq $2, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: addq %rdx, %rcx +; AVX1-NEXT: shrq $2, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; AVX2-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: addq %rdx, %rcx +; AVX2-NEXT: shrq $2, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: addq %rdx, %rcx +; AVX2-NEXT: shrq $2, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: addq %rdx, %rcx +; AVX2-NEXT: shrq $2, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: addq %rdx, %rcx +; AVX2-NEXT: shrq $2, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = udiv <4 x i64> %a, ret <4 x i64> %res } define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind { -; AVX-LABEL: test_div7_8i32: -; AVX: # BB#0: -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm2 -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpsrld $1, %ymm0, %ymm0 -; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpsrld $2, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_8i32: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: 
vpextrd $1, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: shrl %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: shrl $2, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: shrl %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: shrl $2, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_8i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpmuludq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrld $2, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = udiv <8 x i32> %a, ret <8 x i32> %res } define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind { -; AVX-LABEL: test_div7_16i16: -; AVX: # BB#0: -; AVX-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 -; AVX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpsrlw $1, %ymm0, %ymm0 -; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpsrlw $2, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_16i16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: 
subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movzwl %cx, %edx +; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE +; AVX1-NEXT: shrl %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: shrl $2, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movzwl %cx, %edx +; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE +; AVX1-NEXT: shrl %ecx +; AVX1-NEXT: addl %edx, %ecx +; AVX1-NEXT: shrl $2, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; 
AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_16i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = udiv <16 x i16> %a, ret <16 x i16> %res } define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { -; AVX-LABEL: test_div7_32i8: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm1, %ecx -; AVX-NEXT: imull $37, %ecx, %edx -; AVX-NEXT: shrl $8, %edx -; AVX-NEXT: subb %dl, %cl -; AVX-NEXT: shrb %cl -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: shrb $2, %cl -; AVX-NEXT: movzbl %cl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, 
%al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpextrb $0, %xmm0, %ecx -; AVX-NEXT: imull $37, %ecx, %edx -; AVX-NEXT: shrl $8, %edx -; AVX-NEXT: subb %dl, %cl -; AVX-NEXT: shrb %cl -; AVX-NEXT: addb %dl, %cl -; AVX-NEXT: shrb $2, %cl -; AVX-NEXT: movzbl %cl, %ecx -; AVX-NEXT: vmovd %ecx, %xmm2 -; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; 
AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm0, %eax -; AVX-NEXT: imull $37, %eax, 
%ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movzbl %al, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_div7_32i8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: imull $37, %ecx, %edx +; AVX1-NEXT: shrl $8, %edx +; AVX1-NEXT: subb %dl, %cl +; AVX1-NEXT: shrb %cl +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: shrb $2, %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, 
%ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm1, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpextrb $0, %xmm0, %ecx +; AVX1-NEXT: imull $37, %ecx, %edx +; AVX1-NEXT: shrl $8, %edx +; AVX1-NEXT: subb %dl, %cl +; AVX1-NEXT: shrb %cl +; AVX1-NEXT: addb %dl, %cl +; AVX1-NEXT: shrb $2, %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; 
AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: imull $37, %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_div7_32i8: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: imull $37, %ecx, %edx +; AVX2-NEXT: shrl $8, %edx +; AVX2-NEXT: subb %dl, %cl +; AVX2-NEXT: shrb %cl +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: shrb $2, %cl +; AVX2-NEXT: movzbl %cl, %ecx +; AVX2-NEXT: vmovd %ecx, %xmm2 +; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: 
shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm1, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; 
AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpextrb $0, %xmm0, %ecx +; AVX2-NEXT: imull $37, %ecx, %edx +; AVX2-NEXT: shrl $8, %edx +; AVX2-NEXT: subb %dl, %cl +; AVX2-NEXT: shrb %cl +; AVX2-NEXT: addb %dl, %cl +; AVX2-NEXT: shrb $2, %cl +; AVX2-NEXT: movzbl %cl, %ecx +; AVX2-NEXT: vmovd %ecx, %xmm2 +; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: 
shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: imull $37, %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: subb %cl, %al +; AVX2-NEXT: shrb %al +; AVX2-NEXT: addb %cl, %al +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = udiv <32 x i8> %a, ret <32 x i8> %res } @@ -385,494 +943,1260 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind { -; AVX-LABEL: test_rem7_4i64: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrq $1, %xmm1, %rcx -; AVX-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: shrq %rax -; AVX-NEXT: addq %rdx, %rax -; AVX-NEXT: shrq $2, %rax -; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm1, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: shrq %rax -; AVX-NEXT: addq %rdx, %rax -; AVX-NEXT: shrq $2, %rax -; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: shrq %rax -; AVX-NEXT: addq %rdx, %rax -; AVX-NEXT: shrq $2, %rax -; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm2 -; AVX-NEXT: vmovq %xmm0, %rcx -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: mulq %rsi -; AVX-NEXT: movq %rcx, %rax -; AVX-NEXT: subq %rdx, %rax -; AVX-NEXT: shrq %rax -; AVX-NEXT: addq %rdx, %rax -; AVX-NEXT: shrq $2, %rax -; AVX-NEXT: leaq (,%rax,8), %rdx -; AVX-NEXT: subq %rax, %rdx -; AVX-NEXT: subq %rdx, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_4i64: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rcx +; AVX1-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: shrq $2, %rax +; AVX1-NEXT: leaq (,%rax,8), %rdx +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; 
AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: shrq $2, %rax +; AVX1-NEXT: leaq (,%rax,8), %rdx +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: shrq $2, %rax +; AVX1-NEXT: leaq (,%rax,8), %rdx +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovq %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rsi +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: shrq $2, %rax +; AVX1-NEXT: leaq (,%rax,8), %rdx +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: subq %rdx, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_4i64: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; AVX2-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: shrq $2, %rax +; AVX2-NEXT: leaq (,%rax,8), %rdx +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: shrq $2, %rax +; AVX2-NEXT: leaq (,%rax,8), %rdx +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: shrq $2, %rax +; AVX2-NEXT: leaq (,%rax,8), %rdx +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 +; AVX2-NEXT: vmovq %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rsi +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: shrq $2, %rax +; AVX2-NEXT: leaq (,%rax,8), %rdx +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: subq %rdx, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = urem <4 x i64> %a, ret <4 x i64> %res } define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind { -; AVX-LABEL: test_rem7_8i32: -; AVX: # BB#0: -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm2 -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1 -; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm2 
-; AVX-NEXT: vpsrld $1, %ymm2, %ymm2 -; AVX-NEXT: vpaddd %ymm1, %ymm2, %ymm1 -; AVX-NEXT: vpsrld $2, %ymm1, %ymm1 -; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 -; AVX-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_8i32: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: movl %ecx, %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: shrl %esi +; AVX1-NEXT: addl %edx, %esi +; AVX1-NEXT: shrl $2, %esi +; AVX1-NEXT: leal (,%rsi,8), %edx +; AVX1-NEXT: subl %esi, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm1, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrd $1, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: movl %ecx, %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: shrl %esi +; AVX1-NEXT: addl %edx, %esi +; AVX1-NEXT: shrl $2, %esi +; AVX1-NEXT: leal (,%rsi,8), %edx +; AVX1-NEXT: subl %esi, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $2, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrd $3, %xmm0, %eax +; AVX1-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, 
%eax +; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_8i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpmuludq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpsrld $1, %ymm2, %ymm2 +; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpsrld $2, %ymm1, %ymm1 +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = urem <8 x i32> %a, ret <8 x i32> %res } define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind { -; AVX-LABEL: test_rem7_16i16: -; AVX: # BB#0: -; AVX-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 -; AVX-NEXT: vpsubw %ymm1, %ymm0, %ymm2 -; AVX-NEXT: vpsrlw $1, %ymm2, %ymm2 -; AVX-NEXT: vpaddw %ymm1, %ymm2, %ymm1 -; AVX-NEXT: vpsrlw $2, %ymm1, %ymm1 -; AVX-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 -; AVX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_16i16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vmovd %xmm1, %ecx +; AVX1-NEXT: movzwl %cx, %edx +; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: movl %ecx, %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE +; AVX1-NEXT: shrl %esi +; AVX1-NEXT: addl %edx, %esi +; AVX1-NEXT: shrl $2, %esi +; AVX1-NEXT: leal (,%rsi,8), %edx +; AVX1-NEXT: subl %esi, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: 
shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm1, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: movzwl %cx, %edx +; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %edx +; AVX1-NEXT: movl %ecx, %esi +; AVX1-NEXT: subl %edx, %esi +; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE +; AVX1-NEXT: shrl %esi +; AVX1-NEXT: addl %edx, %esi +; AVX1-NEXT: shrl $2, %esi +; AVX1-NEXT: leal (,%rsi,8), %edx +; AVX1-NEXT: subl %esi, %edx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: vmovd %ecx, %xmm2 +; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $2, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $3, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $4, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl 
%edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $5, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $6, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm0, %eax +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: subl %ecx, %edx +; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE +; AVX1-NEXT: shrl %edx +; AVX1-NEXT: addl %ecx, %edx +; AVX1-NEXT: shrl $2, %edx +; AVX1-NEXT: leal (,%rdx,8), %ecx +; AVX1-NEXT: subl %edx, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_rem7_16i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX2-NEXT: vpaddw %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq %res = urem <16 x i16> %a, ret <16 x i16> %res } define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { -; AVX-LABEL: test_rem7_32i8: -; AVX: # BB#0: -; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %cl, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %cl, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: movb $7, %cl -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %edx -; AVX-NEXT: vpextrb $0, %xmm1, %esi -; AVX-NEXT: imull $37, %esi, %edi -; AVX-NEXT: shrl $8, %edi -; AVX-NEXT: movb %sil, %al -; AVX-NEXT: subb %dil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %dil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %sil -; AVX-NEXT: movzbl %sil, %eax -; AVX-NEXT: vmovd %eax, %xmm2 -; AVX-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: 
movzbl %dl, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax 
-; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm1, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 -; AVX-NEXT: vpextrb $1, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %edx -; AVX-NEXT: vpextrb $0, %xmm0, %esi -; AVX-NEXT: imull $37, %esi, %edi -; AVX-NEXT: shrl $8, %edi -; AVX-NEXT: movb %sil, %al -; AVX-NEXT: subb %dil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %dil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %sil -; AVX-NEXT: movzbl %sil, %eax -; AVX-NEXT: vmovd %eax, %xmm2 -; AVX-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $2, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $3, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $4, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $5, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $6, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $7, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $7, 
%eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $8, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $9, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $10, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $11, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $12, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $13, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $14, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX-NEXT: vpextrb $15, %xmm0, %edx -; AVX-NEXT: imull $37, %edx, %esi -; AVX-NEXT: shrl $8, %esi -; AVX-NEXT: movb %dl, %al -; AVX-NEXT: subb %sil, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: addb %sil, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: mulb %cl -; AVX-NEXT: subb %al, %dl -; AVX-NEXT: movzbl %dl, %eax -; AVX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 -; AVX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: test_rem7_32i8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrb $1, %xmm1, %edx +; AVX1-NEXT: imull $37, %edx, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: movb %dl, %al +; AVX1-NEXT: subb %cl, %al +; AVX1-NEXT: shrb %al +; AVX1-NEXT: addb %cl, %al +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movb $7, %cl +; AVX1-NEXT: mulb %cl +; AVX1-NEXT: subb %al, %dl +; AVX1-NEXT: movzbl %dl, %edx +; AVX1-NEXT: vpextrb $0, %xmm1, %esi +; AVX1-NEXT: imull $37, %esi, %edi +; AVX1-NEXT: shrl $8, %edi +; AVX1-NEXT: movb %sil, %al +; AVX1-NEXT: subb %dil, %al +; AVX1-NEXT: shrb %al +; 
AVX1-NEXT: addb %dil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %sil
+; AVX1-NEXT: movzbl %sil, %eax
+; AVX1-NEXT: vmovd %eax, %xmm2
+; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm1, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $1, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %edx
+; AVX1-NEXT: vpextrb $0, %xmm0, %esi
+; AVX1-NEXT: imull $37, %esi, %edi
+; AVX1-NEXT: shrl $8, %edi
+; AVX1-NEXT: movb %sil, %al
+; AVX1-NEXT: subb %dil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %dil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %sil
+; AVX1-NEXT: movzbl %sil, %eax
+; AVX1-NEXT: vmovd %eax, %xmm2
+; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $2, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrb $15, %xmm0, %edx
+; AVX1-NEXT: imull $37, %edx, %esi
+; AVX1-NEXT: shrl $8, %esi
+; AVX1-NEXT: movb %dl, %al
+; AVX1-NEXT: subb %sil, %al
+; AVX1-NEXT: shrb %al
+; AVX1-NEXT: addb %sil, %al
+; AVX1-NEXT: shrb $2, %al
+; AVX1-NEXT: mulb %cl
+; AVX1-NEXT: subb %al, %dl
+; AVX1-NEXT: movzbl %dl, %eax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_rem7_32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %ecx
+; AVX2-NEXT: shrl $8, %ecx
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %cl, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %cl, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: movb $7, %cl
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %edx
+; AVX2-NEXT: vpextrb $0, %xmm1, %esi
+; AVX2-NEXT: imull $37, %esi, %edi
+; AVX2-NEXT: shrl $8, %edi
+; AVX2-NEXT: movb %sil, %al
+; AVX2-NEXT: subb %dil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %dil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %sil
+; AVX2-NEXT: movzbl %sil, %eax
+; AVX2-NEXT: vmovd %eax, %xmm2
+; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm1, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; AVX2-NEXT: vpextrb $1, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %edx
+; AVX2-NEXT: vpextrb $0, %xmm0, %esi
+; AVX2-NEXT: imull $37, %esi, %edi
+; AVX2-NEXT: shrl $8, %edi
+; AVX2-NEXT: movb %sil, %al
+; AVX2-NEXT: subb %dil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %dil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %sil
+; AVX2-NEXT: movzbl %sil, %eax
+; AVX2-NEXT: vmovd %eax, %xmm2
+; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrb $15, %xmm0, %edx
+; AVX2-NEXT: imull $37, %edx, %esi
+; AVX2-NEXT: shrl $8, %esi
+; AVX2-NEXT: movb %dl, %al
+; AVX2-NEXT: subb %sil, %al
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: addb %sil, %al
+; AVX2-NEXT: shrb $2, %al
+; AVX2-NEXT: mulb %cl
+; AVX2-NEXT: subb %al, %dl
+; AVX2-NEXT: movzbl %dl, %eax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
 %res = urem <32 x i8> %a,
 ret <32 x i8> %res
 }
-- 
2.7.4
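
The per-byte sequence that both the AVX1 and AVX2 check lines above repeat is the standard unsigned magic-number lowering of remainder-by-7 on i8: the quotient is computed as q = (((x - ((x*37) >> 8)) >> 1) + ((x*37) >> 8)) >> 2, and the remainder as r = x - 7*q. Below is a minimal C sketch of that arithmetic, for reference only; the helper name and the exhaustive self-test are illustrative and are not part of the patch or of the LLVM test files.

  #include <assert.h>
  #include <stdint.h>

  /* Illustrative only: the scalar urem-by-7 computation that each
     vpextrb/imull/.../vpinsrb group in the generated code performs
     on one byte of the <32 x i8> input. */
  static uint8_t urem7_u8(uint8_t x) {
      uint32_t hi = ((uint32_t)x * 37) >> 8; /* imull $37 ; shrl $8 */
      uint8_t t = (uint8_t)(x - hi);         /* subb                */
      t >>= 1;                               /* shrb                */
      t = (uint8_t)(t + hi);                 /* addb                */
      uint8_t q = t >> 2;                    /* shrb $2  -> x / 7   */
      return (uint8_t)(x - q * 7);           /* mulb $7 ; subb      */
  }

  int main(void) {
      /* Exhaustive check over all byte values, mirroring what the
         FileCheck assertions pin down instruction by instruction. */
      for (int x = 0; x < 256; ++x)
          assert(urem7_u8((uint8_t)x) == x % 7);
      return 0;
  }

The same recipe appears once per element because AVX1 has no 256-bit integer multiply, so the i8 division is fully scalarized; the test exists to record that baseline before D18307 improves it.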