}
define <vscale x 8 x i32> @vdivu_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %mask) {
-; CHECK-LABEL: vdivu_vi_mask_nxv8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.i v12, 1
-; CHECK-NEXT: vmerge.vim v12, v12, 7, v0
-; CHECK-NEXT: vdivu.vv v8, v8, v12
-; CHECK-NEXT: ret
+; RV32-LABEL: vdivu_vi_mask_nxv8i32:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 149797
+; RV32-NEXT: addi a0, a0, -1755
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT: vmulhu.vx v12, v8, a0
+; RV32-NEXT: vsub.vv v16, v8, v12
+; RV32-NEXT: vsrl.vi v16, v16, 1
+; RV32-NEXT: vadd.vv v12, v16, v12
+; RV32-NEXT: vsrl.vi v8, v12, 2, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vdivu_vi_mask_nxv8i32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 149797
+; RV64-NEXT: addiw a0, a0, -1755
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT: vmulhu.vx v12, v8, a0
+; RV64-NEXT: vsub.vv v16, v8, v12
+; RV64-NEXT: vsrl.vi v16, v16, 1
+; RV64-NEXT: vadd.vv v12, v16, v12
+; RV64-NEXT: vsrl.vi v8, v12, 2, v0.t
+; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
%one = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%head2 = insertelement <vscale x 8 x i32> poison, i32 7, i32 0
; CHECK-X64-V3-LABEL: udiv_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V4: # %bb.0:
; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1
-; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1]
-; CHECK-X64-V4-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
-; CHECK-X64-V4-NEXT: vpextrq $1, %xmm0, %rcx
-; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax
-; CHECK-X64-V4-NEXT: xorl %edx, %edx
-; CHECK-X64-V4-NEXT: divq %rcx
-; CHECK-X64-V4-NEXT: movq %rax, %rcx
-; CHECK-X64-V4-NEXT: vmovq %xmm0, %rsi
-; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax
-; CHECK-X64-V4-NEXT: xorl %edx, %edx
-; CHECK-X64-V4-NEXT: divq %rsi
+; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rdx
+; CHECK-X64-V4-NEXT: movabsq $3353953467947191203, %rax # imm = 0x2E8BA2E8BA2E8BA3
+; CHECK-X64-V4-NEXT: mulxq %rax, %rcx, %rcx
; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0
-; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1
-; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vmovq %xmm1, %rdx
+; CHECK-X64-V4-NEXT: mulxq %rax, %rax, %rax
+; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2
+; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vpsrlq $1, %xmm0, %xmm1 {%k1}
+; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-X64-V4-NEXT: retq
%d = select <2 x i1> %c, <2 x i64> <i64 11, i64 11>, <2 x i64> <i64 1, i64 1>
%r = udiv <2 x i64> %x, %d
; CHECK-X64-V3-LABEL: udiv_identity_const_todo_getter_nonzero:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-X64-V3-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; CHECK-X64-V3-NEXT: vpsubq %xmm3, %xmm2, %xmm2
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm3 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm3 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: udiv_indentity_partial_zero:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: urem_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: sdiv_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: sdiv_identity_const_todo_better_nonzero:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: srem_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: udivrem_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V3-LABEL: sdivrem_identity_const:
; CHECK-X64-V3: # %bb.0:
; CHECK-X64-V3-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT: # xmm2 = mem[0,0]
; CHECK-X64-V3-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm0, %rcx
; CHECK-X64-V3-NEXT: vpextrq $1, %xmm1, %rax