;
; AVX512BW-LABEL: splatvar_rotate_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm2
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm4, %zmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm3, %zmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpsubb %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlw %xmm2, %zmm4, %zmm2
-; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
-; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512BW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm2
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm4, %zmm1
-; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm1, %zmm3, %zmm1
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm3, %xmm2
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm4, %zmm2
-; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm3, %zmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
%splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
%splat8 = sub <64 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %splat
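The AVX512BW and AVX512VLBW sequences above lower a v64i8 rotate-left by a splatted amount without a native byte shift: the data is shifted with the 16-bit vpsllw/vpsrlw, and the bits that leak across byte boundaries inside each word are cleared with masks built by shifting an all-ones vector (vpternlogd $255) by the same amount and re-broadcasting its low byte. A minimal scalar C model of that per-byte math (my own sketch for illustration, not part of the test; the function name and the check in main are made up):

    #include <assert.h>
    #include <stdint.h>

    /* Rotate one byte left by n (0..7) the way the vector code does it:
     * word-sized shifts plus per-byte masks instead of a native byte shift. */
    static uint8_t rotl8_model(uint8_t x, unsigned n) {
        uint8_t lo_mask = (uint8_t)(0xFFu << n);         /* vpsllw on all-ones + vpbroadcastb   */
        uint8_t hi_mask = (uint8_t)(0xFFu >> (8u - n));  /* vpsrlw on all-ones, vpsrlw $8, vpbroadcastb */
        uint8_t hi = (uint8_t)((x << n) & lo_mask);        /* vpsllw + vpandq                    */
        uint8_t lo = (uint8_t)((x >> (8u - n)) & hi_mask); /* 8-n via vpsubb, then vpsrlw + vpandq */
        return (uint8_t)(hi | lo);                         /* vporq                              */
    }

    int main(void) {
        for (unsigned n = 0; n < 8; ++n)
            for (unsigned x = 0; x < 256; ++x)
                assert(rotl8_model((uint8_t)x, n) ==
                       (uint8_t)((x << n) | (x >> (8u - n))));
        return 0;
    }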
; X32-SSE-NEXT: psrad $31, %xmm0
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3
-; X32-SSE-NEXT: xorps %xmm4, %xmm4
-; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm1[0],xmm4[1,2,3]
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq %xmm4, %xmm1
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
+; X32-SSE-NEXT: pand %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm4, %xmm0
; X32-SSE-NEXT: psrlq %xmm3, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: xorps %xmm5, %xmm5
+; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm1[0],xmm5[1,2,3]
+; X32-SSE-NEXT: psrlq %xmm5, %xmm4
+; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; X32-SSE-NEXT: movdqa %xmm2, %xmm0
-; X32-SSE-NEXT: psrlq %xmm4, %xmm0
+; X32-SSE-NEXT: psrlq %xmm5, %xmm0
; X32-SSE-NEXT: psrlq %xmm3, %xmm2
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT: xorpd %xmm1, %xmm0
-; X32-SSE-NEXT: psubq %xmm1, %xmm0
+; X32-SSE-NEXT: xorpd %xmm4, %xmm0
+; X32-SSE-NEXT: psubq %xmm4, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i32> %a, %splat
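The X32-SSE sequence handles this variable ashr by first sign-extending the two i32 elements into 64-bit lanes (psrad $31 + pshufd + punpckldq) and then, because SSE2 has no 64-bit arithmetic right shift, synthesizing one from psrlq plus a sign fix-up: the [0,2147483648,0,2147483648] constant is the per-lane sign bit, which is shifted by the same amounts and applied with xorpd/psubq. A minimal scalar sketch of that identity (my own model, not part of the test):

    #include <assert.h>
    #include <stdint.h>

    /* Arithmetic right shift built from a logical shift, as the
     * psrlq/xorpd/psubq sequence does per 64-bit lane:
     *   ashr(x, s) == (lshr(x, s) ^ m) - m
     * where m is the sign bit shifted by the same amount. */
    static int64_t ashr64_model(int64_t x, unsigned s) {
        uint64_t m = 0x8000000000000000ull >> s;  /* psrlq on [0,2147483648,0,2147483648] */
        uint64_t u = (uint64_t)x >> s;            /* psrlq on the data                    */
        return (int64_t)((u ^ m) - m);            /* xorpd + psubq                        */
    }

    int main(void) {
        assert(ashr64_model(-8, 2) == -2);
        assert(ashr64_model(8, 2) == 2);
        assert(ashr64_model(-1, 63) == -1);
        return 0;
    }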