store <8 x i8> %strided.vec, <8 x i8>* %S
ret void
}
+
+; Shuffle a <64 x i8> input down to <16 x i8>, selecting bytes 1, 5, 9, ..., 57
+; (every fourth byte starting at 1) followed by byte 62. A uniform stride of 4
+; would end on byte 61, so the final element deliberately breaks the pattern.
+; NOTE(review): judging by the function name, the intent is presumably to check
+; that this near-miss mask is not lowered as a plain vector truncate - confirm.
+;
+; Expected code per check prefix, as recorded in the assertions below:
+;   * the F and VL configurations use vpshufb on each 128-bit quarter, then
+;     vpunpckldq and vpblendd to merge the four 4-byte pieces;
+;   * the BW and BWVL configurations use a scalar sequence of vpextrb/vpinsrb
+;     pairs, one per output byte.
+; NOTE(review): the assertion lines look auto-generated (likely by
+; utils/update_llc_test_checks.py) - regenerate rather than hand-edit; confirm.
+define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62(<64 x i8> %x) {
+; AVX512F-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BW-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm1
+; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
+; AVX512BWVL: # BB#0:
+; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
+; AVX512BWVL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $14, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+; Both shuffle operands are %x and every mask index is below 64, so only the
+; first operand is ever read. The last index is 62 (not 61) on purpose.
+ %res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
+ ret <16 x i8> %res
+}