; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
+; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
+; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
+; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
+; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper