This can reuse the existing *_EXTEND node handling.
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
- if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
- HandOpcode == ISD::SIGN_EXTEND) {
+ if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode)) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
- if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
- !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ if ((HandOpcode == ISD::ANY_EXTEND ||
+ HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
; SSE2-LABEL: v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
-; SSSE3-NEXT: pshufb %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
; SSSE3-NEXT: movmskpd %xmm2, %eax
; SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSSE3-NEXT: retq
; AVX12-LABEL: v2i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; SSE2-SSSE3-LABEL: v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7]
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7]
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxwq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; SSE2-SSSE3-LABEL: v2i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
+; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2i32:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; SSE2-SSSE3-LABEL: v4i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
; AVX12-LABEL: v4i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; SSE2-SSSE3-LABEL: v4i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
; AVX12-LABEL: v4i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; AVX12-LABEL: v8i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX2-LABEL: eq_or_to_abs_vec4x16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: eq_or_to_abs_vec4x16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: eq_or_to_abs_vec4x16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: retq
%cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
%cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x8:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm2, %xmm1
-; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
%cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
%xz = zext <8 x i8> %x to <8 x i16>
%yz = zext <8 x i8> %y to <8 x i16>
define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
%xz = zext <8 x i8> %x to <8 x i16>
%yz = zext <8 x i8> %y to <8 x i16>
define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>
define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>
define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>