bool ForLoadFold = false) {
// Set the OpNum parameter to the first source operand.
switch (Opcode) {
+ case X86::MMX_PUNPCKHBWirr:
+ case X86::MMX_PUNPCKHWDirr:
+ case X86::MMX_PUNPCKHDQirr:
+ case X86::MMX_PUNPCKLBWirr:
+ case X86::MMX_PUNPCKLWDirr:
+ case X86::MMX_PUNPCKLDQirr:
+ case X86::MOVHLPSrr:
case X86::PACKSSWBrr:
case X86::PACKUSWBrr:
case X86::PACKSSDWrr:
case X86::PUNPCKLDQrr:
case X86::PUNPCKHQDQrr:
case X86::PUNPCKLQDQrr:
+ case X86::SHUFPDrri:
+ case X86::SHUFPSrri:
// These instructions are sometimes used with an undef first or second
// source. Return true here so BreakFalseDeps will assign this source to the
// same register as the first source to avoid a false dependency.
// Operand 1 of these instructions is tied so they're separate from their
// VEX counterparts.
return OpNum == 2 && !ForLoadFold;
+ case X86::VMOVLHPSrr:
+ case X86::VMOVLHPSZrr:
case X86::VPACKSSWBrr:
case X86::VPACKUSWBrr:
case X86::VPACKSSDWrr:
case X86::VPACKUSWBZ128rr:
case X86::VPACKSSDWZ128rr:
case X86::VPACKUSDWZ128rr:
+ case X86::VPERM2F128rr:
+ case X86::VPERM2I128rr:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF32X4Zrri:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFF64X2Zrri:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI32X4Zrri:
+ case X86::VSHUFI64X2Z256rri:
+ case X86::VSHUFI64X2Zrri:
case X86::VPUNPCKHBWrr:
case X86::VPUNPCKLBWrr:
case X86::VPUNPCKHBWYrr:
; X64-NEXT: movd %ecx, %mm2
; X64-NEXT: punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT: movd %esi, %mm1
-; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
+; X64-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X64-NEXT: punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT: paddd %mm1, %mm1
; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT: pxor %mm0, %mm0
; X86-NEXT: pxor %mm1, %mm1
-; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
+; X86-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X86-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT: pxor %mm0, %mm0
; X64-NEXT: pxor %mm1, %mm1
-; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
+; X64-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X64-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %edx, %xmm2
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %edi
-; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X86-NEXT: movd %xmm1, %ebx
; X86-NEXT: cltd
; X86-NEXT: idivl %ebx
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %edi
-; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X86-NEXT: movd %xmm1, %ebx
; X86-NEXT: cltd
; X86-NEXT: idivl %ebx