RegisterClass SrcRC, X86VectorVTInfo DstVT,
X86MemOperand x86memop, PatFrag ld_frag, string asm,
string mem> {
+let ExeDomain = DstVT.ExeDomain in {
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2)))]>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
X86FoldableSchedWrite sched, RegisterClass SrcRC,
X86VectorVTInfo DstVT, string asm,
string mem> {
+ let ExeDomain = DstVT.ExeDomain in
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr> {
- let Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
X86VectorVTInfo _DstRC, SDNode OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, string mem, X86FoldableSchedWrite sched,
+ Domain d,
SchedRead Int2Fpu = ReadDefault> {
+ let ExeDomain = d in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpNode SrcRC:$src))]>,
mem#"\t{$src, $dst|$dst, $src}",
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
Sched<[sched.Folded]>;
+ }
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm, string mem,
- X86FoldableSchedWrite sched> {
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+ X86FoldableSchedWrite sched, Domain d> {
+let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>,
+ WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>,
+ WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>,
+ WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>,
+ WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
}
// where appropriate to do so.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
- WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
- WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+ VEX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
- WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
+ WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
- WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
+ WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+ VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
let isCodeGenOnly = 1 in {
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>, XS, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
- WriteCvtSS2I>, XS, REX_W, SIMD_EXC;
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>, XD, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
- WriteCvtSD2I>, XD, REX_W, SIMD_EXC;
+ WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
- WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss", "cvtsi2ss{q}",
- WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
+ WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd", "cvtsi2sd{l}",
- WriteCvtI2SD, ReadInt2Fpu>, XD;
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd", "cvtsi2sd{q}",
- WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
+ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
ValueType DstVT, ValueType SrcVT, SDNode OpNode,
Operand memop, ComplexPattern mem_cpat, string asm,
- X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched, Domain d> {
+let ExeDomain = d in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
[(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
+}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, X86MemOperand x86memop,
string asm, string mem, X86FoldableSchedWrite sched,
- bit Is2Addr = 1> {
-let hasSideEffects = 0 in {
+ Domain d, bit Is2Addr = 1> {
+let hasSideEffects = 0, ExeDomain = d in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I>, XD, VEX, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+ SSEPackedDouble>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
- sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+ SSEPackedDouble>, XD, REX_W;
}
let Predicates = [UseAVX] in {
defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC;
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
+ XS, VEX_4V, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
+ XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
+ XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
+ XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC;
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
+ XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXC;
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
+ XS, REX_W, SIMD_EXC;
defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
+ XD;
defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC;
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
+ XD, REX_W, SIMD_EXC;
}
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
- "cvttss2si", WriteCvtSS2I>,
+ "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
XS, VEX, VEX_LIG, VEX_W;
+// cvttsd2si converts a scalar double, so it takes the SD2I scheduling class
+// (same class as the non-VEX CVTTSD2SI/CVTTSD2SI64 intrinsic definitions).
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSS2I>, XD, VEX, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
- "cvttsd2si", WriteCvtSS2I>,
+ "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_LIG, VEX_W;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
- "cvttss2si", WriteCvtSS2I>, XS, REX_W;
+ "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
+ XS, REX_W;
defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSD2I>, XD;
+ WriteCvtSD2I, SSEPackedDouble>, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
- "cvttsd2si", WriteCvtSD2I>, XD, REX_W;
+ "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
+ XD, REX_W;
}
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, VEX, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS;
+ WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I>, XS, REX_W;
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
ValueType vt, X86MemOperand x86memop,
PatFrag ld_frag, string OpcodeStr, Domain d,
X86FoldableSchedWrite sched = WriteFCom> {
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+ ExeDomain = d in {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
ComplexPattern mem_cpat, string OpcodeStr,
Domain d,
X86FoldableSchedWrite sched = WriteFCom> {
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT: retq
;
; NODQ-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NODQ-NEXT: retq
;
; VLDQ-LABEL: slto2f64:
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; VLNODQ-NEXT: retq
;
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
-; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm2
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
-; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; NODQ-NEXT: retq
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
-; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; SSE2-LABEL: trunc_signed_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: cvtsi2sd %rcx, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_v2f64:
; SSE2-LABEL: trunc_signed_v4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT: cvttsd2si %xmm1, %rcx
; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rsi
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rdx, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rsi, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: cvtsi2sd %rcx, %xmm2
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_v4f64:
;
; ALL-LABEL: test_zext_cmp11:
; ALL: # %bb.0: # %entry
-; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; ALL-NEXT: vucomisd %xmm2, %xmm0
; ALL-NEXT: sete %al
; ALL-NEXT: vucomisd %xmm2, %xmm1
; CHECK-NEXT: movq %xmm2, %rax
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
; CHECK-NEXT: retq
define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE: # %bb.0:
-; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: jb .LBB1_2
; SSE-NEXT: # %bb.1: # %.split
;
; AVX-LABEL: d:
; AVX: # %bb.0:
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: jb .LBB1_2
; AVX-NEXT: # %bb.1: # %.split
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $g, %eax
; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: ucomisd %xmm1, %xmm0
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: jnp .LBB0_2
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-LABEL: fptoui_2f64_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rcx
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd %ecx, %xmm1
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_2i64_to_2f64:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_2i64_to_2f64:
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_2f64:
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_2f64:
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f64:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
-; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f64:
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_load_2i64_to_2f64:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_load_4i64_to_4f64:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
-; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f64:
; VEX: # %bb.0:
-; VEX-NEXT: vmovdqa (%rdi), %xmm0
+; VEX-NEXT: vmovapd (%rdi), %xmm0
; VEX-NEXT: vmovdqa 16(%rdi), %xmm1
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; VEX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT: vmovapd (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
;
; VEX-LABEL: sitofp_load_8i64_to_8f32:
; VEX: # %bb.0:
-; VEX-NEXT: vmovdqa (%rdi), %xmm0
+; VEX-NEXT: vmovaps (%rdi), %xmm0
; VEX-NEXT: vmovdqa 16(%rdi), %xmm1
; VEX-NEXT: vmovdqa 32(%rdi), %xmm2
; VEX-NEXT: vmovdqa 48(%rdi), %xmm3
;
; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT: vmovaps (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3
;
; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3
; VEX-LABEL: uitofp_load_4i64_to_4f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovdqa (%rdi), %xmm2
-; VEX-NEXT: vmovdqa 16(%rdi), %xmm0
+; VEX-NEXT: vmovaps 16(%rdi), %xmm0
; VEX-NEXT: vpextrq $1, %xmm2, %rax
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: js .LBB81_1
; VEX-LABEL: uitofp_load_8i64_to_8f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovdqa (%rdi), %xmm1
-; VEX-NEXT: vmovdqa 16(%rdi), %xmm0
+; VEX-NEXT: vmovaps 16(%rdi), %xmm0
; VEX-NEXT: vmovdqa 32(%rdi), %xmm4
; VEX-NEXT: vmovdqa 48(%rdi), %xmm3
; VEX-NEXT: vpextrq $1, %xmm4, %rax
;
; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT: vmovaps (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3
;
; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3