// avx_fp_unop_rm - Multiclass taking two 8-bit opcodes (opcss for the scalar
// single form, opcsd for the scalar double form) and a mnemonic prefix
// OpcodeStr. The visible defs are SSr (FR32 operands, "ss" suffix) and SDr
// (FR64 operands, "sd" suffix); both derive from SS4AIi8 with MRMSrcReg, take
// two register sources plus an i32u8imm immediate, and have an empty
// selection pattern list.
//
// The '-'/'+' prefixed lines are diff markers: the change replaces a single
// `let ExeDomain = GenericDomain` wrapping both defs with one `let` per def,
// SSEPackedSingle for SSr and SSEPackedDouble for SDr. hasSideEffects = 0 is
// kept on both.
//
// NOTE(review): each def shows `!strconcat(OpcodeStr,` twice, and these
// register-form defs carry Sched<[WriteFAddLd, ReadAfterLd]>. This looks like
// lines were elided from the excerpt between a register form and a following
// memory form -- confirm against the full file before relying on it.
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr> {
-let ExeDomain = GenericDomain, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
+} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
+let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
-} // ExeDomain = GenericDomain, hasSideEffects = 0
+} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
// sse41_fp_unop_s - Multiclass taking two 8-bit opcodes (opcss, opcsd) and a
// mnemonic prefix OpcodeStr. The visible defs are SSr (FR32, "ss" suffix) and
// SDr (FR64, "sd" suffix); both derive from SS4AIi8 with MRMSrcReg, take one
// register source plus an i32u8imm immediate, and have an empty selection
// pattern list.
//
// The '-'/'+' prefixed lines are diff markers: the single
// `let ExeDomain = GenericDomain` block is split so that SSr is defined under
// SSEPackedSingle and SDr under SSEPackedDouble, each keeping
// hasSideEffects = 0.
//
// NOTE(review): the doubled `!strconcat(OpcodeStr,` lines and the
// WriteFAddLd/ReadAfterLd scheduling on register-form defs suggest that some
// lines are missing from this excerpt -- confirm against the full file.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr> {
-let ExeDomain = GenericDomain, hasSideEffects = 0 in {
+let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
+} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
+let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
-} // ExeDomain = GenericDomain, hasSideEffects = 0
+} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
// sse41_fp_binop_s - Multiclass taking two 8-bit opcodes (opcss, opcsd), a
// mnemonic prefix OpcodeStr, one intrinsic per element type (F32Int, F64Int)
// and an Is2Addr bit that defaults to 1. The visible defs are SSr_Int and
// SDr_Int; both derive from SS4AIi8 with MRMSrcReg, use VR128 for the
// destination and both register sources, and take an i32u8imm immediate.
// Their patterns set $dst to the result of applying the matching intrinsic to
// $src1, $src2 and the immediate.
//
// The '-'/'+' prefixed lines are diff markers: the single
// `let ExeDomain = GenericDomain` block is split so that SSr_Int is defined
// under SSEPackedSingle and SDr_Int under SSEPackedDouble, each keeping
// isCodeGenOnly = 1.
//
// NOTE(review): the branches of `!if(Is2Addr,` are not visible here, and the
// patterns use sse_load_f32/sse_load_f64 for $src2 although the operand list
// declares VR128:$src2. This looks like lines were elided from the excerpt
// -- confirm against the full file.
multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
-let ExeDomain = GenericDomain, isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
[(set VR128:$dst,
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
Sched<[WriteFAddLd, ReadAfterLd]>;
+} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
+let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
Sched<[WriteFAddLd, ReadAfterLd]>;
-} // ExeDomain = GenericDomain, isCodeGenOnly = 1
+} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
// FP round - roundss, roundps, roundsd, roundpd
define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_roundsd:
; GENERIC: # BB#0:
-; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
;
; SLM-LABEL: test_roundsd:
; SLM: # BB#0:
-; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]