From: Craig Topper
Date: Wed, 30 Oct 2019 21:56:19 +0000 (-0700)
Subject: [X86] Model MXCSR for all SSE instructions
X-Git-Tag: llvmorg-11-init~5433
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8f48ba993ba32925f37a374f624663da37d96254;p=platform%2Fupstream%2Fllvm.git

[X86] Model MXCSR for all SSE instructions

This patch adds MXCSR as a reserved physical register and models its use by X86 SSE instructions. It also adds the "mayRaiseFPException" flag to the instructions that may raise an FP exception according to the architecture definition.

Following what SystemZ and other targets do, only the current rounding modes and the IEEE exception masks are modeled. *Changes* of the MXCSR due to exceptions are not modeled.

Patch by Pengfei Wang

Differential Revision: https://reviews.llvm.org/D68121
---
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index e8f0d937..ac26850 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -227,6 +227,7 @@ class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } class NOTRACK { bit hasNoTrackPrefix = 1; } +class SIMD_EXC { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 09a04c0..ffdcb65 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -837,7 +837,7 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, ValueType DstTy, ValueType SrcTy, PatFrag ld_frag, string asm, Domain d, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I, Sched<[sched]>; @@ -864,7 +864,7 @@ let hasSideEffects = 0, Predicates = [UseAVX] in { } // hasSideEffects = 0 } -let isCodeGenOnly = 1, Predicates = [UseAVX] in { +let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", WriteCvtSS2I>, @@ -889,13 +889,13 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, // where appropriate to do so.
let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS>, XS, VEX_4V, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; } // isCodeGenOnly = 1 let Predicates = [UseAVX] in { @@ -921,28 +921,28 @@ let Predicates = [UseAVX] in { let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I>, XS, SIMD_EXC; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, REX_W; + WriteCvtSS2I>, XS, REX_W, SIMD_EXC; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD; + WriteCvtSD2I>, XD, SIMD_EXC; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, REX_W; + WriteCvtSD2I>, XD, REX_W, SIMD_EXC; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, ReadInt2Fpu>, XS; + WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W; + WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", WriteCvtI2SD, ReadInt2Fpu>, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W; + WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 // Conversion Instructions Intrinsics - Match intrinsics which expect MM @@ -982,6 +982,7 @@ let hasSideEffects = 0 in { } } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", @@ -994,27 +995,27 @@ defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W; - +} let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; } let 
Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1048,7 +1049,7 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", /// SSE 1 Only // Aliases for intrinsics -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1064,6 +1065,7 @@ defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, "cvttsd2si", WriteCvtSS2I>, XD, VEX, VEX_LIG, VEX_W; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS; @@ -1076,6 +1078,7 @@ defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", WriteCvtSD2I>, XD, REX_W; +} def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; @@ -1111,7 +1114,7 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1119,6 +1122,7 @@ defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS; @@ -1139,6 +1143,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PS>, PS, Requires<[UseSSE2]>; +} // AVX aliases def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", @@ -1184,13 +1189,13 @@ def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS]>; + Sched<[WriteCvtSD2SS]>, SIMD_EXC; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } def : Pat<(f32 (fpround FR64:$src)), @@ -1201,14 +1206,15 @@ let isCodeGenOnly = 1 in { def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), 
"cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround FR64:$src))]>, - Sched<[WriteCvtSD2SS]>; + Sched<[WriteCvtSD2SS]>, SIMD_EXC; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>, XD, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSD2SS.Folded]>; + Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1238,6 +1244,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix @@ -1246,14 +1253,14 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>; + Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, - Requires<[UseAVX, OptForSize]>; + Requires<[UseAVX, OptForSize]>, SIMD_EXC; } // isCodeGenOnly = 1, hasSideEffects = 0 def : Pat<(f64 (fpextend FR32:$src)), @@ -1265,15 +1272,15 @@ let isCodeGenOnly = 1 in { def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend FR32:$src))]>, - XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>; + XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>, XS, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSS2SD.Folded]>; + Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; } // isCodeGenOnly = 1 -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1418,36 +1425,36 @@ let Predicates = [HasAVX, NoVLX] in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, - VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; + VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; + VEX, VEX_L, 
Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - Sched<[WriteCvtPS2I]>; + Sched<[WriteCvtPS2I]>, SIMD_EXC; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, - Sched<[WriteCvtPS2ILd]>; + Sched<[WriteCvtPS2ILd]>, SIMD_EXC; // Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -1486,15 +1493,16 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXC; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXC; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1529,11 +1537,12 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; +} // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
-let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -1575,15 +1584,15 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXC; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXC; // Convert packed single to packed double -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", @@ -1603,7 +1612,7 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; } -let Predicates = [UseSSE2] in { +let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, @@ -1674,7 +1683,7 @@ let Predicates = [UseSSE2] in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", @@ -1703,11 +1712,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, - Sched<[WriteCvtPD2PS]>; + Sched<[WriteCvtPD2PS]>, SIMD_EXC; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>, - Sched<[WriteCvtPD2PS.Folded]>; + Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), @@ -1725,6 +1734,7 @@ multiclass sse12_cmp_scalar { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1736,6 +1746,7 @@ multiclass sse12_cmp_scalar, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let isCodeGenOnly = 1 in { let ExeDomain = SSEPackedSingle in @@ -1763,6 +1774,7 @@ let isCodeGenOnly = 1 in { multiclass sse12_cmp_scalar_int { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, @@ -1775,6 +1787,7 @@ let mayLoad = 1 in mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} // Aliases to match intrinsics which expect XMM operand(s). 
let ExeDomain = SSEPackedSingle in @@ -1804,7 +1817,7 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, PatFrag ld_frag, string OpcodeStr, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr: SI, @@ -1823,6 +1836,7 @@ multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, ValueType vt, Operand memop, ComplexPattern mem_cpat, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int: SI, @@ -1834,6 +1848,7 @@ let mayLoad = 1 in mem_cpat:$src2))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, @@ -1888,6 +1903,7 @@ multiclass sse12_cmp_packed { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1899,6 +1915,7 @@ multiclass sse12_cmp_packed, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} defm VCMPPS : sse12_cmp_packed opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed opc, string OpcodeStr, sched.PD.XMM>, PD; } } +} multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2599,10 +2619,12 @@ multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, sched.PD.Scl>, XD; } } +} multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2619,6 +2641,7 @@ multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, SSEPackedDouble, sched.PD.Scl>, XD; } } +} // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, @@ -2964,7 +2987,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXC; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. 
@@ -4436,6 +4459,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), multiclass sse3_addsub { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, @@ -4451,6 +4475,7 @@ multiclass sse3_addsub, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -4488,6 +4513,7 @@ multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3DI o, string OpcodeStr, ValueType vt, RegisterClass RC, [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3I o, string OpcodeStr, ValueType vt, RegisterClass RC, [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -5348,6 +5377,7 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, X86FoldableSchedWrite sched> { // Intrinsic operation, reg. // Vector intrinsic operation, reg +let Uses = [MXCSR], mayRaiseFPException = 1 in { def r : SS4AIi8 opc, string OpcodeStr, (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, Sched<[sched.Folded]>; } +} multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { @@ -5400,6 +5431,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { multiclass sse41_fp_unop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } +} multiclass sse41_fp_binop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched, ValueType VT32, ValueType VT64, SDNode OpNode, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { def SSr_Int : SS4AIi8; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } +} // FP round - roundss, roundps, roundsd, roundpd let Predicates = [HasAVX, NoVLX] in { @@ -5959,6 +5994,7 @@ let Predicates = [HasAVX] in { SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, load, f128mem, 0, @@ -5972,6 +6008,7 @@ let Predicates = [HasAVX] in { VR256, load, i256mem, 0, SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; } +} let Predicates = [HasAVX2] in { let isCommutable = 0 in { @@ -5991,11 +6028,11 @@ let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memop, f128mem, 1, - SchedWriteDPPS.XMM>; + SchedWriteDPPS.XMM>, SIMD_EXC; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memop, f128mem, 1, - SchedWriteDPPD.XMM>; + SchedWriteDPPD.XMM>, SIMD_EXC; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit 
immediate diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 9362c60..024a13f 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -523,6 +523,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Set the floating point control register as reserved. Reserved.set(X86::FPCW); + // Set the SIMD floating point control register as reserved. + Reserved.set(X86::MXCSR); + // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 0528b90..f26c2d4 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -294,6 +294,11 @@ def FPSW : X86Reg<"fpsr", 0>; // Floating-point control word def FPCW : X86Reg<"fpcr", 0>; +// SIMD Floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags, FTZ and DAZ are not modeled here. +def MXCSR : X86Reg<"mxcsr", 0>; + // Status flags register. // // Note that some flags that are commonly thought of as part of the status diff --git a/llvm/test/CodeGen/MIR/X86/constant-pool.mir b/llvm/test/CodeGen/MIR/X86/constant-pool.mir index 69a436b..85b2071 100644 --- a/llvm/test/CodeGen/MIR/X86/constant-pool.mir +++ b/llvm/test/CodeGen/MIR/X86/constant-pool.mir @@ -61,12 +61,12 @@ constants: alignment: 4 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -89,10 +89,10 @@ constants: value: 'float 6.250000e+00' body: | bb.0.entry: - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... 
--- @@ -117,12 +117,12 @@ constants: alignment: 1 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -135,11 +135,11 @@ constants: value: 'float 6.250000e+00' body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... 
diff --git a/llvm/test/CodeGen/MIR/X86/fastmath.mir b/llvm/test/CodeGen/MIR/X86/fastmath.mir index 0b5c7ff..5697d8b 100644 --- a/llvm/test/CodeGen/MIR/X86/fastmath.mir +++ b/llvm/test/CodeGen/MIR/X86/fastmath.mir @@ -10,24 +10,24 @@ body: | ; CHECK: %0:fr32 = COPY $xmm0 %0:fr32 = COPY $xmm0 - ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0 - %1:fr32 = nnan VMULSSrr %0, %0 - ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1 - %2:fr32 = ninf VMULSSrr %1, %1 - ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2 - %3:fr32 = nsz VMULSSrr %2, %2 - ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3 - %4:fr32 = arcp VMULSSrr %3, %3 - ; CHECK: %5:fr32 = contract VMULSSrr %4, %4 - %5:fr32 = contract VMULSSrr %4, %4 - ; CHECK: %6:fr32 = afn VMULSSrr %5, %5 - %6:fr32 = afn VMULSSrr %5, %5 - ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6 - %7:fr32 = reassoc VMULSSrr %6, %6 - ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8 - %9:fr32 = contract afn reassoc VMULSSrr %8, %8 + ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + ; CHECK: %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + ; CHECK: %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr + %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr ; CHECK: $xmm0 = COPY %9 $xmm0 = COPY %9 ; CHECK: RET 0, $xmm0 diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir index 1be8bc1..848939c9 100644 --- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -336,10 +336,10 @@ body: | bb.0.entry: liveins: $xmm0 ; CHECK: name: constant_pool_psv - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) - ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool + 8) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool + 8) + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool) + ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool + 8) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool + 8) RETQ $xmm0 ... 
--- diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index 97c6091..eaf68b9 100755 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -2314,38 +2314,38 @@ body: | $xmm0 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VMOVZPQILo2PQIrr $xmm0 $xmm0 = VMOVZPQILo2PQIZrr $xmm0 - ; CHECK: VCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags - VCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags - VCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags - VUCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags - VUCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDrr $xmm0, $xmm1, implicit-def $eflags - VCOMISDZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSrr $xmm0, $xmm1, implicit-def $eflags - VCOMISSZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDrr $xmm0, $xmm1, implicit-def $eflags - VUCOMISDZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSrr $xmm0, $xmm1, implicit-def $eflags - VUCOMISSZrr $xmm0, $xmm1, implicit-def $eflags + ; CHECK: VCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, 
implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr ; CHECK: VEXTRACTPSmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1 VEXTRACTPSZmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1 ; CHECK: $eax = VEXTRACTPSrr $xmm0, 1 @@ -4696,38 +4696,38 @@ body: | $xmm16 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VMOVZPQILo2PQIZrr $xmm16 $xmm16 = VMOVZPQILo2PQIZrr $xmm16 - ; CHECK: VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - 
VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags + ; CHECK: VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr ; CHECK: $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 ; CHECK: $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15 diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll index 3a877bf..7d05600 100644 --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = 
"x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh call void @bar1() call void @bar2() ret void