From 3041434450e6c9cbc3476289f7c862f346126296 Mon Sep 17 00:00:00 2001 From: "Liu, Chen3" Date: Fri, 6 Dec 2019 09:44:33 +0800 Subject: [PATCH] Add strict fp support for instructions fadd/fsub/fmul/fdiv Differential Revision: https://reviews.llvm.org/D68757 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +-- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++++---- llvm/lib/Target/X86/X86InstrAVX512.td | 16 +++++------ llvm/lib/Target/X86/X86InstrSSE.td | 32 +++++++++++----------- llvm/test/CodeGen/X86/vec-strict-128.ll | 12 ++++---- llvm/test/CodeGen/X86/vec-strict-256.ll | 8 +++--- llvm/test/CodeGen/X86/vec-strict-512.ll | 4 +-- .../X86/vector-constrained-fp-intrinsics.ll | 26 +++++++++--------- 8 files changed, 67 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 3c33c4b..b955f63 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5219,10 +5219,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) { SelectCode(Res.getNode()); return; } - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: case ISD::STRICT_FP_ROUND: { - // X87 instructions has enabled these strict fp operation. + // X87 instructions has enabled this strict fp operation. bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 || Node->getOperand(1).getSimpleValueType() == MVT::f80; if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87())) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a840ca4..a50d30f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -593,8 +593,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); // Handle constrained floating-point operations of scalar. - setOperationAction(ISD::STRICT_FMUL , VT, Legal); - setOperationAction(ISD::STRICT_FDIV , VT, Legal); setOperationAction(ISD::STRICT_FSQRT , VT, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten @@ -623,13 +621,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } else // SSE immediates. addLegalFPImmediate(APFloat(+0.0)); // xorpd } - - // FIXME: Mark these legal to prevent them from being expanded to a - // libcall in LegalizeDAG. They'll be mutated by X86ISelDAGToDAG::Select. + // Handle constrained floating-point operations of scalar. setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); // We don't support FMA. setOperationAction(ISD::FMA, MVT::f64, Expand); @@ -864,6 +864,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -1160,6 +1164,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); @@ -1429,6 +1437,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5917894..3f782f7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5397,13 +5397,13 @@ multiclass avx512_binop_s_sae opc, string OpcodeStr, SDNode OpNode, NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, +defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, SchedWriteFAddSizes, 1>; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds, +defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, SchedWriteFMulSizes, 1>; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds, +defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, SchedWriteFAddSizes, 0>; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds, +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, SchedWriteFDivSizes, 0>; defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, SchedWriteFCmpSizes, 0>; @@ -5555,16 +5555,16 @@ multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } -defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, +defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512, SchedWriteFAddSizes, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, +defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512, SchedWriteFMulSizes, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512, SchedWriteFAddSizes>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512, SchedWriteFDivSizes>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index b8e80bc..adc616e 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2667,18 +2667,18 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { } // Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>, +defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>, basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>, - basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>, +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>, basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>; let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>, + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>, - basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>, + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, @@ -2773,15 +2773,15 @@ multiclass scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll index d8de98c..a884303 100644 --- a/llvm/test/CodeGen/X86/vec-strict-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-128.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll index 5b44c6c..8828deb 100644 --- a/llvm/test/CodeGen/X86/vec-strict-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float>, <8 x float>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll index aa23735..8130884 100644 --- a/llvm/test/CodeGen/X86/vec-strict-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s declare <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double>, <8 x double>, metadata, metadata) declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 8905eec..e9b5e82 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -115,10 +115,10 @@ define <4 x double> @constrained_vector_fdiv_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fdiv_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] -; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0] ; CHECK-NEXT: divpd %xmm2, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] +; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fdiv_v4f64: @@ -507,10 +507,10 @@ entry: define <4 x double> @constrained_vector_fmul_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fmul_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [2.0E+0,3.0E+0] -; CHECK-NEXT: mulpd %xmm1, %xmm0 -; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0] +; CHECK-NEXT: mulpd %xmm0, %xmm1 +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fmul_v4f64: @@ -644,10 +644,10 @@ entry: define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1] -; CHECK-NEXT: addpd %xmm1, %xmm0 -; CHECK-NEXT: addpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1] +; CHECK-NEXT: addpd %xmm0, %xmm1 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fadd_v4f64: @@ -784,10 +784,10 @@ entry: define <4 x double> @constrained_vector_fsub_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fsub_v4f64: -- 2.7.4