From 3546c1603a3e8c283dce5024d2a0b6614f1f3fbc Mon Sep 17 00:00:00 2001 From: Aleksandar Beserminji Date: Fri, 27 Apr 2018 13:30:27 +0000 Subject: [PATCH] [mips] Fix how the compiler fuses instructions into fmadd/fmsub This patch makes the compiler not fuse fmul and fadd/fsub into fmadd/fmsub by default. Instead, the -fp-contract=fast option can be used when such behavior is desired. Differential Revision: https://reviews.llvm.org/D46057 llvm-svn: 331033 --- llvm/lib/Target/Mips/MipsISelLowering.cpp | 1 + llvm/lib/Target/Mips/MipsISelLowering.h | 3 ++ llvm/lib/Target/Mips/MipsInstrInfo.td | 7 ++++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td | 22 +++++++++--- llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 6 ++-- llvm/test/CodeGen/Mips/fp-contract.ll | 38 ++++++++++++++++++++ llvm/test/CodeGen/Mips/msa/arithmetic_float.ll | 50 +++++++++++++------------- 7 files changed, 94 insertions(+), 33 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/fp-contract.ll diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 1344992..74797e6 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -200,6 +200,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; + case MipsISD::FMS: return "MipsISD::FMS"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; case MipsISD::FSELECT: return "MipsISD::FSELECT"; diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 87422a9c..8d483cc 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -90,6 +90,9 @@ class TargetRegisterClass; // Thread Pointer ThreadPointer, + // Vector Floating Point Multiply and Subtract + FMS, + // Floating Point Branch Conditional 
FPBrcond, diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index b71068d..5290b56 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -252,6 +252,9 @@ def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC">; def HasVirt : Predicate<"Subtarget->hasVirt()">, AssemblerPredicate<"FeatureVirt">; +// TODO: Add support for FPOpFusion::Standard +def AllowFPOpFusion : Predicate<"TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. // They are mutually exclusive. @@ -490,6 +493,10 @@ class ABI_NOT_N64 { list AdditionalPredicates = [IsNotN64]; } +class FPOP_FUSION_FAST { + list AdditionalPredicates = [AllowFPOpFusion]; +} + //===----------------------------------------------------------------------===// class MipsPat : Pat, PredicateControl { diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index 7c56bc7..7682223 100644 --- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -46,6 +46,7 @@ def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>; def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>; def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>; def MipsINSVE : SDNode<"MipsISD::INSVE", SDT_INSVE>; +def MipsFMS : SDNode<"MipsISD::FMS", SDTFPTernaryOp>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -369,9 +370,6 @@ def vbset_d : PatFrag<(ops node:$ws, node:$wt), (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1), node:$wt))>; -def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt), - (fsub node:$wd, (fmul node:$ws, node:$wt))>; - def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt), (add node:$wd, (mul node:$ws, node:$wt))>; @@ -2091,8 +2089,8 @@ class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, class 
FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, MSA128DOpnd>; -class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", fms, MSA128WOpnd>; -class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", fms, MSA128DOpnd>; +class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", MipsFMS, MSA128WOpnd>; +class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", MipsFMS, MSA128DOpnd>; class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128WOpnd>; class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128DOpnd>; @@ -3135,6 +3133,20 @@ def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC; def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC; def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC; +def : MipsPat<(fsub MSA128WOpnd:$wd, (fmul MSA128WOpnd:$ws, MSA128WOpnd:$wt)), + (FMSUB_W MSA128WOpnd:$wd, MSA128WOpnd:$ws, MSA128WOpnd:$wt)>, + FPOP_FUSION_FAST; +def : MipsPat<(fsub MSA128DOpnd:$wd, (fmul MSA128DOpnd:$ws, MSA128DOpnd:$wt)), + (FMSUB_D MSA128DOpnd:$wd, MSA128DOpnd:$ws, MSA128DOpnd:$wt)>, + FPOP_FUSION_FAST; + +def : MipsPat<(fadd MSA128WOpnd:$wd, (fmul MSA128WOpnd:$ws, MSA128WOpnd:$wt)), + (FMADD_W MSA128WOpnd:$wd, MSA128WOpnd:$ws, MSA128WOpnd:$wt)>, + FPOP_FUSION_FAST; +def : MipsPat<(fadd MSA128DOpnd:$wd, (fmul MSA128DOpnd:$ws, MSA128DOpnd:$wt)), + (FMADD_D MSA128DOpnd:$wd, MSA128DOpnd:$ws, MSA128DOpnd:$wt)>, + FPOP_FUSION_FAST; + def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC; def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC; def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC; diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index bfc6820..885c24f 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1858,10 +1858,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fmsub_w: case Intrinsic::mips_fmsub_d: { // TODO: If intrinsics have fast-math-flags, propagate them. 
- EVT ResTy = Op->getValueType(0); - return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), - DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, - Op->getOperand(2), Op->getOperand(3))); + return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); } case Intrinsic::mips_frint_w: case Intrinsic::mips_frint_d: diff --git a/llvm/test/CodeGen/Mips/fp-contract.ll b/llvm/test/CodeGen/Mips/fp-contract.ll new file mode 100644 index 0000000..600b7c0 --- /dev/null +++ b/llvm/test/CodeGen/Mips/fp-contract.ll @@ -0,0 +1,38 @@ +; Test that the compiler does not fuse fmul and fadd into fmadd when no -fp-contract=fast +; option is set (the same applies for fmul, fsub and fmsub). + +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=CHECK-CONTRACT-OFF +; RUN: llc -march=mipsel -mattr=+msa,+fp64 -fp-contract=off < %s | FileCheck %s --check-prefixes=CHECK-CONTRACT-OFF +; RUN: llc -march=mips -mattr=+msa,+fp64 -fp-contract=fast < %s | FileCheck %s --check-prefixes=CHECK-CONTRACT-FAST + +declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>) +declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) +declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>) + +define void @foo(<4 x float>* %agg.result, <4 x float>* %acc, <4 x float>* %a, <4 x float>* %b) { +entry: + %0 = load <4 x float>, <4 x float>* %a, align 16 + %1 = load <4 x float>, <4 x float>* %b, align 16 + %2 = call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1) + %3 = load <4 x float>, <4 x float>* %acc, align 16 + %4 = call <4 x float> @llvm.mips.fadd.w(<4 x float> %3, <4 x float> %2) + store <4 x float> %4, <4 x float>* %agg.result, align 16 + ret void + ; CHECK-CONTRACT-OFF: fmul.w + ; CHECK-CONTRACT-OFF: fadd.w + ; CHECK-CONTRACT-FAST: fmadd.w +} + +define void @boo(<4 x float>* %agg.result, <4 x float>* %acc, <4 x float>* %a, <4 x float>* %b) { +entry: + %0 = load <4 x float>, <4 x 
float>* %a, align 16 + %1 = load <4 x float>, <4 x float>* %b, align 16 + %2 = call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1) + %3 = load <4 x float>, <4 x float>* %acc, align 16 + %4 = call <4 x float> @llvm.mips.fsub.w(<4 x float> %3, <4 x float> %2) + store <4 x float> %4, <4 x float>* %agg.result, align 16 + ret void + ; CHECK-CONTRACT-OFF: fmul.w + ; CHECK-CONTRACT-OFF: fsub.w + ; CHECK-CONTRACT-FAST: fmsub.w +} diff --git a/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll b/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll index d2ead53..b8d9f30 100644 --- a/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll +++ b/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll @@ -137,44 +137,46 @@ define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, ; CHECK: .size fma_v2f64 } -define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, +define void @fmlu_fsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, <4 x float>* %c) nounwind { - ; CHECK: fmsub_v4f32: + ; CHECK: fmlu_fsub_v4f32: - %1 = load <4 x float>, <4 x float>* %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) - %2 = load <4 x float>, <4 x float>* %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) - %3 = load <4 x float>, <4 x float>* %c - ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) - %4 = fmul <4 x float> %2, %3 - %5 = fsub <4 x float> %1, %4 - ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]] + %1 = load <4 x float>, <4 x float>* %b + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($6) + %2 = load <4 x float>, <4 x float>* %c + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($7) + %3 = fmul <4 x float> %1, %2 + ; CHECK-DAG: fmul.w [[R2]], [[R1]], [[R2]] + %4 = load <4 x float>, <4 x float>* %a + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($5) + %5 = fsub <4 x float> %4, %3 + ; CHECK-DAG: fsub.w [[R2]], [[R3]], [[R2]] store <4 x float> %5, <4 x float>* %d - ; CHECK-DAG: st.w [[R1]], 0($4) + ; CHECK-DAG: st.w [[R2]], 0($4) ret void - ; CHECK: .size fmsub_v4f32 + ; CHECK: .size fmlu_fsub_v4f32 } 
-define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, +define void @fmul_fsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, <2 x double>* %c) nounwind { - ; CHECK: fmsub_v2f64: + ; CHECK: fmul_fsub_v2f64: - %1 = load <2 x double>, <2 x double>* %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) - %2 = load <2 x double>, <2 x double>* %b + %1 = load <2 x double>, <2 x double>* %b + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($7) + %2 = load <2 x double>, <2 x double>* %c ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) - %3 = load <2 x double>, <2 x double>* %c - ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) - %4 = fmul <2 x double> %2, %3 - %5 = fsub <2 x double> %1, %4 - ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]] + %3 = fmul <2 x double> %1, %2 + ; CHECK-DAG: fmul.d [[R1]], [[R2]], [[R1]] + %4 = load <2 x double>, <2 x double>* %a + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($5) + %5 = fsub <2 x double> %4, %3 + ; CHECK-DAG: fsub.d [[R1]], [[R3]], [[R1]] store <2 x double> %5, <2 x double>* %d ; CHECK-DAG: st.d [[R1]], 0($4) ret void - ; CHECK: .size fmsub_v2f64 + ; CHECK: .size fmul_fsub_v2f64 } define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { -- 2.7.4