From f7658241cb27491b4160a1f7060ef883bc535d09 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Aug 2020 10:25:16 +0100 Subject: [PATCH] [AArch64] Consider instruction-level contract FMFs in combiner patterns. Currently, instruction-level fast-math flags are not considered when generating patterns for the machine combiner. This leads to some missed opportunities to generate FMAs in combination with `#pragma clang fp contract (fast)`. For example, when building the example below with -O3 for AArch64, no FMADD is generated. If built with -O2 and the DAGCombiner is used instead of the MachineCombiner for FMAs, an FMADD is generated. With this patch, the same code is generated in both cases. float madd_contract(float a, float b, float c) { #pragma clang fp contract (fast) return (a * b) + c; } Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D84930 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 +++++-- .../CodeGen/AArch64/machine-combiner-instr-fmf.mir | 32 +++++++++------------- llvm/test/CodeGen/AArch64/neon-fma-FMF.ll | 1 + 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 323ac76..b6fda6b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3861,7 +3861,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) { return false; } -// FP Opcodes that can be combined with a FMUL +// FP Opcodes that can be combined with a FMUL. 
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -3883,8 +3883,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - return (Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast); + // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by + // the target options or if FADD/FSUB has the contract fast-math flag. + return Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast || + Inst.getFlag(MachineInstr::FmContract); + return true; } return false; } diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir index 23ed96a..992e636 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,8 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_fast alignment: 4 @@ -46,8 +45,7 @@ body: | # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract @@ -81,7 +79,7 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMADD, because we don't have the contract flag on the FADD. 
# CHECK-LABEL: name: scalar_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 @@ -121,14 +119,13 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMADD, because we have the contract flag on the FADD. # # CHECK-LABEL: name: scalar_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract_op1 @@ -203,14 +200,13 @@ body: | ... -# Can create FMADD, because both the fmul and fadd have all fast-math flags. +# Can create FMLA, because both the fmul and fadd have all fast-math flags. # # CHECK-LABEL: name: vector_fmadd_fast # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_fast alignment: 4 @@ -243,14 +239,13 @@ body: | ... -# Can create FMADD, because both the fmul and fadd have the contract fast-math flag. +# Can create FMLA, because both the fmul and fadd have the contract fast-math flag. # # CHECK-LABEL: name: vector_fmadd_contract # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract alignment: 4 @@ -283,7 +278,7 @@ body: | ... 
-# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMLA, because we don't have the contract flag on the FADD. # # CHECK-LABEL: name: vector_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 @@ -323,14 +318,13 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMLA, because we have the contract flag on the FADD. # # CHECK-LABEL: name: vector_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract_op1 @@ -364,7 +358,7 @@ body: | ... -# Do not create FMADD, as nsz flag does not allow it. +# Do not create FMLA, as nsz flag does not allow it. # # CHECK-LABEL: name: vector_fmadd_nsz # CHECK: [[C:%.*]]:fpr128 = COPY $q2 diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll index 893d153..0eb1733 100644 --- a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll +++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -O3 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; CHECK-LABEL: fma_1: -- 2.7.4