--- /dev/null
+//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCombinerHelper.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
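+// Opcodes into which an fneg of the result can be folded by negating source
+// operands (and, for the min/max variants, switching to the opposite opcode).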
+LLVM_READNONE
+static bool fnegFoldsIntoMI(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ case Intrinsic::amdgcn_fma_legacy:
+ return true;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
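+  // A G_INTRINSIC carries the intrinsic ID as an extra input operand, hence
+  // the higher operand-count threshold. 64-bit floating-point operations
+  // likewise have no VOP2 encodings.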
+ return MI.getNumOperands() >
+ (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4 : 3) ||
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
+}
+
+// Most FP instructions support source modifiers.
+LLVM_READONLY
+static bool hasSourceMods(const MachineInstr &MI) {
+ if (!MI.memoperands().empty())
+ return false;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::G_SELECT:
+ case AMDGPU::G_FDIV:
+ case AMDGPU::G_FREM:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_ANYEXT:
+ case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC:
+ case AMDGPU::G_PHI:
+ return false;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16:
+ case Intrinsic::amdgcn_div_scale:
+ return false;
+ default:
+ return true;
+ }
+ }
+ default:
+ return true;
+ }
+}
+
+static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
+ unsigned CostThreshold = 4) {
+  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+  // a source modifier is truly free for them. For users that could have used
+  // a VOP2 encoding, adding a source modifier forces the larger VOP3 encoding
+  // and increases code size. Try to avoid increasing code size unless we know
+  // it will save on the instruction count.
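+  // For example, with the default threshold, folding an fneg whose five users
+  // would each be pushed from VOP2 into the 64-bit VOP3 encoding trades one
+  // removed instruction for five extra encoding dwords, so it is rejected.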
+ unsigned NumMayIncreaseSize = 0;
+ Register Dst = MI.getOperand(0).getReg();
+ for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
+ if (!hasSourceMods(Use))
+ return false;
+
+ if (!opMustUseVOP3Encoding(Use, MRI)) {
+ if (++NumMayIncreaseSize > CostThreshold)
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool mayIgnoreSignedZero(MachineInstr &MI) {
+ const TargetOptions &Options = MI.getMF()->getTarget().Options;
+ return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
+}
+
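+// Match the bit patterns of 1.0 / (2.0 * pi) in half, single, and double
+// precision; subtargets with hasInv2PiInlineImm() can encode this value as an
+// inline immediate.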
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(),
+ APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
+
+// The negated forms of 0 and 1.0 / (2.0 * pi) do not have inline immediates,
+// so there is an additional cost to negate these constants.
+static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
+ MachineRegisterInfo &MRI) {
+ Optional<FPValueAndVReg> FPValReg;
+ if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
+ if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
+ return true;
+
+ const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
+ if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
+ return true;
+ }
+ return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::G_FMAXNUM:
+ return AMDGPU::G_FMINNUM;
+ case AMDGPU::G_FMINNUM:
+ return AMDGPU::G_FMAXNUM;
+ case AMDGPU::G_FMAXNUM_IEEE:
+ return AMDGPU::G_FMINNUM_IEEE;
+ case AMDGPU::G_FMINNUM_IEEE:
+ return AMDGPU::G_FMAXNUM_IEEE;
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return AMDGPU::G_AMDGPU_FMIN_LEGACY;
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ return AMDGPU::G_AMDGPU_FMAX_LEGACY;
+ default:
+ llvm_unreachable("invalid min/max opcode");
+ }
+}
+
+bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ Register Src = MI.getOperand(1).getReg();
+ MatchInfo = MRI.getVRegDef(Src);
+
+  // If the input has multiple uses and we can either fold the negate down
+  // into the fneg's users, or the other uses of the input cannot take a
+  // source modifier, give up. This both prevents unprofitable transformations
+  // and infinite loops: we won't repeatedly try to fold around a negate that
+  // has no 'good' form.
+ if (MRI.hasOneNonDBGUse(Src)) {
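+    // The fneg is the only user of Src. If every user of the fneg could take
+    // a source modifier without growing to VOP3 (cost threshold 0), the fneg
+    // is already free where it is and folding it up gains nothing.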
+ if (allUsesHaveSourceMods(MI, MRI, 0))
+ return false;
+ } else {
+ if (fnegFoldsIntoMI(*MatchInfo) &&
+ (allUsesHaveSourceMods(MI, MRI) ||
+ !allUsesHaveSourceMods(*MatchInfo, MRI)))
+ return false;
+ }
+
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+    // 0 and 1.0 / (2.0 * pi) do not have negated inline immediates.
+ return !isConstantCostlierToNegate(*MatchInfo,
+ MatchInfo->getOperand(2).getReg(), MRI);
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ return mayIgnoreSignedZero(*MatchInfo);
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ return true;
+ case Intrinsic::amdgcn_fma_legacy:
+ return mayIgnoreSignedZero(*MatchInfo);
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ // Transform:
+ // %A = inst %Op1, ...
+ // %B = fneg %A
+ //
+ // into:
+ //
+ // (if %A has one use, specifically fneg above)
+ // %B = inst (maybe fneg %Op1), ...
+ //
+ // (if %A has multiple uses)
+ // %B = inst (maybe fneg %Op1), ...
+ // %A = fneg %B
+
+  // Replace the register in Op with a register holding the negated value.
+ auto NegateOperand = [&](MachineOperand &Op) {
+ Register Reg = Op.getReg();
+ if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
+ Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
+ replaceRegOpWith(MRI, Op, Reg);
+ };
+
+  // Negate one of the two operands: strip an existing fneg from X or Y if
+  // present, otherwise negate Y.
+ auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
+ Register XReg = X.getReg();
+ Register YReg = Y.getReg();
+    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg)))) {
+      replaceRegOpWith(MRI, X, XReg);
+    } else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg)))) {
+      replaceRegOpWith(MRI, Y, YReg);
+    } else {
+      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
+      replaceRegOpWith(MRI, Y, YReg);
+    }
+ };
+
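+  // Insert any newly built G_FNEGs immediately before MatchInfo so that their
+  // definitions dominate it.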
+ Builder.setInstrAndDebugLoc(*MatchInfo);
+
+  // Negate the appropriate operands so that the value produced by MatchInfo
+  // is negated.
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMUL:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
+ replaceOpcodeWith(*MatchInfo, Opposite);
+ break;
+ }
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ break;
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_FPTRUNC:
+ NegateOperand(MatchInfo->getOperand(1));
+ break;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case Intrinsic::amdgcn_fmul_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ break;
+ case Intrinsic::amdgcn_fmed3:
+ NegateOperand(MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ case Intrinsic::amdgcn_fma_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ default:
+ llvm_unreachable("folding fneg not supported for this intrinsic");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("folding fneg not supported for this instruction");
+ }
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
+
+ if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
+    // MatchInfo now produces the negated value, so use it instead of the old
+    // Dst.
+ replaceRegWith(MRI, Dst, MatchInfoDst);
+ } else {
+    // We want to swap the uses of Dst and MatchInfoDst, but replaceRegWith
+    // would replace defs as well. It is easier to replace the one def with a
+    // new register.
+ LLT Type = MRI.getType(Dst);
+ Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
+ replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
+
+    // MatchInfo now produces the negated value, so use it instead of the old
+    // Dst.
+ replaceRegWith(MRI, Dst, NegatedMatchInfo);
+
+    // Recreate the non-negated value for the other uses of the old
+    // MatchInfoDst.
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+}
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
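+# Test folding of G_FNEG into the instruction that defines its source.
+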
+---
+name: test_fminnum
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fminnum
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMINNUM %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fmaxnum
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fmaxnum
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMAXNUM %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fminnum_ieee
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fminnum_ieee
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMINNUM_IEEE %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fmaxnum_ieee
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fmaxnum_ieee
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMAXNUM_IEEE %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_amdgpu_fmin_legacy
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_amdgpu_fmin_legacy
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[AMDGPU_FMAX_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMAX_LEGACY [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMAX_LEGACY]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_AMDGPU_FMIN_LEGACY %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_amdgpu_fmax_legacy
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_amdgpu_fmax_legacy
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[AMDGPU_FMIN_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMIN_LEGACY [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMIN_LEGACY]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_AMDGPU_FMAX_LEGACY %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fadd
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fadd
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[FNEG]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = nsz G_FADD %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fsub
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fsub
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = nsz G_FSUB %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fma
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fma
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nsz G_FMA [[COPY]], [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = nsz G_FMA %0, %1, %2
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+---
+name: test_fmad
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmad
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+ ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nsz G_FMAD [[COPY]], [[FNEG]], [[FNEG1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAD]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = nsz G_FMAD %0, %1, %2
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+---
+name: test_fmul
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fmul
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMUL %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fpext
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fpext
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[FPEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0(s32)
+ %2:_(s32) = G_FPEXT %1(s16)
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_intrinsic_trunc
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_intrinsic_trunc
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC_TRUNC %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_frint
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_frint
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FRINT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FRINT %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_fnearbyint
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fnearbyint
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEARBYINT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FNEARBYINT %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_intrinsic_round
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_intrinsic_round
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUND]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC_ROUND %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_intrinsic_roundeven
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_intrinsic_roundeven
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUNDEVEN]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_fsin
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fsin
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FSIN:%[0-9]+]]:_(s32) = G_FSIN [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSIN]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FSIN %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_fcanonicalize
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fcanonicalize
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FCANONICALIZE %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_amdgcn_rcp_iflag
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_amdgcn_rcp_iflag
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_RCP_IFLAG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_AMDGPU_RCP_IFLAG %0
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_fptrunc
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_fptrunc
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[FNEG]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[FPTRUNC]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_FPTRUNC %0:_(s64)
+ %2:_(s32) = G_FNEG %1:_
+ $vgpr0 = COPY %2:_(s32)
+
+...
+---
+name: test_amdgcn_rcp
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_amdgcn_rcp
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0(s32)
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_amdgcn_rcp_legacy
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_amdgcn_rcp_legacy
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0(s32)
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_amdgcn_sin
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_amdgcn_sin
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0(s32)
+ %2:_(s32) = G_FNEG %1
+ $vgpr0 = COPY %2(s32)
+
+...
+---
+name: test_fmul_legacy
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fmul_legacy
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY]](s32), [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0(s32), %1(s32)
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fmed3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmed3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0(s32), %1(s32), %2(s32)
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+---
+name: test_amdgcn_fma_legacy
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_amdgcn_fma_legacy
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[FNEG]](s32), [[FNEG1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32)
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+
+# Don't fold fneg for fadd, fsub, fma, fmad or fma_legacy without nsz
+---
+name: test_fadd_sz
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fadd_sz
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FADD]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FADD %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fsub_sz
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fsub_sz
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FSUB]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FSUB %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %3(s32)
+
+...
+---
+name: test_fma_sz
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fma_sz
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMA]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_FMA %0, %1, %2
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+---
+name: test_fmad_sz
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmad_sz
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMAD]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_FMAD %0, %1, %2
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+---
+name: test_amdgcn_fma_legacy_sz
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_amdgcn_fma_legacy_sz
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32)
+ %4:_(s32) = G_FNEG %3
+ $vgpr0 = COPY %4(s32)
+
+...
+
+# Don't negate 0 for minnum, maxnum
+---
+name: test_fminnum_zero
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_fminnum_zero
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FCONSTANT float 0.000000e+00
+ %2:_(s32) = G_FMINNUM %0:_, %1:_
+ %3:_(s32) = G_FNEG %2:_
+ $vgpr0 = COPY %3:_(s32)
+
+...
+
+# On VI and above, don't negate 1.0 / (2.0 * pi)
+---
+name: test_fminnum_inv2pi_half
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_fminnum_inv2pi_half
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s16) = G_FMINNUM [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FMINNUM]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0:_(s32)
+ %2:_(s16) = G_FCONSTANT half 0xH3118
+ %3:_(s16) = G_FMINNUM %1:_, %2:_
+ %4:_(s16) = G_FNEG %3:_
+ %5:_(s32) = G_ANYEXT %4:_(s16)
+ $vgpr0 = COPY %5:_(s32)
+
+...
+---
+name: test_fminnum_inv2pi_float
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_fminnum_inv2pi_float
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+ %2:_(s32) = G_FMINNUM %0:_, %1:_
+ %3:_(s32) = G_FNEG %2:_
+ $vgpr0 = COPY %3:_(s32)
+
+...
+---
+name: test_fminnum_inv2pi_double
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_fminnum_inv2pi_double
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s64) = G_FMINNUM [[COPY]], [[C]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMINNUM]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882
+ %2:_(s64) = G_FMINNUM %0:_, %1:_
+ %3:_(s64) = G_FNEG %2:_
+ $vgpr0_vgpr1 = COPY %3:_(s64)
+
+...
+
+# Don't fold when the instruction count will not decrease.
+---
+name: test_use_both
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_use_both
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]]
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[FMUL1]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_FMUL %0, %1
+ %4:_(s32) = G_FNEG %3
+ %5:_(s32) = G_FMUL %4, %2
+ $vgpr0 = COPY %3:_(s32)
+ $vgpr1 = COPY %4:_(s32)
+ $vgpr2 = COPY %5:_(s32)
+
+...
+
+# Don't fold when the instruction count will not decrease.
+---
+name: test_use_both2
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_use_both2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_FMUL %0, %1
+ %3:_(s32) = G_FNEG %2
+ $vgpr0 = COPY %2:_(s32)
+ $vgpr1 = COPY %3:_(s32)
+
+...
+
+---
+name: multiple_uses_of_fneg
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: multiple_uses_of_fneg
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+ ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]]
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY2]]
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[FMUL1]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[FMUL2]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+
+ %4:_(s32) = G_FMUL %0, %1
+ %5:_(s32) = G_FNEG %4
+ %6:_(s32) = G_FMUL %5, %2
+ %7:_(s32) = G_FMUL %5, %3
+
+ $vgpr0 = COPY %5:_(s32)
+ $vgpr1 = COPY %6:_(s32)
+ $vgpr2 = COPY %7:_(s32)
+
+...