llvm.amdgcn.exp2 Provides direct access to v_exp_f32 and v_exp_f16
(on targets with half support). Performs exp2 function.
+ :ref:`llvm.frexp <int_frexp>` Implemented for half, float and double.
+
:ref:`llvm.log2 <int_log2>` Implemented for float and half (and vectors of float or
half). Not implemented for double. Hardware provides
1ULP accuracy for float, and 0.51ULP for half. Float
instruction does not natively support denormal
inputs. Backend will optimize out denormal scaling if
marked with the :ref:`afn <fastmath_afn>` flag.
- :ref:`llvm.frexp <int_frexp>` Implemented for half, float and double.
+ :ref:`llvm.log <int_log>` Implemented for float and half (and vectors).
+
+ :ref:`llvm.log10 <int_log10>` Implemented for float and half (and vectors).
========================================= ==========================================================
If the argument is an infinity, returns an infinity with the same sign
and an unspecified exponent.
+.. _int_log:
+
'``llvm.log.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+.. _int_log10:
+
'``llvm.log10.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* Added llvm.amdgcn.exp2.f32 intrinsic. This provides direct access to
v_exp_f32.
-* llvm.log2.f32 is now lowered accurately. Use llvm.amdgcn.log.f32 to
- access the old behavior.
+* llvm.log2.f32, llvm.log10.f32, and llvm.log.f32 are now lowered
+ accurately. Use llvm.amdgcn.log.f32 to access the old behavior for
+ llvm.log2.f32.
Changes to the ARM Backend
--------------------------
setOperationAction(ISD::FLOG2, MVT::f16, Custom);
}
+ setOperationAction({ISD::FLOG10, ISD::FLOG}, MVT::f16, Custom);
+
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
// default unless marked custom/legal.
case ISD::FLOG2:
return LowerFLOG2(Op, DAG);
case ISD::FLOG:
- return LowerFLOG(Op, DAG, numbers::ln2);
case ISD::FLOG10:
- return LowerFLOG(Op, DAG, numbers::ln2 / numbers::ln10);
+ return LowerFLOGCommon(Op, DAG);
case ISD::FEXP:
return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
default:
return;
}
llvm_unreachable("covered opcode switch");
}
+/// Decide whether an f32 log lowering must scale its input to cope with
+/// denormal values.
+///
+/// No handling is requested when approximate functions are permitted (the afn
+/// flag, or the UnsafeFPMath / ApproxFuncFPMath target options), when \p Src is
+/// known to never be an f32 denormal, or when the function's f32 input
+/// denormal mode is PreserveSign.
+static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
+                                   SDNodeFlags Flags) {
+  const auto &Options = DAG.getTarget().Options;
+  if (Flags.hasApproximateFuncs() || Options.UnsafeFPMath ||
+      Options.ApproxFuncFPMath)
+    return false;
+
+  if (valueIsKnownNeverF32Denorm(Src))
+    return false;
+
+  const DenormalMode Mode =
+      DAG.getMachineFunction().getDenormalMode(APFloat::IEEEsingle());
+  return Mode.Input != DenormalMode::PreserveSign;
+}
+
+/// Build an ordered "less than" comparison of \p Src against the smallest
+/// normalized value of its floating-point type.
+///
+/// The comparison is meant to pick out denormals, but negative values and zero
+/// also compare true; that is fine because they work just as well on the
+/// scaled path. \p Flags is not used by this helper.
+SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
+                                                    SDValue Src,
+                                                    SDNodeFlags Flags) const {
+  SDLoc SL(Src);
+  const EVT VT = Src.getValueType();
+  const EVT CCVT =
+      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+  SDValue Threshold = DAG.getConstantFP(
+      APFloat::getSmallestNormalized(SelectionDAG::EVTToAPFloatSemantics(VT)),
+      SL, VT);
+
+  return DAG.getSetCC(SL, CCVT, Src, Threshold, ISD::SETOLT);
+}
+
+/// Build the test "fabs(Src) < +inf" with an ordered comparison, so the result
+/// is false for infinities and NaNs. \p Flags is applied to the FABS node.
+SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
+                                          SDNodeFlags Flags) const {
+  SDLoc SL(Src);
+  const EVT VT = Src.getValueType();
+  const EVT CCVT =
+      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+  SDValue PosInf = DAG.getConstantFP(
+      APFloat::getInf(SelectionDAG::EVTToAPFloatSemantics(VT)), SL, VT);
+  SDValue AbsSrc = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
+  return DAG.getSetCC(SL, CCVT, AbsSrc, PosInf, ISD::SETOLT);
+}
+
+/// If denormal handling is required return the scaled input to FLOG2, and the
+/// check for denormal range. Otherwise, return null values.
+///
+/// The scaled input is Src * 2^32 when Src compares below the smallest
+/// normalized f32 and Src * 1.0 otherwise; the second element of the pair is
+/// that comparison, which callers use to subtract the matching offset from the
+/// log result.
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
+ SDValue Src, SDNodeFlags Flags) const {
+ if (!needsDenormHandlingF32(DAG, Src, Flags))
+ return {};
+
+ MVT VT = MVT::f32;
+ const fltSemantics &Semantics = APFloat::IEEEsingle();
+ SDValue SmallestNormal =
+ DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
+
+ // Want to scale denormals up, but negatives and 0 work just as well on the
+ // scaled path.
+ SDValue IsLtSmallestNormal = DAG.getSetCC(
+ SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
+ SmallestNormal, ISD::SETOLT);
+
+ // Pick the multiplier: 2^32 on the scaled path, 1.0 otherwise.
+ SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
+ SDValue One = DAG.getConstantFP(1.0, SL, VT);
+ SDValue ScaleFactor =
+ DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
+
+ SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
+ return {ScaledInput, IsLtSmallestNormal};
+}
+
SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const {
// v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
// If we have to handle denormals, scale up the input and adjust the result.
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
}
- bool NeedDenormHandling =
- !Flags.hasApproximateFuncs() && !DAG.getTarget().Options.UnsafeFPMath &&
- !DAG.getTarget().Options.ApproxFuncFPMath &&
- !valueIsKnownNeverF32Denorm(Src) &&
- DAG.getDenormalMode(VT).Input != DenormalMode::PreserveSign;
-
- if (!NeedDenormHandling)
+ auto [ScaledInput, IsLtSmallestNormal] =
+ getScaledLogInput(DAG, SL, Src, Flags);
+ if (!ScaledInput)
return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);
- const fltSemantics &Semantics = APFloat::IEEEsingle();
- SDValue SmallestNormal =
- DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
-
- // Want to scale denormals up, but negatives and 0 work just as well on the
- // scaled path.
- SDValue IsLtSmallestNormal = DAG.getSetCC(
- SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
- SmallestNormal, ISD::SETOLT);
-
- SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
- SDValue One = DAG.getConstantFP(1.0, SL, VT);
- SDValue ScaleFactor =
- DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
-
- SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
-
SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT);
return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags);
}
-SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
- double Log2BaseInverted) const {
+/// Emit X * Y + C as a separate FMUL followed by an FADD (not a fused
+/// operation), applying \p Flags to both nodes.
+static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
+                      SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) {
+  SDValue Product = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags);
+  SDValue Sum = DAG.getNode(ISD::FADD, SL, VT, Product, C, Flags);
+  return Sum;
+}
+
+/// Lower FLOG and FLOG10 by scaling the result of the hardware log2.
+///
+/// f16 operations, and anything that permits approximate functions, use the
+/// single-multiply expansion in LowerFLOGUnsafe (promoting f16 to f32 when the
+/// subtarget has no 16-bit instructions). Otherwise log2(x) is multiplied by
+/// ln(2), or ln(2)/ln(10) for FLOG10, using a constant split across two floats
+/// for extra precision; a non-finite log2 result is passed through unchanged;
+/// and the offset introduced by denormal input scaling is subtracted.
+SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
+ SDNodeFlags Flags = Op->getFlags();
+ SDLoc DL(Op);
- SDLoc SL(Op);
- SDValue Operand = Op.getOperand(0);
- SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand);
+ const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
+ assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
+
+ const auto &Options = getTargetMachine().Options;
+ if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
+ Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
+
+ if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
+ // Log and multiply in f32 is good enough for f16.
+ X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
+ }
+
+ SDValue Lowered = LowerFLOGUnsafe(
+ X, DL, DAG, IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2,
+ Flags);
+ if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
+ DAG.getTargetConstant(0, DL, MVT::i32), Flags);
+ }
+
+ return Lowered;
+ }
+
+ // Scale denormal inputs up if required; IsScaled is null when no scaling
+ // was emitted.
+ auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags);
+ if (ScaledInput)
+ X = ScaledInput;
+
+ SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags);
+
+ SDValue R;
+ if (Subtarget->hasFastFMAF32()) {
+ // c+cc are ln(2)/ln(10) to more than 49 bits
+ const float c_log10 = 0x1.344134p-2f;
+ const float cc_log10 = 0x1.09f79ep-26f;
+
+ // c + cc is ln(2) to more than 49 bits
+ const float c_log = 0x1.62e42ep-1f;
+ const float cc_log = 0x1.efa39ep-25f;
+
+ SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT);
+ SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT);
+
+ // R = Y * C, then add back the rounding error of that product (recovered
+ // with an FMA) plus the contribution of the low constant CC.
+ R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags);
+ SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags);
+ SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags);
+ SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags);
+ R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags);
+ } else {
+ // ch+ct is ln(2)/ln(10) to more than 36 bits
+ const float ch_log10 = 0x1.344000p-2f;
+ const float ct_log10 = 0x1.3509f6p-18f;
+
+ // ch + ct is ln(2) to more than 36 bits
+ const float ch_log = 0x1.62e000p-1f;
+ const float ct_log = 0x1.0bfbe8p-15f;
+
+ SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT);
+ SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT);
+
+ // Split Y into a high part YH (low 12 bits of the encoding cleared) and the
+ // remaining tail YT = Y - YH.
+ SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y);
+ SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32);
+ SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst);
+ SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);
+ SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags);
+
+ // Accumulate the four partial products, smallest terms first.
+ SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags);
+ SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
+ SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags);
+ // Propagate the node flags here too, matching the other mads above and the
+ // GlobalISel lowering.
+ R = getMad(DAG, DL, VT, YH, CH, Mad1, Flags);
+ }
+
+ const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
+ (Flags.hasNoInfs() || Options.NoInfsFPMath);
+
+ // TODO: Check if known finite from source value.
+ if (!IsFiniteOnly) {
+ // If log2 produced an infinity or NaN, return it unchanged instead of the
+ // scaled product.
+ SDValue IsFinite = getIsFinite(DAG, Y, Flags);
+ R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags);
+ }
+
+ if (IsScaled) {
+ // The input was multiplied by 2^32 on the scaled path, so remove
+ // 32 * ln(2) (or 32 * ln(2)/ln(10) for log10) from the result there.
+ SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
+ SDValue ShiftK =
+ DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
+ SDValue Shift =
+ DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags);
+ R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags);
+ }
+
+ return R;
+}
+
+/// Forwards to LowerFLOGCommon, which selects the log10 constants from the
+/// opcode of \p Op.
+SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
+ return LowerFLOGCommon(Op, DAG);
+}
+
+// Do the fast math expansion for flog or flog10: log2(Src) multiplied by
+// Log2BaseInverted (ln(2) for flog, ln(2)/ln(10) for flog10). This is accurate
+// enough for a promoted f16 operation.
+SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
+ SelectionDAG &DAG,
+ double Log2BaseInverted,
+ SDNodeFlags Flags) const {
+ EVT VT = Src.getValueType();
+ // f32 goes straight to the target log node; other types emit a generic
+ // FLOG2.
+ unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2;
+ SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
- return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
+ return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
+ Flags);
}
// exp2(M_LOG2E_F * f);
SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
+ SDNodeFlags Flags) const;
+ SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
+ std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
+ const SDLoc SL, SDValue Op,
+ SDNodeFlags Flags) const;
+
SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
- double Log2BaseInverted) const;
+ SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
+ double Log2BaseInverted, SDNodeFlags Flags) const;
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
Exp2Ops.clampScalar(0, MinScalarFPTy, S32);
Exp2Ops.scalarize(0);
- auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10, G_FPOW});
+ auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FPOW});
if (ST.has16BitInsts())
ExpOps.customFor({{S32}, {S16}});
else
Log2Ops.scalarize(0)
.lower();
+ auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10});
+ LogOps.customFor({S32, S16});
+ LogOps.clampScalar(0, MinScalarFPTy, S32)
+ .scalarize(0);
+
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTPOP)
.legalFor({{S32, S32}, {S32, S64}})
case TargetOpcode::G_FLOG2:
return legalizeFlog2(MI, B);
case TargetOpcode::G_FLOG:
- return legalizeFlog(MI, B, numbers::ln2);
case TargetOpcode::G_FLOG10:
- return legalizeFlog(MI, B, numbers::ln2 / numbers::ln10);
+ return legalizeFlogCommon(MI, B);
case TargetOpcode::G_FEXP:
return legalizeFExp(MI, B);
case TargetOpcode::G_FPOW:
return false;
}
+/// Decide whether an f32 log legalization must scale its input to cope with
+/// denormal values.
+///
+/// No handling is requested when approximate functions are permitted (the
+/// FmAfn instruction flag, or the UnsafeFPMath / ApproxFuncFPMath target
+/// options), when \p Src is known to never be an f32 denormal, or when the
+/// function's f32 input denormal mode is PreserveSign.
+static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
+                                   unsigned Flags) {
+  const auto &Options = MF.getTarget().Options;
+  if ((Flags & MachineInstr::FmAfn) != 0 || Options.UnsafeFPMath ||
+      Options.ApproxFuncFPMath)
+    return false;
+
+  if (valueIsKnownNeverF32Denorm(MF.getRegInfo(), Src))
+    return false;
+
+  const DenormalMode Mode = MF.getDenormalMode(APFloat::IEEEsingle());
+  return Mode.Input != DenormalMode::PreserveSign;
+}
+
+/// If denormal handling is required, return the f32 input multiplied by 2^32
+/// when it compares below the smallest normalized value (by 1.0 otherwise),
+/// together with the s1 compare that selected the scale. Otherwise return a
+/// pair of default-constructed registers, which callers test for.
+std::pair<Register, Register>
+AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
+ unsigned Flags) const {
+ if (!needsDenormHandlingF32(B.getMF(), Src, Flags))
+ return {};
+
+ const LLT F32 = LLT::scalar(32);
+ auto SmallestNormal = B.buildFConstant(
+ F32, APFloat::getSmallestNormalized(APFloat::IEEEsingle()));
+ // Want to scale denormals up; negatives and 0 also compare true here and
+ // take the scaled path.
+ auto IsLtSmallestNormal =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src, SmallestNormal);
+
+ // Pick the multiplier: 2^32 on the scaled path, 1.0 otherwise.
+ auto Scale32 = B.buildFConstant(F32, 0x1.0p+32);
+ auto One = B.buildFConstant(F32, 1.0);
+ auto ScaleFactor =
+ B.buildSelect(F32, IsLtSmallestNormal, Scale32, One, Flags);
+ auto ScaledInput = B.buildFMul(F32, Src, ScaleFactor, Flags);
+
+ return {ScaledInput.getReg(0), IsLtSmallestNormal.getReg(0)};
+}
+
bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
MachineIRBuilder &B) const {
// v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
LLT Ty = B.getMRI()->getType(Dst);
unsigned Flags = MI.getFlags();
- const MachineFunction &MF = B.getMF();
-
if (Ty == LLT::scalar(16)) {
const LLT F32 = LLT::scalar(32);
// Nothing in half is a denormal when promoted to f32.
assert(Ty == LLT::scalar(32));
- const fltSemantics &Flt = APFloat::IEEEsingle();
-
- bool NeedDenormHandling =
- !MI.getFlag(MachineInstr::FmAfn) &&
- !MF.getTarget().Options.UnsafeFPMath &&
- !MF.getTarget().Options.ApproxFuncFPMath &&
- !valueIsKnownNeverF32Denorm(*B.getMRI(), Src) &&
- MF.getDenormalMode(Flt).Input != DenormalMode::PreserveSign;
-
- if (!NeedDenormHandling) {
- B.buildIntrinsic(Intrinsic::amdgcn_log, ArrayRef<Register>{Dst}, false)
- .addUse(Src)
- .setMIFlags(Flags);
+ auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
+ if (!ScaledInput) {
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
- auto SmallestNormal =
- B.buildFConstant(Ty, APFloat::getSmallestNormalized(Flt));
- auto IsDenormOrZero =
- B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src, SmallestNormal);
-
- auto Scale32 = B.buildFConstant(Ty, 0x1.0p+32);
- auto One = B.buildFConstant(Ty, 1.0);
- auto ScaleFactor = B.buildSelect(Ty, IsDenormOrZero, Scale32, One, Flags);
- auto ScaledInput = B.buildFMul(Ty, Src, ScaleFactor, Flags);
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
- .addUse(ScaledInput.getReg(0))
- .setMIFlags(Flags);
+ .addUse(ScaledInput)
+ .setMIFlags(Flags);
auto ThirtyTwo = B.buildFConstant(Ty, 32.0);
auto Zero = B.buildFConstant(Ty, 0.0);
- auto ResultOffset = B.buildSelect(Ty, IsDenormOrZero, ThirtyTwo, Zero, Flags);
+ auto ResultOffset =
+ B.buildSelect(Ty, IsLtSmallestNormal, ThirtyTwo, Zero, Flags);
B.buildFSub(Dst, Log2, ResultOffset, Flags);
MI.eraseFromParent();
return true;
}
-bool AMDGPULegalizerInfo::legalizeFlog(
- MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const {
+/// Emit X * Y + Z as a separate G_FMUL followed by a G_FADD (not a fused
+/// operation), applying \p Flags to both, and return the register holding the
+/// sum.
+static Register getMad(MachineIRBuilder &B, LLT Ty, Register X, Register Y,
+                       Register Z, unsigned Flags) {
+  auto Product = B.buildFMul(Ty, X, Y, Flags);
+  auto Sum = B.buildFAdd(Ty, Product, Z, Flags);
+  return Sum.getReg(0);
+}
+
+/// Legalize G_FLOG and G_FLOG10 by scaling the result of the amdgcn.log
+/// intrinsic.
+///
+/// s16 operations, and anything that permits approximate functions, use the
+/// single-multiply expansion in legalizeFlogUnsafe (promoting s16 to s32 when
+/// the subtarget has no 16-bit instructions). Otherwise log2(x) is multiplied
+/// by ln(2), or ln(2)/ln(10) for G_FLOG10, using a constant split across two
+/// floats; a non-finite log2 result is passed through unchanged; and the
+/// offset introduced by denormal input scaling is subtracted. \p MI is erased
+/// and true is returned on every path.
+bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ const bool IsLog10 = MI.getOpcode() == TargetOpcode::G_FLOG10;
+ assert(IsLog10 || MI.getOpcode() == TargetOpcode::G_FLOG);
+
+ MachineRegisterInfo &MRI = *B.getMRI();
Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT Ty = B.getMRI()->getType(Dst);
+ Register X = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
+ const LLT Ty = MRI.getType(X);
+ MachineFunction &MF = B.getMF();
- auto Log2Operand = B.buildFLog2(Ty, Src, Flags);
- auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
+ const LLT F32 = LLT::scalar(32);
+ const LLT F16 = LLT::scalar(16);
+
+ const AMDGPUTargetMachine &TM =
+ static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+
+ if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) ||
+ TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) {
+ const double Log2BaseInv =
+ IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
+
+ if (Ty == F16 && !ST.has16BitInsts()) {
+ // Without 16-bit instructions, do the log and multiply in s32 and
+ // truncate the result back to s16.
+ Register LogVal = MRI.createGenericVirtualRegister(F32);
+ auto PromoteSrc = B.buildFPExt(F32, X);
+ legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags);
+ B.buildFPTrunc(Dst, LogVal);
+ } else {
+ legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Scale denormal inputs up if required; both registers are left
+ // default-constructed when no scaling was emitted.
+ auto [ScaledInput, IsScaled] = getScaledLogInput(B, X, Flags);
+ if (ScaledInput)
+ X = ScaledInput;
+
+ auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(X)
+ .setMIFlags(Flags);
+
+ Register R;
+ if (ST.hasFastFMAF32()) {
+ // c+cc are ln(2)/ln(10) to more than 49 bits
+ const float c_log10 = 0x1.344134p-2f;
+ const float cc_log10 = 0x1.09f79ep-26f;
+
+ // c + cc is ln(2) to more than 49 bits
+ const float c_log = 0x1.62e42ep-1f;
+ const float cc_log = 0x1.efa39ep-25f;
+
+ auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log);
+ auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log);
+
+ // R = Y * C, then add back the rounding error of that product (recovered
+ // with an FMA) plus the contribution of the low constant CC.
+ R = B.buildFMul(Ty, Y, C, Flags).getReg(0);
+ auto NegR = B.buildFNeg(Ty, R, Flags);
+ auto FMA0 = B.buildFMA(Ty, Y, C, NegR, Flags);
+ auto FMA1 = B.buildFMA(Ty, Y, CC, FMA0, Flags);
+ R = B.buildFAdd(Ty, R, FMA1, Flags).getReg(0);
+ } else {
+ // ch+ct is ln(2)/ln(10) to more than 36 bits
+ const float ch_log10 = 0x1.344000p-2f;
+ const float ct_log10 = 0x1.3509f6p-18f;
+
+ // ch + ct is ln(2) to more than 36 bits
+ const float ch_log = 0x1.62e000p-1f;
+ const float ct_log = 0x1.0bfbe8p-15f;
+
+ auto CH = B.buildFConstant(Ty, IsLog10 ? ch_log10 : ch_log);
+ auto CT = B.buildFConstant(Ty, IsLog10 ? ct_log10 : ct_log);
+
+ // Split Y into a high part YH (low 12 bits of the encoding cleared) and the
+ // remaining tail YT = Y - YH.
+ auto MaskConst = B.buildConstant(Ty, 0xfffff000);
+ auto YH = B.buildAnd(Ty, Y, MaskConst);
+ auto YT = B.buildFSub(Ty, Y, YH, Flags);
+ auto YTCT = B.buildFMul(Ty, YT, CT, Flags);
+
+ // Accumulate the four partial products, smallest terms first.
+ Register Mad0 =
+ getMad(B, Ty, YH.getReg(0), CT.getReg(0), YTCT.getReg(0), Flags);
+ Register Mad1 = getMad(B, Ty, YT.getReg(0), CH.getReg(0), Mad0, Flags);
+ R = getMad(B, Ty, YH.getReg(0), CH.getReg(0), Mad1, Flags);
+ }
+
+ const bool IsFiniteOnly =
+ (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) &&
+ (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath);
+
+ if (!IsFiniteOnly) {
+ // Expand isfinite(x) => fabs(x) < inf
+ // If log2 produced an infinity or NaN, return it unchanged instead of the
+ // scaled product.
+ auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle()));
+ auto Fabs = B.buildFAbs(Ty, Y);
+ auto IsFinite =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags);
+ R = B.buildSelect(Ty, IsFinite, R, Y, Flags).getReg(0);
+ }
+
+ if (ScaledInput) {
+ // The input was multiplied by 2^32 on the scaled path, so remove
+ // 32 * ln(2) (or 32 * ln(2)/ln(10) for log10) from the result there.
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto ShiftK =
+ B.buildFConstant(Ty, IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f);
+ auto Shift = B.buildSelect(Ty, IsScaled, ShiftK, Zero, Flags);
+ B.buildFSub(Dst, R, Shift, Flags);
+ } else {
+ B.buildCopy(Dst, R);
+ }
- B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags);
MI.eraseFromParent();
return true;
}
+/// Emit the fast expansion Dst = log2(Src) * Log2BaseInverted, with \p Flags
+/// on the log and the multiply. The type is taken from \p Dst: for s16 the
+/// log2 is a generic G_FLOG2, otherwise it is the amdgcn.log intrinsic.
+/// Always returns true; the caller is responsible for erasing the original
+/// instruction.
+bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
+ Register Src,
+ double Log2BaseInverted,
+ unsigned Flags) const {
+ LLT Ty = B.getMRI()->getType(Dst);
+ auto Log2Operand = Ty == LLT::scalar(16)
+ ? B.buildFLog2(Ty, Src, Flags)
+ : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+ auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
+ B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags);
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
MachineIRBuilder &B) const {
Register Dst = MI.getOperand(0).getReg();
bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+
+ std::pair<Register, Register>
+ getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const;
+
bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const;
- bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
- double Log2BaseInverted) const;
+ bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
+ double Log2BaseInverted, unsigned Flags) const;
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
bool HasFminFmaxLegacy = true;
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
+ bool FastFMAF32 = false;
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
return HasTrigReducedRange;
}
+ /// Returns the FastFMAF32 subtarget flag. The log/log10 lowerings query this
+ /// to choose between the FMA-based expansion and the mul/add one.
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
unsigned MaxPrivateElementSize = 0;
// Possibly statically set by tablegen, but may want to be overridden.
- bool FastFMAF32 = false;
bool FastDenormalF32 = false;
bool HalfRate64Ops = false;
bool FullRate64Ops = false;
return FP64;
}
- bool hasFastFMAF32() const {
- return FastFMAF32;
- }
-
bool hasHalfRate64Ops() const {
return HalfRate64Ops;
}
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_FLOG %0
$vgpr0 = COPY %1
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = nnan G_FLOG %0
$vgpr0 = COPY %1
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]]
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]]
- ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
+ ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]]
+ ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]]
+ ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]]
+ ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]]
+ ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]]
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_FLOG %0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E42E0000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E6EFA39E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x40362E4300000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]]
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]]
- ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]]
- ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]]
+ ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]]
+ ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]]
+ ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]]
+ ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]]
+ ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]]
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]]
+ ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]]
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32)
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT5]]
- ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C5]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
+ ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]]
+ ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]]
+ ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]]
+ ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]]
+ ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]]
+ ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]]
+ ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]]
+ ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = G_FLOG %0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_FLOG10 %0
$vgpr0 = COPY %1
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = nnan G_FLOG10 %0
$vgpr0 = COPY %1
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]]
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]]
- ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
+ ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]]
+ ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]]
+ ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]]
+ ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]]
+ ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]]
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_FLOG10 %0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[SELECT]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.200000e+01
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
- ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FSUB]], [[C5]]
- ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E509F79E0000000
+ ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[C3]]
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]]
+ ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C3]], [[FNEG]]
+ ; CHECK-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[INT]], [[C4]], [[FMA]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[FMA1]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x7FF0000000000000
+ ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[INT]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[FADD]], [[INT]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x4023441360000000
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[SELECT1]], [[SELECT2]]
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT3]]
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT1]], [[SELECT3]]
- ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FSUB1]], [[C5]]
- ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
- ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]]
+ ; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[C3]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL3]]
+ ; CHECK-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C3]], [[FNEG1]]
+ ; CHECK-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[INT1]], [[C4]], [[FMA2]]
+ ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMA3]]
+ ; CHECK-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[INT1]]
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS1]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[FADD1]], [[INT1]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[SELECT4]], [[SELECT5]]
+ ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]]
+ ; CHECK-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT6]]
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32)
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
- ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT5]]
- ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FSUB2]], [[C5]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
+ ; CHECK-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[C3]]
+ ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL5]]
+ ; CHECK-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C3]], [[FNEG2]]
+ ; CHECK-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[INT2]], [[C4]], [[FMA4]]
+ ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[FMA5]]
+ ; CHECK-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[INT2]]
+ ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FABS2]](s32), [[C5]]
+ ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[FADD2]], [[INT2]]
+ ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C7]], [[C6]]
+ ; CHECK-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[SELECT7]], [[SELECT8]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = G_FLOG10 %0
; SI-SDAG-LABEL: s_log_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x3f317217
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log_f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log_f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3f317217
+; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log_f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log_f32:
; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v1, v0 :: v_dual_mov_b32 v1, 0
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s3
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, s3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.W, KC0[2].Z, PV.W,
-; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: ADD * T0.W, PS, -T0.W,
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45)
+; R600-NEXT: AND_INT * T1.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD * T2.W, T0.X, -PV.W,
+; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
+; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
+; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|,
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.X, PV.W, -PS,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; CM-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, PV.W,
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: ADD * T0.W, PV.X, -T0.Z,
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
+; CM-NEXT: LOG_IEEE T0.X, T1.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD * T2.W, T0.X, -PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
+; CM-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W,
+; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|,
+; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call float @llvm.log.f32(float %in)
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s8, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; SI-SDAG-NEXT: v_log_f32_e32 v3, v1
-; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s3, 0x3f317217
; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v2
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0
; SI-SDAG-NEXT: s_mov_b32 s5, s1
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log_v2f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
+; SI-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
+; SI-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
+; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
+; SI-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log_v2f32:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v2, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log_v2f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x800000
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v4, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log_v2f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3377d1cf
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
+; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log_v2f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log_v2f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s5
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s3, v2 :: v_dual_mul_f32 v3, s2, v3
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v3, v1 :: v_dual_mov_b32 v3, 0
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3f317218, v0 :: v_dual_mul_f32 v0, 0x3f317218, v2
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v4, v0, 0x3f317217, -v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_v2f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Z, KC0[2].W, PS,
-; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.x,
+; R600-NEXT: MUL_IEEE T3.W, KC0[2].W, PS,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; R600-NEXT: CNDE T1.Z, T1.W, 0.0, literal.x,
-; R600-NEXT: ADD T0.W, PS, -PV.W,
-; R600-NEXT: LOG_IEEE * T0.X, PV.Z,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: ADD * T0.W, PS, -PV.Z,
-; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45)
+; R600-NEXT: AND_INT T2.W, PS, literal.x,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.W,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T3.W, T0.X, -PV.W,
+; R600-NEXT: AND_INT * T4.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T5.W, T0.Y, -PS,
+; R600-NEXT: MUL_IEEE * T6.W, PV.W, literal.x,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T6.W, T2.W, literal.x, PS,
+; R600-NEXT: MUL_IEEE * T7.W, PV.W, literal.x,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T7.W, T4.W, literal.x, PS,
+; R600-NEXT: MULADD_IEEE * T3.W, T3.W, literal.y, PV.W, BS:VEC_021/SCL_122
+; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
+; R600-NEXT: MULADD_IEEE T0.Z, T2.W, literal.x, PS,
+; R600-NEXT: SETGT T2.W, literal.y, |T0.X|,
+; R600-NEXT: MULADD_IEEE * T3.W, T5.W, literal.x, PV.W, BS:VEC_021/SCL_122
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: MULADD_IEEE T1.Y, T4.W, literal.x, PS,
+; R600-NEXT: SETGT T1.Z, literal.y, |T0.Y|,
+; R600-NEXT: CNDE T2.W, PV.W, T0.X, PV.Z,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Y, PV.W, -PS,
+; R600-NEXT: CNDE T0.W, PV.Z, T0.Y, PV.Y,
+; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.x,
+; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: ALU 47, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, PV.Z,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x,
-; CM-NEXT: ADD T0.Z, PV.X, -T1.Z,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].W, T0.Y,
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T0.Y, T0.Z, literal.x,
-; CM-NEXT: ADD * T0.W, PV.X, -T1.Y,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: CNDE T1.Z, PV.W, 1.0, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].X, PV.Z,
+; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: AND_INT T0.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[2].W, T1.Z,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: ADD T1.Z, T0.X, -T0.Z,
+; CM-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD T2.Z, T0.Y, -PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T3.Z, T0.Z, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T4.Z, T2.W, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE * T3.W, T1.Z, literal.y, PV.Z,
+; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
+; CM-NEXT: MULADD_IEEE T1.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T0.Z, literal.y, |T0.X|,
+; CM-NEXT: MULADD_IEEE * T3.W, T2.Z, literal.x, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; CM-NEXT: MULADD_IEEE T1.X, T2.W, literal.x, PV.W,
+; CM-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
+; CM-NEXT: CNDE T0.Z, PV.Z, T0.X, PV.Y,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; CM-NEXT: ADD T1.Y, PV.Z, -PV.W,
+; CM-NEXT: CNDE T0.Z, PV.Y, T0.Y, PV.X,
+; CM-NEXT: CNDE * T0.W, T1.W, 0.0, literal.x,
+; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; CM-NEXT: ADD * T1.X, PV.Z, -PV.W,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <2 x float> @llvm.log.v2f32(<2 x float> %in)
store <2 x float> %result, ptr addrspace(1) %out
define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; SI-SDAG-LABEL: s_log_v3f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT: s_mov_b32 s11, 0x3377d1cf
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s9, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1
+; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
+; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0
+; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5
; SI-SDAG-NEXT: v_log_f32_e32 v2, v0
-; SI-SDAG-NEXT: v_log_f32_e32 v5, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v3
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
-; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
+; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3
+; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
+; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log_v3f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, s9, v4
; SI-GISEL-NEXT: v_log_f32_e32 v4, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
+; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: s_mov_b32 s8, 0x7f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v3
+; VI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
+; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v6, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v5
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v5, v0, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s6
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s7
; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, s5, v3
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
-; VI-GISEL-NEXT: v_log_f32_e32 v4, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s9
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log_v3f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3377d1cf
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s6, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v1
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v3
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-SDAG-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s6, -v6
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s7, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s6, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s7, v6
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log_v3f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v4
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
+; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log_v3f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s4
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, s6, v3 :: v_dual_mul_f32 v4, s5, v4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s7
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s6, v0 :: v_dual_mul_f32 v1, s5, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s4, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v4, v1 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v3, v5, v2 :: v_dual_mul_f32 v2, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3f317218, v1 :: v_dual_mul_f32 v0, 0x3f317218, v3
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2
+; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_cndmask_b32 v1, v1, v4
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6
; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s3
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s2
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_sub_f32 v1, v1, v4
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v1, 0x3f317218, v1 :: v_dual_mul_f32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fma_f32 v6, v0, 0x3f317217, -v3
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2
+; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_mov_b32 v3, 0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, KC0[3].Y, PS,
-; R600-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x,
-; R600-NEXT: SETGT T0.W, literal.y, KC0[3].W,
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Y, PS,
+; R600-NEXT: SETGT T3.W, literal.x, KC0[3].W,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38)
-; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x,
-; R600-NEXT: CNDE T1.Z, PV.W, 1.0, literal.y,
-; R600-NEXT: ADD T1.W, PS, -PV.Z,
-; R600-NEXT: LOG_IEEE * T0.X, PV.Y,
-; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: MUL_IEEE T1.W, KC0[3].W, PV.Z,
-; R600-NEXT: ADD * T2.W, PS, -PV.Y,
-; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PS, literal.x,
-; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.y,
-; R600-NEXT: LOG_IEEE * T0.Z, PV.W,
-; R600-NEXT: 1060205080(6.931472e-01), 1107296256(3.200000e+01)
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: ADD * T0.W, PS, -PV.W,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
-; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; R600-NEXT: 1060205080(6.931472e-01), 8(1.121039e-44)
+; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; R600-NEXT: AND_INT T1.Z, PS, literal.x,
+; R600-NEXT: CNDE T2.W, PV.W, 1.0, literal.y,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
+; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
+; R600-NEXT: ADD T2.W, T0.X, -PV.Z,
+; R600-NEXT: AND_INT * T4.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Z, T0.Y, -PS,
+; R600-NEXT: MUL_IEEE T5.W, PV.W, literal.x,
+; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T3.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT: AND_INT T5.W, PS, literal.y,
+; R600-NEXT: MUL_IEEE * T6.W, PV.Z, literal.x,
+; R600-NEXT: 939916788(3.194618e-05), -4096(nan)
+; R600-NEXT: MULADD_IEEE T4.Z, T4.W, literal.x, PS,
+; R600-NEXT: ADD T6.W, T0.Z, -PV.W,
+; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.y, PV.Z, BS:VEC_021/SCL_122
+; R600-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
+; R600-NEXT: MULADD_IEEE T1.Y, T1.Z, literal.x, PS,
+; R600-NEXT: SETGT T1.Z, literal.y, |T0.X|,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.z,
+; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Z, BS:VEC_021/SCL_122
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.X, T4.W, literal.x, PS,
+; R600-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
+; R600-NEXT: MULADD_IEEE T2.Z, T5.W, literal.z, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE T2.W, PV.Z, T0.X, PV.Y,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.w,
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: 939916788(3.194618e-05), 1102148120(2.218071e+01)
+; R600-NEXT: ADD T1.Y, PV.W, -PS,
+; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.x, PV.Z,
+; R600-NEXT: CNDE T0.W, PV.Y, T0.Y, PV.X,
+; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.y,
+; R600-NEXT: 1060204544(6.931152e-01), 1102148120(2.218071e+01)
+; R600-NEXT: ADD T1.X, PV.W, -PS,
+; R600-NEXT: MULADD_IEEE T0.W, T5.W, literal.x, PV.Z,
+; R600-NEXT: SETGT * T1.W, literal.y, |T0.Z|,
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: CNDE T0.W, PS, T0.Z, PV.W,
+; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.y,
+; R600-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T1.X
+; CM-NEXT: ALU 68, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
-; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y,
+; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Z,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.y,
-; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, PV.Y,
-; CM-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y, T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: CNDE T1.X, T1.W, 1.0, literal.x,
-; CM-NEXT: CNDE T2.Y, T0.Z, 0.0, literal.y,
-; CM-NEXT: ADD T0.Z, PV.Y, -T1.Z,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Z, T1.Y,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y, T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T2.X, T0.Z, literal.x,
-; CM-NEXT: ADD T0.Y, PV.Y, -T2.Y,
-; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T1.X,
-; CM-NEXT: 1060205080(6.931472e-01), 8(1.121039e-44)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W, T0.W,
-; CM-NEXT: LSHR T1.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.y,
-; CM-NEXT: ADD * T0.W, PV.W, -T0.X,
-; CM-NEXT: 2(2.802597e-45), 1060205080(6.931472e-01)
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.X, literal.y,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].W, T0.Y,
+; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: ADD T2.Y, T0.X, -T0.Z,
+; CM-NEXT: AND_INT T2.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.Y,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T1.X, T2.W,
+; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT: ADD T1.Y, T0.Y, -T2.Z,
+; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T2.Y, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: -4096(nan), 939916788(3.194618e-05)
+; CM-NEXT: MULADD_IEEE T3.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: ADD T4.Z, T1.X, -PV.Z,
+; CM-NEXT: MUL_IEEE * T2.W, PV.Y, literal.x,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T4.Y, T2.Z, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE T5.Z, PV.Z, literal.x,
+; CM-NEXT: MULADD_IEEE * T2.W, T2.Y, literal.y, PV.Y,
+; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
+; CM-NEXT: MULADD_IEEE T2.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T3.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.x, PV.Y,
+; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
+; CM-NEXT: SETGT T2.X, literal.x, |T0.X|,
+; CM-NEXT: MULADD_IEEE T1.Y, T2.Z, literal.y, PV.W,
+; CM-NEXT: SETGT T2.Z, literal.x, |T0.Y|,
+; CM-NEXT: MULADD_IEEE * T2.W, T4.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: 2139095040(INF), 1060204544(6.931152e-01)
+; CM-NEXT: MULADD_IEEE T3.X, T3.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T3.Y, literal.y, |T1.X|,
+; CM-NEXT: CNDE T0.Z, PV.Z, T0.Y, PV.Y,
+; CM-NEXT: CNDE * T1.W, T1.W, 0.0, literal.z,
+; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; CM-NEXT: ADD T4.X, PV.Z, -PV.W,
+; CM-NEXT: CNDE T0.Y, PV.Y, T1.X, PV.X,
+; CM-NEXT: CNDE T0.Z, T1.Z, 0.0, literal.x,
+; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; CM-NEXT: 1102148120(2.218071e+01), 8(1.121039e-44)
+; CM-NEXT: LSHR T1.X, PV.W, literal.x,
+; CM-NEXT: ADD T0.Y, PV.Y, -PV.Z,
+; CM-NEXT: CNDE T0.Z, T2.X, T0.X, T2.Y,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.y,
+; CM-NEXT: 2(2.802597e-45), 1102148120(2.218071e+01)
+; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
+; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.log.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-SDAG-LABEL: s_log_v4f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s12, 0x3377d1cf
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
-; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; SI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v2
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s11, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v4, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v4
+; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2
+; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6
; SI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v1
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v6
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7
+; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4
+; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log_v4f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000
+; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; SI-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
+; SI-GISEL-NEXT: v_log_f32_e32 v9, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
+; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
+; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
+; SI-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
+; VI-SDAG-NEXT: v_log_f32_e32 v4, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
+; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v6, v5
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
; VI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
; VI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; VI-SDAG-NEXT: v_log_f32_e32 v7, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3f317000, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s8
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, s9
; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; VI-GISEL-NEXT: v_log_f32_e32 v5, v5
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x800000
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v7
+; VI-GISEL-NEXT: v_log_f32_e32 v7, v7
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x3805fdf4, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317000, v8
; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s8
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s9
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log_v4f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x3377d1cf
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x41b17218
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, -v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s10, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
+; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v2
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-SDAG-NEXT: global_store_dwordx4 v7, v[0:3], s[2:3]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v5
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s7, -v2
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s10, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v7
+; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v7
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s7, -v5
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s10, v8
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s7, -v5
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s10, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log_v4f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3377d1cf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3f317217
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s6, v9
+; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log_v4f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v4, s7, v4 :: v_dual_mul_f32 v5, s6, v5
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s5, v6 :: v_dual_mul_f32 v7, s4, v7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s7, v0 :: v_dual_mul_f32 v1, s6, v1
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s5, v2 :: v_dual_mul_f32 v3, s4, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s8
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s9
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v4, v0 :: v_dual_sub_f32 v1, v5, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v4, v6, v2 :: v_dual_sub_f32 v5, v7, v3
-; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317218, v1 :: v_dual_mul_f32 v1, 0x3f317218, v4
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v5
-; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5
+; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
+; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
+; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_mul_f32 v3, s7, v3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s3
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3f317218, v0 :: v_dual_mul_f32 v1, 0x3f317218, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317218, v2 :: v_dual_mul_f32 v3, 0x3f317218, v3
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v10, v0, 0x3f317217, -v5
+; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
+; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_v4f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 38, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; R600-NEXT: ALU 76, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X,
-; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
+; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[4].X,
; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W,
-; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z,
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Z, PV.W,
+; R600-NEXT: SETGT T2.W, literal.x, KC0[3].W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y,
; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS,
-; R600-NEXT: SETGT T0.Y, literal.x, KC0[3].Y,
-; R600-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y,
-; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z,
-; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
-; R600-NEXT: 8388608(1.175494e-38), 1107296256(3.200000e+01)
+; R600-NEXT: MUL_IEEE T1.Z, KC0[4].X, PS,
+; R600-NEXT: CNDE T3.W, PV.W, 1.0, literal.x,
+; R600-NEXT: LOG_IEEE * T0.X, PV.Z,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W,
-; R600-NEXT: ADD T1.Y, PS, -PV.Z,
-; R600-NEXT: CNDE T0.Z, T1.W, 0.0, literal.x,
-; R600-NEXT: CNDE T0.W, PV.Y, 1.0, literal.y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T2.X, KC0[3].Y, PV.W,
-; R600-NEXT: CNDE T2.Y, T2.W, 0.0, literal.x,
-; R600-NEXT: ADD T0.Z, PS, -PV.Z,
-; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01)
-; R600-NEXT: CNDE T1.Y, T0.Y, 0.0, literal.x,
-; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.y,
-; R600-NEXT: ADD T1.W, PS, -PV.Y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: ADD * T1.W, PS, -PV.Y,
-; R600-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1060205080(6.931472e-01), 2(2.802597e-45)
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
+; R600-NEXT: SETGT T3.W, literal.x, KC0[3].Y,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
+; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; R600-NEXT: AND_INT T1.Y, PS, literal.x,
+; R600-NEXT: AND_INT T1.Z, T0.X, literal.x,
+; R600-NEXT: CNDE T4.W, PV.W, 1.0, literal.y,
+; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
+; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
+; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.W,
+; R600-NEXT: ADD T2.Z, T0.X, -PV.Z,
+; R600-NEXT: AND_INT T4.W, PS, literal.x,
+; R600-NEXT: ADD * T5.W, T0.Y, -PV.Y,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.Y, PS, literal.x,
+; R600-NEXT: ADD T3.Z, T0.Z, -PV.W,
+; R600-NEXT: MUL_IEEE T6.W, PV.Z, literal.x,
+; R600-NEXT: LOG_IEEE * T1.X, PV.Y,
+; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE T4.Z, PV.Z, literal.x,
+; R600-NEXT: AND_INT T6.W, PS, literal.y,
+; R600-NEXT: MULADD_IEEE * T7.W, T1.Y, literal.x, PV.Y, BS:VEC_021/SCL_122
+; R600-NEXT: 939916788(3.194618e-05), -4096(nan)
+; R600-NEXT: MULADD_IEEE T3.Y, T5.W, literal.x, PS,
+; R600-NEXT: ADD T5.Z, T1.X, -PV.W,
+; R600-NEXT: MULADD_IEEE T5.W, T4.W, literal.y, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Y, BS:VEC_021/SCL_122
+; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
+; R600-NEXT: MULADD_IEEE T2.X, T1.Z, literal.x, PS,
+; R600-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.x, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE T1.Z, PV.Z, literal.y,
+; R600-NEXT: MULADD_IEEE T5.W, T1.Y, literal.x, PV.Y,
+; R600-NEXT: SETGT * T7.W, literal.z, |T0.Y|,
+; R600-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
+; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
+; R600-NEXT: CNDE T3.X, PS, T0.Y, PV.W,
+; R600-NEXT: CNDE T0.Y, T1.W, 0.0, literal.x,
+; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.y, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE T1.W, T4.W, literal.z, PV.Y, BS:VEC_201
+; R600-NEXT: SETGT * T4.W, literal.w, |T0.Z|,
+; R600-NEXT: 1102148120(2.218071e+01), 939916788(3.194618e-05)
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: SETGT T4.X, literal.x, |T0.X|,
+; R600-NEXT: CNDE T1.Y, PS, T0.Z, PV.W,
+; R600-NEXT: CNDE T0.Z, T2.W, 0.0, literal.y,
+; R600-NEXT: MULADD_IEEE T1.W, T5.Z, literal.z, PV.Z,
+; R600-NEXT: ADD * T2.W, PV.X, -PV.Y,
+; R600-NEXT: 2139095040(INF), 1102148120(2.218071e+01)
+; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T3.X, T6.W, literal.x, PV.W,
+; R600-NEXT: SETGT T0.Y, literal.y, |T1.X|,
+; R600-NEXT: ADD T2.Z, PV.Y, -PV.Z,
+; R600-NEXT: CNDE T1.W, PV.X, T0.X, T2.X, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
+; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Y, PV.W, -PS,
+; R600-NEXT: CNDE T0.W, PV.Y, T1.X, PV.X,
+; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.x,
+; R600-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log_v4f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 50, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 84, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: SETGT T0.Z, literal.x, KC0[4].X,
; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x,
-; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[4].X,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x,
-; CM-NEXT: SETGT T1.Y, literal.y, KC0[3].Z,
-; CM-NEXT: CNDE T0.Z, T0.Z, 0.0, literal.z,
-; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, PV.Z,
+; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x,
+; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Y,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: CNDE T1.X, T1.Z, 1.0, literal.x,
+; CM-NEXT: AND_INT T2.Y, PV.Y, literal.y,
+; CM-NEXT: MUL_IEEE T2.Z, KC0[3].W, T1.Y,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, T0.X,
+; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T1.X (MASKED), T2.Z,
+; CM-NEXT: LOG_IEEE T1.Y, T2.Z,
+; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.Z,
+; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.Z,
+; CM-NEXT: ADD T2.X, T0.Y, -T2.Y,
+; CM-NEXT: AND_INT T3.Y, PV.Y, literal.x,
+; CM-NEXT: AND_INT T2.Z, T0.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.X,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
; CM-NEXT: LOG_IEEE T1.X, T2.W,
; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
-; CM-NEXT: ADD T1.X, PV.X, -T0.Z,
-; CM-NEXT: CNDE T2.Y, T1.Y, 1.0, literal.x,
-; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.y,
-; CM-NEXT: MUL_IEEE * T1.W, KC0[3].W, T0.X,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X, T1.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: ADD T0.X, PV.X, -T0.Z,
-; CM-NEXT: CNDE T1.Y, T1.Y, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, T2.Y,
-; CM-NEXT: MUL_IEEE * T1.W, T1.X, literal.y,
-; CM-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.Z,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.Z,
-; CM-NEXT: LOG_IEEE T0.Z, T0.Z,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z,
-; CM-NEXT: CNDE T1.X, T0.W, 0.0, literal.x,
-; CM-NEXT: ADD T1.Y, PV.Z, -T1.Y,
-; CM-NEXT: MUL_IEEE T1.Z, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T0.Y, BS:VEC_021/SCL_122
-; CM-NEXT: 1107296256(3.200000e+01), 1060205080(6.931472e-01)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T1.Y, T1.Y, literal.x,
-; CM-NEXT: ADD * T0.W, PV.X, -T1.X,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE * T1.X, PV.W, literal.x,
-; CM-NEXT: 1060205080(6.931472e-01), 0(0.000000e+00)
+; CM-NEXT: ADD T3.X, T0.X, -T2.Z,
+; CM-NEXT: ADD T4.Y, T1.Y, -T3.Y,
+; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: -4096(nan), 939916788(3.194618e-05)
+; CM-NEXT: MULADD_IEEE T4.X, T2.Y, literal.x, PV.W,
+; CM-NEXT: ADD T5.Y, T1.X, -PV.Z,
+; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
+; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T5.X, T2.Z, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T6.Y, T3.Y, literal.x, PV.Z,
+; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE * T2.W, T2.X, literal.y, PV.X,
+; CM-NEXT: 939916788(3.194618e-05), 1060204544(6.931152e-01)
+; CM-NEXT: MULADD_IEEE T2.X, T2.Y, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.Z,
+; CM-NEXT: MULADD_IEEE T4.Z, T4.Y, literal.x, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT: MULADD_IEEE * T2.W, T3.X, literal.x, PV.X,
+; CM-NEXT: 1060204544(6.931152e-01), 939916788(3.194618e-05)
+; CM-NEXT: MULADD_IEEE T3.X, T2.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T4.Y, literal.y, |T0.X|,
+; CM-NEXT: MULADD_IEEE T2.Z, T3.Y, literal.x, PV.Z,
+; CM-NEXT: SETGT * T2.W, literal.y, |T1.Y|,
+; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF)
+; CM-NEXT: CNDE T4.X, PV.W, T1.Y, PV.Z,
+; CM-NEXT: CNDE T1.Y, PV.Y, T0.X, PV.X,
+; CM-NEXT: CNDE T2.Z, T1.W, 0.0, literal.x,
+; CM-NEXT: MULADD_IEEE * T1.W, T5.Y, literal.y, T2.Y,
+; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01)
+; CM-NEXT: CNDE T0.X, T0.Z, 0.0, literal.x,
+; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.W, BS:VEC_120/SCL_212
+; CM-NEXT: SETGT T0.Z, literal.z, |T1.X|,
+; CM-NEXT: ADD * T2.W, PV.Y, -PV.Z,
+; CM-NEXT: 1102148120(2.218071e+01), 1060204544(6.931152e-01)
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: SETGT T3.X, literal.x, |T0.Y|,
+; CM-NEXT: CNDE T1.Y, PV.Z, T1.X, PV.Y,
+; CM-NEXT: ADD T2.Z, T4.X, -PV.X,
+; CM-NEXT: CNDE * T1.W, T1.Z, 0.0, literal.y,
+; CM-NEXT: 2139095040(INF), 1102148120(2.218071e+01)
+; CM-NEXT: ADD T2.Y, PV.Y, -PV.W,
+; CM-NEXT: CNDE T0.Z, PV.X, T0.Y, T2.X,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00)
+; CM-NEXT: ADD * T2.X, PV.Z, -PV.W,
; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <4 x float> @llvm.log.v4f32(<4 x float> %in)
}
define float @v_log_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32:
}
define float @v_log_fabs_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log_fabs_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log_fabs_f32:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-LABEL: v_log_fabs_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_fabs_f32:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_fabs_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_fabs_f32:
}
define float @v_log_fneg_fabs_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log_fneg_fabs_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_fneg_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_fneg_fabs_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_fneg_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_fneg_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_fneg_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_fneg_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_fneg_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_fneg_fabs_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_fneg_fabs_f32:
}
define float @v_log_fneg_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log_fneg_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_fneg_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_fneg_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_fneg_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_fneg_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_fneg_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_fneg_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_fneg_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_fneg_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_fneg_f32:
}
define float @v_log_f32_fast(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32_fast:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_fast:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log_f32_fast:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log_f32_fast:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log_f32_fast:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log_f32_fast:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_fast:
; R600: ; %bb.0:
}
define float @v_log_f32_ninf(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32_ninf:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_ninf:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_ninf:
}
define float @v_log_f32_afn(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32_afn:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log_f32_afn:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log_f32_afn:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log_f32_afn:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log_f32_afn:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_afn:
; R600: ; %bb.0:
}
define float @v_log_f32_afn_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log_f32_afn_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_afn_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log_f32_afn_dynamic:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log_f32_afn_dynamic:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_afn_dynamic:
; R600: ; %bb.0:
}
define float @v_fabs_log_f32_afn(float %in) {
-; GFX689-SDAG-LABEL: v_fabs_log_f32_afn:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_fabs_log_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e64 v0, |v0|
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_fabs_log_f32_afn:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e64 v0, |v0|
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0|
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_fabs_log_f32_afn:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log_f32_afn:
; R600: ; %bb.0:
}
define float @v_log_f32_daz(float %in) #0 {
-; GFX689-LABEL: v_log_f32_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_daz:
; R600: ; %bb.0:
}
define float @v_log_f32_nnan(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32_nnan:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_nnan:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_nnan:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_nnan:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_nnan:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_nnan:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_nnan:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_nnan:
}
define float @v_log_f32_nnan_daz(float %in) #0 {
-; GFX689-LABEL: v_log_f32_nnan_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_nnan_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_nnan_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_nnan_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_nnan_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_nnan_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_nnan_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_nnan_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_nnan_daz:
; R600: ; %bb.0:
}
define float @v_log_f32_nnan_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log_f32_nnan_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_nnan_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_nnan_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_nnan_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_nnan_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_nnan_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_nnan_dynamic:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_nnan_dynamic:
}
define float @v_log_f32_ninf_daz(float %in) #0 {
-; GFX689-LABEL: v_log_f32_ninf_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_ninf_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_ninf_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_ninf_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_ninf_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_ninf_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_ninf_daz:
; R600: ; %bb.0:
}
define float @v_log_f32_ninf_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log_f32_ninf_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_log_f32_ninf_dynamic:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_log_f32_ninf_dynamic:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call ninf float @llvm.log.f32(float %in)
+ ret float %result
+}
+
+define float @v_log_f32_nnan_ninf(float %in) {
+; SI-SDAG-LABEL: v_log_f32_nnan_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_nnan_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-SDAG-LABEL: v_log_f32_ninf_dynamic:
+; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log_f32_ninf_dynamic:
+; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; R600-LABEL: v_log_f32_ninf_dynamic:
+; R600-LABEL: v_log_f32_nnan_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
-; CM-LABEL: v_log_f32_ninf_dynamic:
+; CM-LABEL: v_log_f32_nnan_ninf:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
- %result = call ninf float @llvm.log.f32(float %in)
+ %result = call nnan ninf float @llvm.log.f32(float %in)
ret float %result
}
-define float @v_log_f32_nnan_ninf(float %in) {
-; GFX689-SDAG-LABEL: v_log_f32_nnan_ninf:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_nnan_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
+; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v1, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
+; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
+; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf:
+; SI-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
+; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_log_f32_nnan_ninf_daz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_log_f32_e32 v0, v0
+; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v2
+; VI-NEXT: v_mac_f32_e32 v0, 0x3805fdf4, v1
+; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v2
+; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf:
+; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; R600-LABEL: v_log_f32_nnan_ninf:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_log_f32_nnan_ninf:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call nnan ninf float @llvm.log.f32(float %in)
- ret float %result
-}
-
-define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
-; GFX689-LABEL: v_log_f32_nnan_ninf_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: v_log_f32_nnan_ninf_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; R600-LABEL: v_log_f32_nnan_ninf_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
}
define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_nnan_ninf_dynamic:
}
define float @v_log_f32_dynamic_mode(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log_f32_dynamic_mode:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_dynamic_mode:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_dynamic_mode:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_dynamic_mode:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_dynamic_mode:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_dynamic_mode:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_dynamic_mode:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_dynamic_mode:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_dynamic_mode:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_dynamic_mode:
}
define float @v_log_f32_undef() {
-; GFX689-SDAG-LABEL: v_log_f32_undef:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_undef:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_undef:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_undef:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; SI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_undef:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, s4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_undef:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; VI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_undef:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, s4
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_undef:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_undef:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_undef:
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_undef:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
-; CM-LABEL: v_log_f32_undef:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call float @llvm.log.f32(float undef)
- ret float %result
-}
-
-define float @v_log_f32_0() {
-; GFX689-SDAG-LABEL: v_log_f32_0:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0
-; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_0:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; CM-LABEL: v_log_f32_undef:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call float @llvm.log.f32(float undef)
+ ret float %result
+}
+
+define float @v_log_f32_0() {
+; SI-SDAG-LABEL: v_log_f32_0:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, 0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_0:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; SI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_0:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, 0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_0:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; VI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_0:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, 0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_0:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc1b17218, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_0:
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_0:
}
define float @v_log_f32_from_fpext_f16(i16 %src.i) {
-; GFX689-LABEL: v_log_f32_from_fpext_f16:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_from_fpext_f16:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_from_fpext_f16:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_from_fpext_f16:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_from_fpext_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_from_fpext_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_from_fpext_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_from_fpext_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_f16:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_f16:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_from_fpext_f16:
; R600: ; %bb.0:
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_log_f32_from_fpext_math_f16:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_f16_e32 v0, v0, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_log_f32_e32 v0, v0
-; VI-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_log_f32_from_fpext_math_f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX900-NEXT: v_log_f32_e32 v0, v0
-; GFX900-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_from_fpext_math_f16:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_math_f16:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_math_f16:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_from_fpext_math_f16:
; R600: ; %bb.0:
}
define float @v_log_f32_from_fpext_bf16(bfloat %src) {
-; GFX689-SDAG-LABEL: v_log_f32_from_fpext_bf16:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log_f32_from_fpext_bf16:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_from_fpext_bf16:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log_f32_from_fpext_bf16:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_from_fpext_bf16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_from_fpext_bf16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_from_fpext_bf16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_from_fpext_bf16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_from_fpext_bf16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fneg_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fneg_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_f16_fast:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v2f16:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fabs_v2f16:
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
-; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fneg_fabs_v2f16:
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_fneg_v2f16:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v2f16_fast:
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v3f16:
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v3f16_fast:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v4f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_v4f16_fast:
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX689-GISEL: {{.*}}
+; GFX689-SDAG: {{.*}}
; SI: {{.*}}
; SI-SDAG-LABEL: s_log10_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2
+; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log10_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log10_f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log10_f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log10_f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s0, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log10_f32:
; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v1, v0 :: v_dual_mov_b32 v1, 0
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s3
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, s3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.W, KC0[2].Z, PV.W,
-; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: ADD * T0.W, PS, -T0.W,
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45)
+; R600-NEXT: AND_INT * T1.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD * T2.W, T0.X, -PV.W,
+; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
+; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
+; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|,
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; R600-NEXT: ADD T0.X, PV.W, -PS,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log10_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; CM-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, PV.W,
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: ADD * T0.W, PV.X, -T0.Z,
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W,
+; CM-NEXT: LOG_IEEE T0.X, T1.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD * T2.W, T0.X, -PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W,
+; CM-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W,
+; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|,
+; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call float @llvm.log10.f32(float %in)
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s8, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; SI-SDAG-NEXT: v_log_f32_e32 v3, v1
-; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s3, 0x3e9a209a
; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v2
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0
; SI-SDAG-NEXT: s_mov_b32 s5, s1
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log10_v2f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2
+; SI-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
+; SI-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
+; SI-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log10_v2f32:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v2, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log10_v2f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x800000
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v4, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v0, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v0, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log10_v2f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3284fbcf
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
+; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log10_v2f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v0, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v3
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s0, -v4
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v2, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, s2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log10_v2f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s5
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s3, v2 :: v_dual_mul_f32 v3, s2, v3
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v3, v1 :: v_dual_mov_b32 v3, 0
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v0 :: v_dual_mul_f32 v0, 0x3e9a209b, v2
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3e9a209a, v0, -v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v4, v0, 0x3e9a209a, -v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_v2f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 41, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Z, KC0[2].W, PS,
-; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.x,
+; R600-NEXT: MUL_IEEE T3.W, KC0[2].W, PS,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; R600-NEXT: CNDE T1.Z, T1.W, 0.0, literal.x,
-; R600-NEXT: ADD T0.W, PS, -PV.W,
-; R600-NEXT: LOG_IEEE * T0.X, PV.Z,
-; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: ADD * T0.W, PS, -PV.Z,
-; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45)
+; R600-NEXT: AND_INT T2.W, PS, literal.x,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.W,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T3.W, T0.X, -PV.W,
+; R600-NEXT: AND_INT * T4.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T5.W, T0.Y, -PS,
+; R600-NEXT: MUL_IEEE * T6.W, PV.W, literal.x,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T6.W, T2.W, literal.x, PS,
+; R600-NEXT: MUL_IEEE * T7.W, PV.W, literal.x,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T7.W, T4.W, literal.x, PS,
+; R600-NEXT: MULADD_IEEE * T3.W, T3.W, literal.y, PV.W, BS:VEC_021/SCL_122
+; R600-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01)
+; R600-NEXT: MULADD_IEEE T0.Z, T2.W, literal.x, PS,
+; R600-NEXT: SETGT T2.W, literal.y, |T0.X|,
+; R600-NEXT: MULADD_IEEE * T3.W, T5.W, literal.x, PV.W, BS:VEC_021/SCL_122
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: MULADD_IEEE T1.Y, T4.W, literal.x, PS,
+; R600-NEXT: SETGT T1.Z, literal.y, |T0.Y|,
+; R600-NEXT: CNDE T2.W, PV.W, T0.X, PV.Z,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Y, PV.W, -PS,
+; R600-NEXT: CNDE T0.W, PV.Z, T0.Y, PV.Y,
+; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.x,
+; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log10_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: ALU 47, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, PV.Z,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x,
-; CM-NEXT: ADD T0.Z, PV.X, -T1.Z,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].W, T0.Y,
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T0.Y, T0.Z, literal.x,
-; CM-NEXT: ADD * T0.W, PV.X, -T1.Y,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: CNDE T1.Z, PV.W, 1.0, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].X, PV.Z,
+; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: AND_INT T0.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[2].W, T1.Z,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: ADD T1.Z, T0.X, -T0.Z,
+; CM-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD T2.Z, T0.Y, -PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T3.Z, T0.Z, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.x,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T4.Z, T2.W, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE * T3.W, T1.Z, literal.y, PV.Z,
+; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01)
+; CM-NEXT: MULADD_IEEE T1.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T0.Z, literal.y, |T0.X|,
+; CM-NEXT: MULADD_IEEE * T3.W, T2.Z, literal.x, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; CM-NEXT: MULADD_IEEE T1.X, T2.W, literal.x, PV.W,
+; CM-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
+; CM-NEXT: CNDE T0.Z, PV.Z, T0.X, PV.Y,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; CM-NEXT: ADD T1.Y, PV.Z, -PV.W,
+; CM-NEXT: CNDE T0.Z, PV.Y, T0.Y, PV.X,
+; CM-NEXT: CNDE * T0.W, T1.W, 0.0, literal.x,
+; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; CM-NEXT: ADD * T1.X, PV.Z, -PV.W,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <2 x float> @llvm.log10.v2f32(<2 x float> %in)
store <2 x float> %result, ptr addrspace(1) %out
define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; SI-SDAG-LABEL: s_log10_v3f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT: s_mov_b32 s11, 0x3284fbcf
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s9, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1
+; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
+; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0
+; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5
; SI-SDAG-NEXT: v_log_f32_e32 v2, v0
-; SI-SDAG-NEXT: v_log_f32_e32 v5, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v3
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
-; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2
+; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3
+; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
+; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log10_v3f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, s9, v4
; SI-GISEL-NEXT: v_log_f32_e32 v4, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v4
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf
+; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log10_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: s_mov_b32 s8, 0x7f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v3
+; VI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
+; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v6, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v5
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v5, v0, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s6
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s7
; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log10_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, s5, v3
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
-; VI-GISEL-NEXT: v_log_f32_e32 v4, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s9
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log10_v3f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3284fbcf
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s6, v4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s6, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s5, v4
; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v1
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v3
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-SDAG-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s6, -v6
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s7, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s6, -v4
+; GFX900-SDAG-NEXT: v_fma_f32 v6, v0, s7, v6
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log10_v3f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v4
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v2, s[0:1]
; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s5, v4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v4
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, v3, -v8
; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v5
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v4, s2, v9
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v6, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
+; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log10_v3f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s4
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, s6, v3 :: v_dual_mul_f32 v4, s5, v4
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s7
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, s4, v5
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s3
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s6, v0 :: v_dual_mul_f32 v1, s5, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s4, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v4, v1 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3e9a209a, v0, -v3
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v3, v5, v2 :: v_dual_mul_f32 v2, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v1 :: v_dual_mul_f32 v0, 0x3e9a209b, v3
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2
+; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_cndmask_b32 v1, v1, v4
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v3, v6
; GFX1100-SDAG-NEXT: global_store_b96 v4, v[0:2], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s3
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s2
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_sub_f32 v1, v1, v4
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v1, 0x3e9a209b, v1 :: v_dual_mul_f32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fma_f32 v6, v0, 0x3e9a209a, -v3
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2
+; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_mov_b32 v3, 0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, KC0[3].Y, PS,
-; R600-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x,
-; R600-NEXT: SETGT T0.W, literal.y, KC0[3].W,
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Y, PS,
+; R600-NEXT: SETGT T3.W, literal.x, KC0[3].W,
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
-; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38)
-; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x,
-; R600-NEXT: CNDE T1.Z, PV.W, 1.0, literal.y,
-; R600-NEXT: ADD T1.W, PS, -PV.Z,
-; R600-NEXT: LOG_IEEE * T0.X, PV.Y,
-; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: MUL_IEEE T1.W, KC0[3].W, PV.Z,
-; R600-NEXT: ADD * T2.W, PS, -PV.Y,
-; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PS, literal.x,
-; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.y,
-; R600-NEXT: LOG_IEEE * T0.Z, PV.W,
-; R600-NEXT: 1050288283(3.010300e-01), 1107296256(3.200000e+01)
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: ADD * T0.W, PS, -PV.W,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
-; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; R600-NEXT: 1050288283(3.010300e-01), 8(1.121039e-44)
+; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; R600-NEXT: AND_INT T1.Z, PS, literal.x,
+; R600-NEXT: CNDE T2.W, PV.W, 1.0, literal.y,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
+; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
+; R600-NEXT: ADD T2.W, T0.X, -PV.Z,
+; R600-NEXT: AND_INT * T4.W, PS, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Z, T0.Y, -PS,
+; R600-NEXT: MUL_IEEE T5.W, PV.W, literal.x,
+; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T3.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT: AND_INT T5.W, PS, literal.y,
+; R600-NEXT: MUL_IEEE * T6.W, PV.Z, literal.x,
+; R600-NEXT: 916096251(4.605039e-06), -4096(nan)
+; R600-NEXT: MULADD_IEEE T4.Z, T4.W, literal.x, PS,
+; R600-NEXT: ADD T6.W, T0.Z, -PV.W,
+; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.y, PV.Z, BS:VEC_021/SCL_122
+; R600-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01)
+; R600-NEXT: MULADD_IEEE T1.Y, T1.Z, literal.x, PS,
+; R600-NEXT: SETGT T1.Z, literal.y, |T0.X|,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.z,
+; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Z, BS:VEC_021/SCL_122
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.X, T4.W, literal.x, PS,
+; R600-NEXT: SETGT T2.Y, literal.y, |T0.Y|,
+; R600-NEXT: MULADD_IEEE T2.Z, T5.W, literal.z, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE T2.W, PV.Z, T0.X, PV.Y,
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.w,
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: 916096251(4.605039e-06), 1092231323(9.632960e+00)
+; R600-NEXT: ADD T1.Y, PV.W, -PS,
+; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.x, PV.Z,
+; R600-NEXT: CNDE T0.W, PV.Y, T0.Y, PV.X,
+; R600-NEXT: CNDE * T1.W, T1.W, 0.0, literal.y,
+; R600-NEXT: 1050288128(3.010254e-01), 1092231323(9.632960e+00)
+; R600-NEXT: ADD T1.X, PV.W, -PS,
+; R600-NEXT: MULADD_IEEE T0.W, T5.W, literal.x, PV.Z,
+; R600-NEXT: SETGT * T1.W, literal.y, |T0.Z|,
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: CNDE T0.W, PS, T0.Z, PV.W,
+; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.y,
+; R600-NEXT: 2(2.802597e-45), 1092231323(9.632960e+00)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log10_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T1.X
+; CM-NEXT: ALU 68, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
-; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y,
+; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Z,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.y,
-; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, PV.Y,
-; CM-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y, T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: CNDE T1.X, T1.W, 1.0, literal.x,
-; CM-NEXT: CNDE T2.Y, T0.Z, 0.0, literal.y,
-; CM-NEXT: ADD T0.Z, PV.Y, -T1.Z,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Z, T1.Y,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y, T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T2.X, T0.Z, literal.x,
-; CM-NEXT: ADD T0.Y, PV.Y, -T2.Y,
-; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T1.X,
-; CM-NEXT: 1050288283(3.010300e-01), 8(1.121039e-44)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W, T0.W,
-; CM-NEXT: LSHR T1.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.y,
-; CM-NEXT: ADD * T0.W, PV.W, -T0.X,
-; CM-NEXT: 2(2.802597e-45), 1050288283(3.010300e-01)
-; CM-NEXT: MUL_IEEE * T0.X, PV.W, literal.x,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.X, literal.y,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].W, T0.Y,
+; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: ADD T2.Y, T0.X, -T0.Z,
+; CM-NEXT: AND_INT T2.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.Y,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T1.X, T2.W,
+; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT: ADD T1.Y, T0.Y, -T2.Z,
+; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T2.Y, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: -4096(nan), 916096251(4.605039e-06)
+; CM-NEXT: MULADD_IEEE T3.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: ADD T4.Z, T1.X, -PV.Z,
+; CM-NEXT: MUL_IEEE * T2.W, PV.Y, literal.x,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T4.Y, T2.Z, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE T5.Z, PV.Z, literal.x,
+; CM-NEXT: MULADD_IEEE * T2.W, T2.Y, literal.y, PV.Y,
+; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01)
+; CM-NEXT: MULADD_IEEE T2.Y, T0.Z, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T3.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.x, PV.Y,
+; CM-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06)
+; CM-NEXT: SETGT T2.X, literal.x, |T0.X|,
+; CM-NEXT: MULADD_IEEE T1.Y, T2.Z, literal.y, PV.W,
+; CM-NEXT: SETGT T2.Z, literal.x, |T0.Y|,
+; CM-NEXT: MULADD_IEEE * T2.W, T4.Z, literal.y, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: 2139095040(INF), 1050288128(3.010254e-01)
+; CM-NEXT: MULADD_IEEE T3.X, T3.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T3.Y, literal.y, |T1.X|,
+; CM-NEXT: CNDE T0.Z, PV.Z, T0.Y, PV.Y,
+; CM-NEXT: CNDE * T1.W, T1.W, 0.0, literal.z,
+; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; CM-NEXT: ADD T4.X, PV.Z, -PV.W,
+; CM-NEXT: CNDE T0.Y, PV.Y, T1.X, PV.X,
+; CM-NEXT: CNDE T0.Z, T1.Z, 0.0, literal.x,
+; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; CM-NEXT: 1092231323(9.632960e+00), 8(1.121039e-44)
+; CM-NEXT: LSHR T1.X, PV.W, literal.x,
+; CM-NEXT: ADD T0.Y, PV.Y, -PV.Z,
+; CM-NEXT: CNDE T0.Z, T2.X, T0.X, T2.Y,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.y,
+; CM-NEXT: 2(2.802597e-45), 1092231323(9.632960e+00)
+; CM-NEXT: ADD * T0.X, PV.Z, -PV.W,
+; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.log10.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-SDAG-LABEL: s_log10_v4f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: s_mov_b32 s12, 0x3284fbcf
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
-; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; SI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v2
-; SI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s11, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3
+; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v4, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v4
+; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2
+; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6
; SI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; SI-SDAG-NEXT: v_log_f32_e32 v4, v1
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v6
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7
+; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4
+; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log10_v4f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, 0x800000
+; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s8
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
+; SI-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; SI-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
+; SI-GISEL-NEXT: v_log_f32_e32 v9, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf
+; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
+; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3
+; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
+; SI-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log10_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; VI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; VI-SDAG-NEXT: v_log_f32_e32 v4, v4
-; VI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s7, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
+; VI-SDAG-NEXT: v_log_f32_e32 v4, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
+; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v6, v5
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v2, vcc
-; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
; VI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
; VI-SDAG-NEXT: v_log_f32_e32 v6, v6
-; VI-SDAG-NEXT: v_log_f32_e32 v7, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
-; VI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v7, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s7
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s8
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, s9
; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log10_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: s_mov_b32 s0, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; VI-GISEL-NEXT: v_log_f32_e32 v5, v5
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x800000
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v7
+; VI-GISEL-NEXT: v_log_f32_e32 v7, v7
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x369a84fb, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v8
; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s8
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s9
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log10_v4f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x4f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x3284fbcf
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v3
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s7, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x411a209b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s7, -v3
+; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s10, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v3
+; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v2
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v6
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-SDAG-NEXT: global_store_dwordx4 v7, v[0:3], s[2:3]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v5
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s7, -v2
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s10, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v7
+; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v7
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s7, -v5
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s10, v8
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s7, -v5
+; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s10, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log10_v4f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3284fbcf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s5, v5
-; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0x3e9a209a
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x7f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s0, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v5, v0, s2, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, s2, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s6, v9
+; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9
; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v6
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v6, v10
+; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v6, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log10_v4f32:
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s2
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v4, s7, v4 :: v_dual_mul_f32 v5, s6, v5
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s5, v6 :: v_dual_mul_f32 v7, s4, v7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s7, v0 :: v_dual_mul_f32 v1, s6, v1
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s5, v2 :: v_dual_mul_f32 v3, s4, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s8
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s9
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v0, v4, v0 :: v_dual_sub_f32 v1, v5, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_dual_sub_f32 v4, v6, v2 :: v_dual_sub_f32 v5, v7, v3
-; GFX1100-SDAG-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v1 :: v_dual_mul_f32 v1, 0x3e9a209b, v4
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v5
-; GFX1100-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3e9a209a, v0, -v5
+; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7
+; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
+; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1]
; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_mul_f32 v3, s7, v3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s3
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, 0x3e9a209b, v0 :: v_dual_mul_f32 v1, 0x3e9a209b, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209b, v2 :: v_dual_mul_f32 v3, 0x3e9a209b, v3
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v10, v0, 0x3e9a209a, -v5
+; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7
+; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_v4f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 38, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; R600-NEXT: ALU 76, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X,
-; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
+; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[4].X,
; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W,
-; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z,
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].Z, PV.W,
+; R600-NEXT: SETGT T2.W, literal.x, KC0[3].W,
; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y,
; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS,
-; R600-NEXT: SETGT T0.Y, literal.x, KC0[3].Y,
-; R600-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y,
-; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z,
-; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
-; R600-NEXT: 8388608(1.175494e-38), 1107296256(3.200000e+01)
+; R600-NEXT: MUL_IEEE T1.Z, KC0[4].X, PS,
+; R600-NEXT: CNDE T3.W, PV.W, 1.0, literal.x,
+; R600-NEXT: LOG_IEEE * T0.X, PV.Z,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W,
-; R600-NEXT: ADD T1.Y, PS, -PV.Z,
-; R600-NEXT: CNDE T0.Z, T1.W, 0.0, literal.x,
-; R600-NEXT: CNDE T0.W, PV.Y, 1.0, literal.y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09)
-; R600-NEXT: MUL_IEEE T2.X, KC0[3].Y, PV.W,
-; R600-NEXT: CNDE T2.Y, T2.W, 0.0, literal.x,
-; R600-NEXT: ADD T0.Z, PS, -PV.Z,
-; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01)
-; R600-NEXT: CNDE T1.Y, T0.Y, 0.0, literal.x,
-; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.y,
-; R600-NEXT: ADD T1.W, PS, -PV.Y,
-; R600-NEXT: LOG_IEEE * T0.X, PV.X,
-; R600-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01)
-; R600-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; R600-NEXT: ADD * T1.W, PS, -PV.Y,
-; R600-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; R600-NEXT: 1050288283(3.010300e-01), 2(2.802597e-45)
+; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, PV.W,
+; R600-NEXT: SETGT T3.W, literal.x, KC0[3].Y,
+; R600-NEXT: LOG_IEEE * T0.Y, PV.Z,
+; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; R600-NEXT: AND_INT T1.Y, PS, literal.x,
+; R600-NEXT: AND_INT T1.Z, T0.X, literal.x,
+; R600-NEXT: CNDE T4.W, PV.W, 1.0, literal.y,
+; R600-NEXT: LOG_IEEE * T0.Z, PV.Z,
+; R600-NEXT: -4096(nan), 1333788672(4.294967e+09)
+; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.W,
+; R600-NEXT: ADD T2.Z, T0.X, -PV.Z,
+; R600-NEXT: AND_INT T4.W, PS, literal.x,
+; R600-NEXT: ADD * T5.W, T0.Y, -PV.Y,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.Y, PS, literal.x,
+; R600-NEXT: ADD T3.Z, T0.Z, -PV.W,
+; R600-NEXT: MUL_IEEE T6.W, PV.Z, literal.x,
+; R600-NEXT: LOG_IEEE * T1.X, PV.Y,
+; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE T4.Z, PV.Z, literal.x,
+; R600-NEXT: AND_INT T6.W, PS, literal.y,
+; R600-NEXT: MULADD_IEEE * T7.W, T1.Y, literal.x, PV.Y, BS:VEC_021/SCL_122
+; R600-NEXT: 916096251(4.605039e-06), -4096(nan)
+; R600-NEXT: MULADD_IEEE T3.Y, T5.W, literal.x, PS,
+; R600-NEXT: ADD T5.Z, T1.X, -PV.W,
+; R600-NEXT: MULADD_IEEE T5.W, T4.W, literal.y, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE * T7.W, T2.Z, literal.x, PV.Y, BS:VEC_021/SCL_122
+; R600-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06)
+; R600-NEXT: MULADD_IEEE T2.X, T1.Z, literal.x, PS,
+; R600-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.x, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE T1.Z, PV.Z, literal.y,
+; R600-NEXT: MULADD_IEEE T5.W, T1.Y, literal.x, PV.Y,
+; R600-NEXT: SETGT * T7.W, literal.z, |T0.Y|,
+; R600-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06)
+; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
+; R600-NEXT: CNDE T3.X, PS, T0.Y, PV.W,
+; R600-NEXT: CNDE T0.Y, T1.W, 0.0, literal.x,
+; R600-NEXT: MULADD_IEEE T1.Z, T6.W, literal.y, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE T1.W, T4.W, literal.z, PV.Y, BS:VEC_201
+; R600-NEXT: SETGT * T4.W, literal.w, |T0.Z|,
+; R600-NEXT: 1092231323(9.632960e+00), 916096251(4.605039e-06)
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: SETGT T4.X, literal.x, |T0.X|,
+; R600-NEXT: CNDE T1.Y, PS, T0.Z, PV.W,
+; R600-NEXT: CNDE T0.Z, T2.W, 0.0, literal.y,
+; R600-NEXT: MULADD_IEEE T1.W, T5.Z, literal.z, PV.Z,
+; R600-NEXT: ADD * T2.W, PV.X, -PV.Y,
+; R600-NEXT: 2139095040(INF), 1092231323(9.632960e+00)
+; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T3.X, T6.W, literal.x, PV.W,
+; R600-NEXT: SETGT T0.Y, literal.y, |T1.X|,
+; R600-NEXT: ADD T2.Z, PV.Y, -PV.Z,
+; R600-NEXT: CNDE T1.W, PV.X, T0.X, T2.X, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.z,
+; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Y, PV.W, -PS,
+; R600-NEXT: CNDE T0.W, PV.Y, T1.X, PV.X,
+; R600-NEXT: CNDE * T1.W, T3.W, 0.0, literal.x,
+; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; R600-NEXT: ADD T2.X, PV.W, -PS,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log10_v4f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 50, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 84, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: SETGT T0.Z, literal.x, KC0[4].X,
; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].Y,
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x,
-; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x,
-; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[4].X,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x,
-; CM-NEXT: SETGT T1.Y, literal.y, KC0[3].Z,
-; CM-NEXT: CNDE T0.Z, T0.Z, 0.0, literal.z,
-; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, PV.Z,
+; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x,
+; CM-NEXT: SETGT T1.Z, literal.y, KC0[3].Z,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Y, PV.Y,
; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38)
-; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
+; CM-NEXT: LOG_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Y, T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: CNDE T1.X, T1.Z, 1.0, literal.x,
+; CM-NEXT: AND_INT T2.Y, PV.Y, literal.y,
+; CM-NEXT: MUL_IEEE T2.Z, KC0[3].W, T1.Y,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[4].X, T0.X,
+; CM-NEXT: 1333788672(4.294967e+09), -4096(nan)
+; CM-NEXT: LOG_IEEE T0.X, T2.W,
+; CM-NEXT: LOG_IEEE T0.Y (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: LOG_IEEE T1.X (MASKED), T2.Z,
+; CM-NEXT: LOG_IEEE T1.Y, T2.Z,
+; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.Z,
+; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.Z,
+; CM-NEXT: ADD T2.X, T0.Y, -T2.Y,
+; CM-NEXT: AND_INT T3.Y, PV.Y, literal.x,
+; CM-NEXT: AND_INT T2.Z, T0.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, KC0[3].Z, T1.X,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
; CM-NEXT: LOG_IEEE T1.X, T2.W,
; CM-NEXT: LOG_IEEE T1.Y (MASKED), T2.W,
; CM-NEXT: LOG_IEEE T1.Z (MASKED), T2.W,
; CM-NEXT: LOG_IEEE * T1.W (MASKED), T2.W,
-; CM-NEXT: ADD T1.X, PV.X, -T0.Z,
-; CM-NEXT: CNDE T2.Y, T1.Y, 1.0, literal.x,
-; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.y,
-; CM-NEXT: MUL_IEEE * T1.W, KC0[3].W, T0.X,
-; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01)
-; CM-NEXT: LOG_IEEE T0.X, T1.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: ADD T0.X, PV.X, -T0.Z,
-; CM-NEXT: CNDE T1.Y, T1.Y, 0.0, literal.x,
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, T2.Y,
-; CM-NEXT: MUL_IEEE * T1.W, T1.X, literal.y,
-; CM-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01)
-; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.Z,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.Z,
-; CM-NEXT: LOG_IEEE T0.Z, T0.Z,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z,
-; CM-NEXT: CNDE T1.X, T0.W, 0.0, literal.x,
-; CM-NEXT: ADD T1.Y, PV.Z, -T1.Y,
-; CM-NEXT: MUL_IEEE T1.Z, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T0.Y, BS:VEC_021/SCL_122
-; CM-NEXT: 1107296256(3.200000e+01), 1050288283(3.010300e-01)
-; CM-NEXT: LOG_IEEE T0.X, T0.W,
-; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T1.Y, T1.Y, literal.x,
-; CM-NEXT: ADD * T0.W, PV.X, -T1.X,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE * T1.X, PV.W, literal.x,
-; CM-NEXT: 1050288283(3.010300e-01), 0(0.000000e+00)
+; CM-NEXT: ADD T3.X, T0.X, -T2.Z,
+; CM-NEXT: ADD T4.Y, T1.Y, -T3.Y,
+; CM-NEXT: AND_INT T3.Z, PV.X, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: -4096(nan), 916096251(4.605039e-06)
+; CM-NEXT: MULADD_IEEE T4.X, T2.Y, literal.x, PV.W,
+; CM-NEXT: ADD T5.Y, T1.X, -PV.Z,
+; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
+; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T5.X, T2.Z, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T6.Y, T3.Y, literal.x, PV.Z,
+; CM-NEXT: MUL_IEEE T4.Z, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE * T2.W, T2.X, literal.y, PV.X,
+; CM-NEXT: 916096251(4.605039e-06), 1050288128(3.010254e-01)
+; CM-NEXT: MULADD_IEEE T2.X, T2.Y, literal.x, PV.W,
+; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.Z,
+; CM-NEXT: MULADD_IEEE T4.Z, T4.Y, literal.x, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT: MULADD_IEEE * T2.W, T3.X, literal.x, PV.X,
+; CM-NEXT: 1050288128(3.010254e-01), 916096251(4.605039e-06)
+; CM-NEXT: MULADD_IEEE T3.X, T2.Z, literal.x, PV.W,
+; CM-NEXT: SETGT T4.Y, literal.y, |T0.X|,
+; CM-NEXT: MULADD_IEEE T2.Z, T3.Y, literal.x, PV.Z,
+; CM-NEXT: SETGT * T2.W, literal.y, |T1.Y|,
+; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF)
+; CM-NEXT: CNDE T4.X, PV.W, T1.Y, PV.Z,
+; CM-NEXT: CNDE T1.Y, PV.Y, T0.X, PV.X,
+; CM-NEXT: CNDE T2.Z, T1.W, 0.0, literal.x,
+; CM-NEXT: MULADD_IEEE * T1.W, T5.Y, literal.y, T2.Y,
+; CM-NEXT: 1092231323(9.632960e+00), 1050288128(3.010254e-01)
+; CM-NEXT: CNDE T0.X, T0.Z, 0.0, literal.x,
+; CM-NEXT: MULADD_IEEE T2.Y, T3.Z, literal.y, PV.W, BS:VEC_120/SCL_212
+; CM-NEXT: SETGT T0.Z, literal.z, |T1.X|,
+; CM-NEXT: ADD * T2.W, PV.Y, -PV.Z,
+; CM-NEXT: 1092231323(9.632960e+00), 1050288128(3.010254e-01)
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: SETGT T3.X, literal.x, |T0.Y|,
+; CM-NEXT: CNDE T1.Y, PV.Z, T1.X, PV.Y,
+; CM-NEXT: ADD T2.Z, T4.X, -PV.X,
+; CM-NEXT: CNDE * T1.W, T1.Z, 0.0, literal.y,
+; CM-NEXT: 2139095040(INF), 1092231323(9.632960e+00)
+; CM-NEXT: ADD T2.Y, PV.Y, -PV.W,
+; CM-NEXT: CNDE T0.Z, PV.X, T0.Y, T2.X,
+; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
+; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00)
+; CM-NEXT: ADD * T2.X, PV.Z, -PV.W,
; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <4 x float> @llvm.log10.v4f32(<4 x float> %in)
}
define float @v_log10_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32:
}
define float @v_log10_fabs_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log10_fabs_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log10_fabs_f32:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-LABEL: v_log10_fabs_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_fabs_f32:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_fabs_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_fabs_f32:
}
define float @v_log10_fneg_fabs_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log10_fneg_fabs_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_fneg_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_fneg_fabs_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_fneg_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_fneg_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_fneg_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_fneg_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_fneg_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_fneg_fabs_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_fneg_fabs_f32:
}
define float @v_log10_fneg_f32(float %in) {
-; GFX689-SDAG-LABEL: v_log10_fneg_f32:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_fneg_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_fneg_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_fneg_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_fneg_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_fneg_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_fneg_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_fneg_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_fneg_f32:
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_fneg_f32:
}
define float @v_log10_f32_fast(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32_fast:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_fast:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log10_f32_fast:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log10_f32_fast:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log10_f32_fast:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log10_f32_fast:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_fast:
; R600: ; %bb.0:
}
define float @v_log10_f32_ninf(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32_ninf:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_ninf:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_ninf:
}
define float @v_log10_f32_afn(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32_afn:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log10_f32_afn:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log10_f32_afn:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log10_f32_afn:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log10_f32_afn:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_afn:
; R600: ; %bb.0:
}
define float @v_log10_f32_afn_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log10_f32_afn_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_afn_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_log10_f32_afn_dynamic:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_log10_f32_afn_dynamic:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e32 v0, v0
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_log10_f32_afn_dynamic:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_afn_dynamic:
; R600: ; %bb.0:
}
define float @v_fabs_log10_f32_afn(float %in) {
-; GFX689-SDAG-LABEL: v_fabs_log10_f32_afn:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_fabs_log10_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_log_f32_e64 v0, |v0|
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-SDAG-LABEL: v_fabs_log10_f32_afn:
-; GFX1100-SDAG: ; %bb.0:
-; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX689-LABEL: v_fabs_log10_f32_afn:
+; GFX689: ; %bb.0:
+; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-NEXT: v_log_f32_e64 v0, |v0|
+; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX689-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn:
-; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_log_f32_e64 v0, |v0|
-; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1100-LABEL: v_fabs_log10_f32_afn:
+; GFX1100: ; %bb.0:
+; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
+; GFX1100-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log10_f32_afn:
; R600: ; %bb.0:
}
define float @v_log10_f32_daz(float %in) #0 {
-; GFX689-LABEL: v_log10_f32_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_daz:
; R600: ; %bb.0:
}
define float @v_log10_f32_nnan(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32_nnan:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_nnan:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_nnan:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_nnan:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_nnan:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_nnan:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_nnan:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_nnan:
}
define float @v_log10_f32_nnan_daz(float %in) #0 {
-; GFX689-LABEL: v_log10_f32_nnan_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_nnan_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_nnan_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_nnan_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_nnan_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_nnan_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_nnan_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_nnan_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_nnan_daz:
; R600: ; %bb.0:
}
define float @v_log10_f32_nnan_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log10_f32_nnan_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_nnan_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_nnan_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_nnan_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_nnan_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_nnan_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_dynamic:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_nnan_dynamic:
}
define float @v_log10_f32_ninf_daz(float %in) #0 {
-; GFX689-LABEL: v_log10_f32_ninf_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_ninf_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_ninf_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_ninf_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_ninf_daz:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_ninf_daz:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_ninf_daz:
; R600: ; %bb.0:
}
define float @v_log10_f32_ninf_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log10_f32_ninf_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_ninf_dynamic:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_ninf_dynamic:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_log10_f32_ninf_dynamic:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_log10_f32_ninf_dynamic:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call ninf float @llvm.log10.f32(float %in)
+ ret float %result
+}
+
+define float @v_log10_f32_nnan_ninf(float %in) {
+; SI-SDAG-LABEL: v_log10_f32_nnan_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_nnan_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-SDAG-LABEL: v_log10_f32_ninf_dynamic:
+; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log10_f32_ninf_dynamic:
+; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; R600-LABEL: v_log10_f32_ninf_dynamic:
+; R600-LABEL: v_log10_f32_nnan_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
-; CM-LABEL: v_log10_f32_ninf_dynamic:
+; CM-LABEL: v_log10_f32_nnan_ninf:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
- %result = call ninf float @llvm.log10.f32(float %in)
+ %result = call nnan ninf float @llvm.log10.f32(float %in)
ret float %result
}
-define float @v_log10_f32_nnan_ninf(float %in) {
-; GFX689-SDAG-LABEL: v_log10_f32_nnan_ninf:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_nnan_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
+; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v1, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
+; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
+; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf:
+; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
+; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_log10_f32_nnan_ninf_daz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_log_f32_e32 v0, v0
+; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v2
+; VI-NEXT: v_mac_f32_e32 v0, 0x369a84fb, v1
+; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v2
+; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf:
+; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; R600-LABEL: v_log10_f32_nnan_ninf:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_log10_f32_nnan_ninf:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call nnan ninf float @llvm.log10.f32(float %in)
- ret float %result
-}
-
-define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
-; GFX689-LABEL: v_log10_f32_nnan_ninf_daz:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: v_log10_f32_nnan_ninf_daz:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
-;
; R600-LABEL: v_log10_f32_nnan_ninf_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
}
define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_nnan_ninf_dynamic:
}
define float @v_log10_f32_dynamic_mode(float %in) #1 {
-; GFX689-SDAG-LABEL: v_log10_f32_dynamic_mode:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_dynamic_mode:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_dynamic_mode:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_dynamic_mode:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_dynamic_mode:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_dynamic_mode:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_dynamic_mode:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_dynamic_mode:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_dynamic_mode:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_dynamic_mode:
}
define float @v_log10_f32_undef() {
-; GFX689-SDAG-LABEL: v_log10_f32_undef:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_undef:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_undef:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_undef:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; SI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_undef:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, s4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_undef:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; VI-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_undef:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, s4
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_undef:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, s4, 1.0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_undef:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_undef:
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, s0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_undef:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
-; CM-LABEL: v_log10_f32_undef:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call float @llvm.log10.f32(float undef)
- ret float %result
-}
-
-define float @v_log10_f32_0() {
-; GFX689-SDAG-LABEL: v_log10_f32_0:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0
-; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_0:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; CM-LABEL: v_log10_f32_undef:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call float @llvm.log10.f32(float undef)
+ ret float %result
+}
+
+define float @v_log10_f32_0() {
+; SI-SDAG-LABEL: v_log10_f32_0:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_log_f32_e32 v0, 0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_0:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; SI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_0:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_log_f32_e32 v0, 0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_0:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; VI-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_0:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, 0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_0:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, 0, 1.0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_0:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc11a209b, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_0:
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v1, 0, 1.0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_0:
}
define float @v_log10_f32_from_fpext_f16(i16 %src.i) {
-; GFX689-LABEL: v_log10_f32_from_fpext_f16:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_from_fpext_f16:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_from_fpext_f16:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_from_fpext_f16:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_from_fpext_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_from_fpext_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_f16:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_f16:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_from_fpext_f16:
; R600: ; %bb.0:
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_f32_from_fpext_math_f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; VI-LABEL: v_log10_f32_from_fpext_math_f16:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_add_f16_e32 v0, v0, v1
-; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT: v_log_f32_e32 v0, v0
-; VI-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_log10_f32_from_fpext_math_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_log10_f32_from_fpext_math_f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX900-NEXT: v_log_f32_e32 v0, v0
-; GFX900-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_log10_f32_from_fpext_math_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_from_fpext_math_f16:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_math_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_math_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_math_f16:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_math_f16:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_from_fpext_math_f16:
; R600: ; %bb.0:
}
define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
-; GFX689-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
-; GFX689-SDAG: ; %bb.0:
-; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
-; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX689-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_from_fpext_bf16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fneg_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fneg_f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_f16_fast:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v2f16:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fabs_v2f16:
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
-; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fneg_fabs_v2f16:
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_fneg_v2f16:
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v2f16_fast:
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v3f16:
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v3f16_fast:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v4f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_log_f32_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_v4f16_fast:
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX689-GISEL: {{.*}}
+; GFX689-SDAG: {{.*}}
; SI: {{.*}}