Previously this did a fast math expansion only.
:ref:`llvm.log10 <int_log10>` Implemented for float and half (and vectors).
+ :ref:`llvm.exp2 <int_exp2>` Implemented for float and half (and vectors of float or
+ half). Not implemented for double. Hardware provides
+ 1ULP accuracy for float, and 0.51ULP for half. The float
+ instruction does not natively support denormal
+ inputs. The backend will omit the denormal scaling if
+ the call is marked with the :ref:`afn <fastmath_afn>`
+ flag.
+
========================================= ==========================================================
.. TODO::
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+.. _int_exp2:
+
'``llvm.exp2.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
accurately. Use llvm.amdgcn.log.f32 to access the old behavior for
llvm.log2.f32.
+* llvm.exp2.f32 is now lowered accurately. Use llvm.amdgcn.exp2.f32 to
+ access the old behavior for llvm.exp2.f32.
+
Changes to the ARM Backend
--------------------------
// Library functions. These default to Expand, but we have instructions
// for them.
- setOperationAction({ISD::FCEIL, ISD::FEXP2, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
- ISD::FRINT, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
+ setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, ISD::FRINT,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
- setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom);
+ setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2}, MVT::f32,
+ Custom);
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
- setOperationAction(ISD::FLOG2, MVT::f16, Custom);
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
setOperationAction({ISD::FLOG10, ISD::FLOG}, MVT::f16, Custom);
return LowerFLOGCommon(Op, DAG);
case ISD::FEXP:
return lowerFEXP(Op, DAG);
+ case ISD::FEXP2:
+ return lowerFEXP2(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
+ case ISD::FEXP2:
+ if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
default:
return;
}
Flags);
}
+/// Custom lowering of ISD::FEXP2 for f16 and f32 operands.
+///
+/// - f16: the source is extended to f32, evaluated with AMDGPUISD::EXP and
+///   rounded back to f16.
+/// - f32 without denormal handling (as decided by needsDenormHandlingF32):
+///   emitted as a bare AMDGPUISD::EXP.
+/// - f32 with denormal handling: inputs below -126.0 are biased by +64 before
+///   the AMDGPUISD::EXP node and the result is multiplied by 2^-64 afterwards;
+///   all other inputs use an offset of 0.0 and a scale of 1.0.
+///
+/// \param Op  The FEXP2 node; operand 0 is the source value and the node's
+///            flags are propagated to the nodes created here.
+/// \param DAG The DAG in which the replacement nodes are built.
+/// \returns The value that replaces \p Op.
+SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
+  // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
+  // If we have to handle denormals, scale up the input and adjust the result.
+
+  SDLoc SL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  SDNodeFlags Flags = Op->getFlags();
+
+  if (VT == MVT::f16) {
+    // Nothing in half is a denormal when promoted to f32.
+    assert(!Subtarget->has16BitInsts());
+    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
+    SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
+    return DAG.getNode(ISD::FP_ROUND, SL, VT, Exp2,
+                       DAG.getTargetConstant(0, SL, MVT::i32), Flags);
+  }
+
+  if (!needsDenormHandlingF32(DAG, Src, Flags))
+    return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
+
+  // bool needs_scaling = x < -0x1.f80000p+6f;
+  // v_exp_f32(x + (needs_scaling ? 0x1.0p+6f : 0.0f)) *
+  //     (needs_scaling ? 0x1.0p-64f : 1.0f);
+
+  // -0x1.f80000p+6f is -126.0; exp2 of any smaller input is below 2^-126,
+  // the smallest normal f32, so only those inputs need the scaled path.
+  SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT);
+
+  SDValue NeedsScaling = DAG.getSetCC(
+      SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
+      RangeCheckConst, ISD::SETOLT);
+
+  // Input bias: +64.0 when scaling, otherwise 0.0.
+  SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT);
+  SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+
+  SDValue AddOffset =
+      DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero);
+
+  SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags);
+  SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);
+
+  // Result scale: 2^-64 undoes the +64 input bias, otherwise 1.0.
+  SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT);
+  SDValue One = DAG.getConstantFP(1.0, SL, VT);
+  SDValue ResultScale =
+      DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One);
+
+  return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
+}
+
// exp2(M_LOG2E_F * f);
SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
double Log2BaseInverted, SDNodeFlags Flags) const;
+ SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
(flog2 node:$src)]>;
def AMDGPUexp : PatFrags<(ops node:$src), [(int_amdgcn_exp2 node:$src),
- (AMDGPUexp_impl node:$src),
- (fexp2 node:$src)]>; // FIXME: Remove me
+ (AMDGPUexp_impl node:$src)]>;
def AMDGPUexpf16 : PatFrags<(ops node:$src), [(int_amdgcn_exp2 node:$src),
(fexp2 node:$src)]>;
.scalarize(0);
// FIXME: fpow has a selection pattern that should move to custom lowering.
- auto &Exp2Ops = getActionDefinitionsBuilder(G_FEXP2);
- if (ST.has16BitInsts())
- Exp2Ops.legalFor({S32, S16});
- else
- Exp2Ops.legalFor({S32});
- Exp2Ops.clampScalar(0, MinScalarFPTy, S32);
- Exp2Ops.scalarize(0);
-
auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FPOW});
if (ST.has16BitInsts())
ExpOps.customFor({{S32}, {S16}});
.clampScalar(0, MinScalarFPTy, S32)
.lower();
- auto &Log2Ops = getActionDefinitionsBuilder(G_FLOG2);
+ auto &Log2Ops = getActionDefinitionsBuilder({G_FLOG2, G_FEXP2});
Log2Ops.customFor({S32});
if (ST.has16BitInsts())
Log2Ops.legalFor({S16});
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG10:
return legalizeFlogCommon(MI, B);
+ case TargetOpcode::G_FEXP2:
+ return legalizeFExp2(MI, B);
case TargetOpcode::G_FEXP:
return legalizeFExp(MI, B);
case TargetOpcode::G_FPOW:
return true;
}
+/// Custom legalization of G_FEXP2 for 16-bit and 32-bit scalar results.
+///
+/// - 16-bit: the source is extended to 32 bits, evaluated with the
+///   amdgcn_exp2 intrinsic and truncated back into the destination.
+/// - 32-bit without denormal handling (as decided by needsDenormHandlingF32):
+///   emitted as a bare amdgcn_exp2 intrinsic.
+/// - 32-bit with denormal handling: inputs below -126.0 are biased by +64
+///   before the intrinsic and the result is multiplied by 2^-64 afterwards;
+///   all other inputs use an offset of 0.0 and a scale of 1.0.
+///
+/// \param MI The G_FEXP2 instruction; operand 0 is the destination register,
+///           operand 1 the source register. It is erased on every path.
+/// \param B  Builder used to emit the replacement instructions; MI's flags
+///           are propagated to them.
+/// \returns true on every path.
+bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
+                                        MachineIRBuilder &B) const {
+  // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
+  // If we have to handle denormals, scale up the input and adjust the result.
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  unsigned Flags = MI.getFlags();
+  LLT Ty = B.getMRI()->getType(Dst);
+  const LLT F16 = LLT::scalar(16);
+  const LLT F32 = LLT::scalar(32);
+
+  if (Ty == F16) {
+    // Nothing in half is a denormal when promoted to f32.
+    auto Ext = B.buildFPExt(F32, Src, Flags);
+    auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
+                    .addUse(Ext.getReg(0))
+                    .setMIFlags(Flags);
+    B.buildFPTrunc(Dst, Exp2, Flags);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  assert(Ty == F32);
+
+  if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
+    B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
+        .addUse(Src)
+        .setMIFlags(Flags);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // bool needs_scaling = x < -0x1.f80000p+6f;
+  // v_exp_f32(x + (needs_scaling ? 0x1.0p+6f : 0.0f)) *
+  //     (needs_scaling ? 0x1.0p-64f : 1.0f);
+
+  // -0x1.f80000p+6f is -126.0; exp2 of any smaller input is below 2^-126,
+  // the smallest normal f32, so only those inputs need the scaled path.
+  auto RangeCheckConst = B.buildFConstant(Ty, -0x1.f80000p+6f);
+  auto NeedsScaling = B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src,
+                                  RangeCheckConst, Flags);
+
+  // Input bias: +64.0 when scaling, otherwise 0.0.
+  auto SixtyFour = B.buildFConstant(Ty, 0x1.0p+6f);
+  auto Zero = B.buildFConstant(Ty, 0.0);
+  auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
+  auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
+
+  auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+                  .addUse(AddInput.getReg(0))
+                  .setMIFlags(Flags);
+
+  // Result scale: 2^-64 undoes the +64 input bias, otherwise 1.0.
+  auto TwoExpNeg64 = B.buildFConstant(Ty, 0x1.0p-64f);
+  auto One = B.buildFConstant(Ty, 1.0);
+  auto ResultScale = B.buildSelect(F32, NeedsScaling, TwoExpNeg64, One, Flags);
+  B.buildFMul(Dst, Exp2, ResultScale, Flags);
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
MachineIRBuilder &B) const {
Register Dst = MI.getOperand(0).getReg();
bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
double Log2BaseInverted, unsigned Flags) const;
+ bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX6-NEXT: v_mov_b32_e32 v5, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x800000
+; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
+; GFX6-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GFX6-NEXT: v_log_f32_e32 v0, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc
; GFX6-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX6-NEXT: v_log_f32_e32 v1, v1
-; GFX6-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX6-NEXT: v_mov_b32_e32 v6, 0x42000000
+; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; GFX6-NEXT: v_sub_f32_e32 v0, v0, v7
+; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX6-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX6-NEXT: s_mov_b32 s6, 0xc2fc0000
+; GFX6-NEXT: v_sub_f32_e32 v1, v1, v5
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
+; GFX6-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
+; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX8-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x800000
+; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GFX8-NEXT: v_log_f32_e32 v0, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc
; GFX8-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX8-NEXT: v_log_f32_e32 v1, v1
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x42000000
+; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; GFX8-NEXT: v_sub_f32_e32 v0, v0, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX8-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX8-NEXT: s_mov_b32 s6, 0xc2fc0000
+; GFX8-NEXT: v_sub_f32_e32 v1, v1, v5
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
+; GFX8-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_add_f32_e32 v1, v1, v2
; GFX8-NEXT: v_exp_f32_e32 v1, v1
+; GFX8-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
+; GFX8-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX9-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x800000
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[4:5]
; GFX9-NEXT: v_log_f32_e32 v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4
; GFX9-NEXT: v_log_f32_e32 v1, v1
-; GFX9-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x42000000
+; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; GFX9-NEXT: v_sub_f32_e32 v0, v0, v7
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX9-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX9-NEXT: s_mov_b32 s6, 0xc2fc0000
+; GFX9-NEXT: v_sub_f32_e32 v1, v1, v5
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v2, vcc
+; GFX9-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v1
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: v_exp_f32_e32 v1, v1
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0xc2fc0000, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s4
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s4
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX10-NEXT: v_exp_f32_e32 v1, v1
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f32:
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v2 :: v_dual_mul_dx9_zero_f32 v1, v1, v3
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s0
; GFX11-NEXT: v_exp_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_exp_f32_e32 v1, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
ret <2 x float> %pow
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v4, 0x42800000
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
-; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v1, v1, v4
+; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
; GFX6-NEXT: v_exp_f32_e32 v2, v0
-; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v1
-; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX6-NEXT: v_mul_f32_e32 v0, v1, v5
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_log_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
+; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v4
+; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
+; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v5
+; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v1
+; GFX6-NEXT: v_log_f32_e32 v3, v3
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_log_f32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_log_f32_e32 v2, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
+; GFX6-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX6-NEXT: v_add_f32_e32 v2, v2, v4
+; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3
-; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX6-NEXT: v_exp_f32_e32 v2, v2
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v1, v0
+; GFX6-NEXT: v_mul_f32_e32 v0, v2, v5
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%pow = call float @llvm.pow.f32(float %fabs.x, float %y)
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1|
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%pow = call float @llvm.pow.f32(float %x, float %fabs.y)
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fabs.y = call float @llvm.fabs.f32(float %y)
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10-NEXT: v_log_f32_e32 v1, v1
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_lhs:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_lhs:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fneg_lhs:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fneg_lhs:
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
%pow = call float @llvm.pow.f32(float %neg.x, float %y)
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_rhs:
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_rhs:
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fneg_rhs:
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fneg_rhs:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, -v1
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg float %y
%pow = call float @llvm.pow.f32(float %x, float %neg.y)
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
---
-name: fexp2_s32_vs
+name: fexp2_s16_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
bb.0:
liveins: $sgpr0
- ; CHECK-LABEL: name: fexp2_s32_vs
+ ; CHECK-LABEL: name: fexp2_s16_vs
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit %1
+ ; CHECK-NEXT: [[V_EXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
- %1:vgpr(s32) = G_FEXP2 %0
- S_ENDPGM 0, implicit %1
+ %1:sgpr(s16) = G_TRUNC %0
+ %2:vgpr(s16) = G_FEXP2 %1
+ S_ENDPGM 0, implicit %2
...
---
-name: fexp2_s32_vv
+name: fexp2_s16_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: fexp2_s32_vv
+ ; CHECK-LABEL: name: fexp2_s16_vv
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit %1
+ ; CHECK-NEXT: [[V_EXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
- %1:vgpr(s32) = G_FEXP2 %0
- S_ENDPGM 0, implicit %1
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:vgpr(s16) = G_FEXP2 %1
+ S_ENDPGM 0, implicit %2
...
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX8-LABEL: name: test_fexp_s32
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX9-LABEL: name: test_fexp_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_FEXP %0
$vgpr0 = COPY %1
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX8-LABEL: name: test_fexp_s32_nnan
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]]
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]]
- ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX9-LABEL: name: test_fexp_s32_nnan
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]]
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = nnan G_FEXP %0
$vgpr0 = COPY %1
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8-LABEL: name: test_fexp_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_fexp_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_FEXP %0
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
- ; GFX6-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
+ ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; GFX8-LABEL: name: test_fexp_v3s32
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
- ; GFX8-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]]
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX8-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
+ ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]]
+ ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]]
+ ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]]
+ ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]]
+ ; GFX8-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_fexp_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]]
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
- ; GFX9-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]]
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]]
+ ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL4]](s32), [[C1]]
+ ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C2]], [[C3]]
+ ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[SELECT4]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C4]], [[C5]]
+ ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = G_FEXP %0
; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX8-LABEL: name: test_fexp_s16
; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000
; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]]
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32)
; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16)
- ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]]
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]]
- ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]]
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[FMUL2]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[COPY]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL]](s32)
; GFX8-LABEL: name: test_fexp2_s32
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[COPY]]
- ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]]
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: $vgpr0 = COPY [[FMUL]](s32)
; GFX9-LABEL: name: test_fexp2_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[COPY]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[COPY]](s32), [[C]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_FEXP2 %0
$vgpr0 = COPY %1
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX8-LABEL: name: test_fexp2_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_fexp2_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_FEXP2 %0
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX6-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[UV2]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; GFX8-LABEL: name: test_fexp2_v3s32
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX8-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[UV2]]
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]]
+ ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
+ ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_fexp2_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[UV]]
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[UV1]]
- ; GFX9-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[UV2]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV]](s32), [[C]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV]], [[SELECT]]
+ ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT]], [[SELECT1]]
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT1]], [[SELECT3]]
+ ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[SELECT4]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT5]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = G_FEXP2 %0
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FPEXT]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](s32)
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX8-LABEL: name: test_fexp2_s16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FPEXT]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT]](s32)
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FPEXT1]]
- ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FPEXT1]](s32)
+ ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX9-LABEL: name: test_fpow_s32
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_FPOW %0, %1
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV2]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
- ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32)
- ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT3]]
- ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32)
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT3]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]]
+ ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
+ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]]
+ ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32)
+ ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]]
+ ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]]
+ ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-LABEL: name: test_fpow_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV2]](s32)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
- ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32)
- ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT3]]
- ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32)
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT3]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
+ ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]]
+ ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
+ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]]
+ ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]]
+ ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]]
+ ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV3]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
- ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32)
- ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT3]]
- ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32)
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT3]]
- ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
- ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]]
- ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
+ ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]]
+ ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
- ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT4]], [[SELECT5]]
- ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32)
- ; GFX6-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT5]]
- ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]]
+ ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32)
+ ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]]
+ ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]]
+ ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]]
+ ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX6-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT8]]
+ ; GFX6-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32)
+ ; GFX6-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT6]], [[SELECT9]]
+ ; GFX6-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32)
+ ; GFX6-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT7]](s32), [[C5]]
+ ; GFX6-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[INT7]], [[SELECT10]]
+ ; GFX6-NEXT: [[INT8:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX6-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT8]], [[SELECT11]]
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_fpow_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[INT]], [[SELECT1]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[UV3]](s32)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
- ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C1]], [[C2]]
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT2]]
- ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL1]](s32)
- ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C3]], [[C4]]
- ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT2]], [[SELECT3]]
- ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32)
- ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT3]]
- ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C]]
; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C1]], [[C2]]
- ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT4]]
- ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[SELECT4]]
+ ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL2]](s32)
; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[FCMP2]](s1), [[C3]], [[C4]]
- ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT4]], [[SELECT5]]
- ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32)
- ; GFX9-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT5]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[INT3]], [[SELECT5]]
+ ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB1]](s32), [[UV4]](s32)
+ ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C5]]
+ ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT6]]
+ ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[FCMP3]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT7]]
+ ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C]]
+ ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C1]], [[C2]]
+ ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[SELECT8]]
+ ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FMUL4]](s32)
+ ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[FCMP4]](s1), [[C3]], [[C4]]
+ ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[INT6]], [[SELECT9]]
+ ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB2]](s32), [[UV5]](s32)
+ ; GFX9-NEXT: [[FCMP5:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT7]](s32), [[C5]]
+ ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[INT7]], [[SELECT10]]
+ ; GFX9-NEXT: [[INT8:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32)
+ ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[FCMP5]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[INT8]], [[SELECT11]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL1]](s32), [[FMUL3]](s32), [[FMUL5]](s32)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan nsz G_FSUB [[INT]], [[SELECT1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT1]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX9-LABEL: name: test_fpow_s32_flags
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan nsz G_FSUB [[INT]], [[SELECT1]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[COPY1]](s32)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT1]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = nnan nsz G_FPOW %0, %1
; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fpow_s16
; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT1]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INT1]], [[SELECT]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
- ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32)
- ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT2]](s32), [[FPEXT3]](s32)
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT3]]
- ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32)
+ ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](s32), [[FPEXT3]](s32)
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[INT4]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INT4]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT5]], [[SELECT3]]
+ ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[FPEXT1]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT1]](s32), [[C1]]
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT1]], [[SELECT]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT2]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
- ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32)
- ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT2]](s32), [[FPEXT3]](s32)
- ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT3]]
- ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6-NEXT: [[INT3:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT2]](s32)
+ ; GFX6-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT3]](s32), [[FPEXT3]](s32)
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan nsz G_FCMP floatpred(olt), [[INT4]](s32), [[C1]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan nsz G_FADD [[INT4]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32)
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan nsz G_SELECT [[FCMP1]](s1), [[C4]], [[C5]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan nsz G_FMUL [[INT5]], [[SELECT3]]
+ ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX6-NEXT: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[FPEXT]](s32)
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[INT]](s32), [[SITOFP]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT1]]
- ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C1]], [[C2]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
+ ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT1]]
+ ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fpowi_s16_s32_flags
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]]
; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[SITOFP]](s32)
- ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT1]]
- ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT2]]
+ ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX6-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
; GFX9-LABEL: name: test_fpowi_s32_s32_flags
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP]](s1), [[C3]], [[C4]]
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[INT]], [[SELECT1]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FSUB]](s32), [[SITOFP]](s32)
- ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT1]]
- ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.260000e+02
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(olt), [[INT1]](s32), [[C5]]
+ ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.400000e+01
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C6]], [[C4]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[INT1]], [[SELECT2]]
+ ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32)
+ ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3BF0000000000000
+ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = nnan G_SELECT [[FCMP1]](s1), [[C7]], [[C2]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[SELECT3]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMUL1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = nnan G_FPOWI %0, %1
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX7-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GFX7-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX7-NEXT: v_log_f32_e32 v0, v0
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
; GFX7-NEXT: v_exp_f32_e32 v0, v0
+; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 %r)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, -1.0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg1_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -1.0, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -1)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 2.0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_2_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 2.0, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 2)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, -2.0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg2_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, -2.0, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -2)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 4.0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_4_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 4.0, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 4)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x41000000, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_8_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41000000, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 8)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x41800000, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_16_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x41800000, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 16)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0x43000000, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_128_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0x43000000, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 128)
ret float %res
; GFX78-NEXT: v_log_f32_e32 v0, v0
; GFX78-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, 0xc3000000, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
; GFX78-NEXT: v_exp_f32_e32 v0, v0
+; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg128_f32:
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, 0xc3000000, v0
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 -128)
ret float %res
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @llvm.fabs.f32(float %r0)
%r2 = fsub float -0.000000e+00, %r1
- %r3 = call float @llvm.exp2.f32(float %r2)
+ %r3 = call afn float @llvm.exp2.f32(float %r2)
%vec = insertelement <4 x float> undef, float %r3, i32 0
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
-; VI-LABEL: s_exp_f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_mul_f32_e32 v0, s2, v0
-; VI-NEXT: v_exp_f32_e32 v2, v0
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: flat_store_dword v[0:1], v2
-; VI-NEXT: s_endpgm
-;
-; GFX900-LABEL: s_exp_f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX900-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX900-NEXT: s_endpgm
-;
-; SI-LABEL: s_exp_f32:
-; SI: ; %bb.0:
-; SI-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, s2, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; SI-NEXT: s_endpgm
+; VI-SDAG-LABEL: s_exp_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: s_exp_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX900-SDAG-LABEL: s_exp_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3]
+; GFX900-SDAG-NEXT: s_endpgm
+;
+; GFX900-GISEL-LABEL: s_exp_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX900-GISEL-NEXT: s_endpgm
+;
+; SI-SDAG-LABEL: s_exp_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-SDAG-NEXT: s_endpgm
+;
+; SI-GISEL-LABEL: s_exp_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x,
; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T1.X, PV.W,
+; R600-NEXT: SETGT * T1.W, literal.x, PV.W,
+; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.W, T0.W, PV.W,
+; R600-NEXT: CNDE * T1.W, T1.W, 1.0, literal.x,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MUL_IEEE T0.X, PS, T1.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.y,
-; CM-NEXT: 2(2.802597e-45), 1069066811(1.442695e+00)
-; CM-NEXT: EXP_IEEE T1.X, T0.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
+; CM-NEXT: SETGT * T1.W, literal.x, PV.W,
+; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.x,
+; CM-NEXT: ADD * T0.W, T0.W, PV.W, BS:VEC_120/SCL_212
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call float @llvm.exp.f32(float %in)
store float %result, ptr addrspace(1) %out
ret void
define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; VI-SDAG-LABEL: s_exp_v2f32:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; VI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v2f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp_v2f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v0
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_v2f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v2f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s6, -1
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, s0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0
+; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-SDAG-NEXT: s_mov_b32 s5, s1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: s_mov_b32 s5, s1
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v2f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v2f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE T0.W, KC0[3].X, literal.x,
-; R600-NEXT: MUL_IEEE * T1.W, KC0[2].W, literal.x,
+; R600-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x,
; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, T1.W,
+; R600-NEXT: SETGT T1.W, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, KC0[2].W, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
+; R600-NEXT: SETGT T3.W, literal.x, PS,
+; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T0.W, T0.W, PS,
+; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Z, T2.W, PS,
+; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
+; R600-NEXT: CNDE T0.W, T3.W, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: MUL_IEEE * T0.Z, KC0[2].W, literal.y,
-; CM-NEXT: 2(2.802597e-45), 1069066811(1.442695e+00)
; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x,
; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T1.Y, T0.W,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T1.X, T0.Z,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.Z,
+; CM-NEXT: MUL_IEEE T0.Z, KC0[2].W, literal.x,
+; CM-NEXT: SETGT * T1.W, literal.y, PV.W,
+; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T1.Z, PV.W, 0.0, literal.x,
+; CM-NEXT: SETGT * T2.W, literal.y, PV.Z,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, T0.W, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE T1.Y, PV.X, T2.Z,
+; CM-NEXT: CNDE T1.Z, T2.W, 1.0, literal.x,
+; CM-NEXT: ADD * T0.W, T0.Z, T0.Y,
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T1.X, PV.X, T1.Z,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in)
store <2 x float> %result, ptr addrspace(1) %out
ret void
; VI-SDAG-LABEL: s_exp_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s6, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, s5, v0
+; VI-SDAG-NEXT: s_mov_b32 s7, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v0
; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v3
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v4
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s7, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s3
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s2
; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s6, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s5, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0
; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v3
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v3f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, s4, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v3, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; SI-SDAG-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:8
-; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v2, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s10, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v4, v0
+; SI-SDAG-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
+; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v3f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
+; SI-GISEL-NEXT: s_mov_b32 s10, -1
+; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; SI-GISEL-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.X, T2.X, 0
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE T0.W, KC0[3].Z, literal.x,
-; R600-NEXT: MUL_IEEE * T1.W, KC0[3].Y, literal.x,
+; R600-NEXT: MUL_IEEE * T0.W, KC0[3].Z, literal.x,
; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
+; R600-NEXT: SETGT T1.W, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, KC0[3].Y, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
+; R600-NEXT: SETGT T3.W, literal.x, PS,
+; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T0.Z, T0.W, PS,
+; R600-NEXT: MUL_IEEE T0.W, KC0[3].W, literal.x,
+; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
+; R600-NEXT: 1069066811(1.442695e+00), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T0.Y, T2.W, PS,
+; R600-NEXT: SETGT T1.Z, literal.x, PV.W,
+; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.y, BS:VEC_120/SCL_212
+; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
+; R600-NEXT: -1023672320(-1.260000e+02), 528482304(5.421011e-20)
+; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
+; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.x,
+; R600-NEXT: CNDE T1.W, T3.W, 1.0, literal.y,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; R600-NEXT: MUL_IEEE T1.X, PS, PV.W,
+; R600-NEXT: ADD T0.W, T0.W, PV.Z,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT: CNDE T1.W, T1.Z, 1.0, literal.x,
; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: MUL_IEEE T0.Z, KC0[3].W, literal.y,
-; R600-NEXT: ADD_INT T0.W, KC0[2].Y, literal.z,
-; R600-NEXT: EXP_IEEE * T0.X, T1.W,
-; R600-NEXT: 2(2.802597e-45), 1069066811(1.442695e+00)
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T2.X, PS, PV.W,
+; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; R600-NEXT: LSHR T2.X, PV.W, literal.x,
-; R600-NEXT: EXP_IEEE * T3.X, PV.Z,
+; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 19, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 39, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T0.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T1.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: LSHR T1.X, PV.W, literal.x,
-; CM-NEXT: MUL_IEEE T0.Y, KC0[3].Y, literal.y,
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.y,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, literal.y,
-; CM-NEXT: 2(2.802597e-45), 1069066811(1.442695e+00)
-; CM-NEXT: EXP_IEEE T2.X, T0.W,
-; CM-NEXT: EXP_IEEE T2.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T2.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T2.W (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T3.X (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T3.Y, T0.Z,
-; CM-NEXT: EXP_IEEE T3.Z (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE * T3.W (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T3.X, T0.Y,
-; CM-NEXT: EXP_IEEE T3.Y (MASKED), T0.Y,
-; CM-NEXT: EXP_IEEE T3.Z (MASKED), T0.Y,
-; CM-NEXT: EXP_IEEE * T3.W (MASKED), T0.Y,
+; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, literal.x,
+; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, KC0[3].Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x,
+; CM-NEXT: SETGT * T1.W, literal.y, PV.W,
+; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: SETGT T1.Z, literal.y, PV.Z,
+; CM-NEXT: SETGT * T2.W, literal.y, PV.Y,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T2.Y, PV.Z, 0.0, literal.x,
+; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, T0.W, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W, T0.W,
+; CM-NEXT: MUL_IEEE T1.X, PV.W, T2.Z,
+; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x,
+; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
+; CM-NEXT: ADD * T0.W, T0.Z, T2.Y, BS:VEC_201
+; CM-NEXT: 528482304(5.421011e-20), 8(1.121039e-44)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z, T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: LSHR T2.X, T1.Z, literal.x,
+; CM-NEXT: MUL_IEEE T3.Y, PV.Z, T1.Y,
+; CM-NEXT: CNDE T0.Z, T2.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, T0.Y, T0.X,
+; CM-NEXT: 2(2.802597e-45), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T3.X, PV.X, T0.Z,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
ret void
; VI-SDAG-LABEL: s_exp_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; VI-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; VI-SDAG-NEXT: v_exp_f32_e32 v3, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v1, s[0:1]
; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v5, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, v2, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v5, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v5
+; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v5, v5
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, s3
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s2
; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v3, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v1
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x1f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v6, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1
; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[0:1]
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v5
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v6, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v4f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s2, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42800000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v3, v1
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v5, s[0:1]
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v5, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
+; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s10, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v4f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3
-; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; SI-GISEL-NEXT: s_mov_b32 s10, -1
+; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v4f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; R600-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE T0.W, KC0[4].X, literal.x,
-; R600-NEXT: MUL_IEEE * T1.W, KC0[3].W, literal.x,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: EXP_IEEE * T0.W, PV.W,
-; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, literal.x,
-; R600-NEXT: EXP_IEEE * T0.Z, T1.W,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.W, KC0[3].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
+; R600-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x,
; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: SETGT T1.W, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, KC0[3].W, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
+; R600-NEXT: SETGT T0.Z, literal.x, PS,
+; R600-NEXT: MUL_IEEE T3.W, KC0[3].Z, literal.y,
+; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.z,
+; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Y, T0.W, PS,
+; R600-NEXT: SETGT T1.Z, literal.x, PV.W,
+; R600-NEXT: MUL_IEEE T0.W, KC0[3].Y, literal.y,
+; R600-NEXT: CNDE * T4.W, PV.Z, 0.0, literal.z,
+; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.X, T2.W, PS,
+; R600-NEXT: CNDE T1.Y, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: SETGT T2.Z, literal.y, PV.W,
+; R600-NEXT: CNDE T1.W, PV.Z, 0.0, literal.z,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T1.X, T3.W, PV.W,
+; R600-NEXT: CNDE T2.Y, T0.Z, 1.0, literal.x,
+; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.y,
+; R600-NEXT: MUL_IEEE T3.W, PS, PV.Y,
+; R600-NEXT: EXP_IEEE * T0.X, PV.X,
+; R600-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T0.Y, T0.W, PV.Z,
+; R600-NEXT: MUL_IEEE T3.Z, PS, PV.Y,
+; R600-NEXT: CNDE T0.W, T1.Z, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.X,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.Y, PS, PV.W,
+; R600-NEXT: CNDE T0.W, T2.Z, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.X, PS, PV.W,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp_v4f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 22, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 49, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 1069066811(1.442695e+00)
-; CM-NEXT: MUL_IEEE T0.Y, KC0[3].Z, literal.x,
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].W, literal.x,
-; CM-NEXT: MUL_IEEE * T1.W, KC0[4].X, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x,
; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T1.W, T1.W,
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T1.Z, T0.Z,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.Z,
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T0.Y,
-; CM-NEXT: EXP_IEEE T1.Y, T0.Y,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T0.Y,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.Y,
-; CM-NEXT: EXP_IEEE T1.X, T0.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE T0.Y, KC0[3].W, literal.x,
+; CM-NEXT: SETGT T1.Z, literal.y, PV.W,
+; CM-NEXT: SETGT * T1.W, literal.y, PV.Z,
+; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T2.Z, PV.Z, 0.0, literal.x,
+; CM-NEXT: SETGT * T2.W, literal.y, PV.Y,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.X, T1.Z, 1.0, literal.x,
+; CM-NEXT: CNDE T2.Y, PV.W, 0.0, literal.y,
+; CM-NEXT: MUL_IEEE T1.Z, KC0[3].Y, literal.z,
+; CM-NEXT: ADD * T0.W, T0.W, PV.Z,
+; CM-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
+; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W, T0.W,
+; CM-NEXT: CNDE T1.X, T2.W, 1.0, literal.x,
+; CM-NEXT: SETGT T3.Y, literal.y, T1.Z,
+; CM-NEXT: ADD T2.Z, T0.Y, T2.Y,
+; CM-NEXT: MUL_IEEE * T2.W, PV.W, T0.X,
+; CM-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
+; CM-NEXT: EXP_IEEE T0.X, T2.Z,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T2.Z,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.Z,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.Z,
+; CM-NEXT: CNDE T2.X, T3.Y, 0.0, literal.x,
+; CM-NEXT: CNDE T0.Y, T1.W, 1.0, literal.y,
+; CM-NEXT: MUL_IEEE T2.Z, PV.X, T1.X,
+; CM-NEXT: ADD * T0.W, T0.Z, T1.Y,
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE T2.Y, PV.X, T0.Y,
+; CM-NEXT: CNDE T0.Z, T3.Y, 1.0, literal.x,
+; CM-NEXT: ADD * T0.W, T1.Z, T2.X,
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T2.X, PV.X, T0.Z,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
store <4 x float> %result, ptr addrspace(1) %out
ret void
}
define float @v_exp_f32(float %in) {
-; GCN-LABEL: v_exp_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32:
; R600: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_fabs_f32:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; GCN-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fabs_f32:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_f32:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fabs_f32:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_fneg_fabs_f32:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_f32:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_fabs_f32:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_fneg_f32:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_f32:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_f32:
}
define float @v_exp_f32_ninf(float %in) {
-; GCN-LABEL: v_exp_f32_ninf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_ninf:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_ninf:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_ninf:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan(float %in) {
-; GCN-LABEL: v_exp_f32_nnan:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_nnan:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp_f32_nnan:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_exp_f32_nnan:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp_f32_nnan_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_nnan_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan_dynamic:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_nnan_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_dynamic:
; R600: ; %bb.0:
}
define float @v_exp_f32_ninf_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp_f32_ninf_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_ninf_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_ninf_dynamic:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_ninf_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf_dynamic:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan_ninf(float %in) {
-; GCN-LABEL: v_exp_f32_nnan_ninf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan_ninf:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp_f32_nnan_ninf_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan_ninf_dynamic:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
; R600: ; %bb.0:
}
define float @v_exp_f32_dynamic_mode(float %in) #1 {
-; GCN-LABEL: v_exp_f32_dynamic_mode:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_dynamic_mode:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_dynamic_mode:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_dynamic_mode:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_dynamic_mode:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_dynamic_mode:
; R600: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_undef:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_undef:
}
define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
-; GCN-LABEL: v_exp_f32_from_fpext_f16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_from_fpext_f16:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_from_fpext_f16:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_from_fpext_f16:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_f16:
; R600: ; %bb.0:
}
define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
-; GCN-LABEL: v_exp_f32_from_fpext_math_f16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_math_f16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_bf16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f16_fast:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v2f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fabs_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_fabs_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v2f16_fast:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v3f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v5
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v3f16_afn:
attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX900: {{.*}}
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) {
-; SI-LABEL: s_exp2_f32:
-; SI: ; %bb.0:
-; SI-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_exp_f32_e32 v0, s2
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; SI-NEXT: s_endpgm
-;
-; VI-LABEL: s_exp2_f32:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_exp_f32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: flat_store_dword v[0:1], v2
-; VI-NEXT: s_endpgm
-;
-; GFX900-LABEL: s_exp2_f32:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX900-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_exp_f32_e32 v0, s4
-; GFX900-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX900-NEXT: s_endpgm
+; SI-SDAG-LABEL: s_exp2_f32:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-SDAG-NEXT: s_endpgm
+;
+; SI-GISEL-LABEL: s_exp2_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: s_exp2_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v1, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: s_exp2_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX900-SDAG-LABEL: s_exp2_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[2:3]
+; GFX900-SDAG-NEXT: s_endpgm
+;
+; GFX900-GISEL-LABEL: s_exp2_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX900-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp2_f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T1.X, KC0[2].Z,
+; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
+; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T1.W, KC0[2].Z, PV.W,
+; R600-NEXT: CNDE * T0.W, T0.W, 1.0, literal.x,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MUL_IEEE T0.X, PS, T0.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp2_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
+; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, T0.W, 1.0, literal.x,
+; CM-NEXT: ADD * T0.W, KC0[2].Z, PV.W,
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T1.X, KC0[2].Z,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[2].Z,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[2].Z,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[2].Z,
%result = call float @llvm.exp2.f32(float %in)
store float %result, ptr addrspace(1) %out
ret void
; SI-SDAG-LABEL: s_exp2_v2f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s6, -1
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, s3
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, s2
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v1
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
; SI-SDAG-NEXT: s_mov_b32 s4, s0
; SI-SDAG-NEXT: s_mov_b32 s5, s1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp2_v2f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, s2
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, s3
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_exp2_v2f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, s3
-; VI-SDAG-NEXT: v_exp_f32_e32 v0, s2
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
;
; VI-GISEL-LABEL: s_exp2_v2f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_exp_f32_e32 v0, s2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, s3
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp2_v2f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s3
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s2
-; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s2, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp2_v2f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s2
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s3
-; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s6, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp2_v2f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 18, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].X,
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, KC0[2].W,
+; R600-NEXT: SETGT T0.W, literal.x, KC0[3].X,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].W,
+; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.W, KC0[3].X, PV.W,
+; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Z, KC0[2].W, PS,
+; R600-NEXT: CNDE T0.W, T0.W, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
+; R600-NEXT: CNDE T0.W, T1.W, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp2_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].X,
+; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, PV.W, 0.0, literal.x,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T1.Z, T0.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, KC0[3].X, PV.Z,
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE T1.Y, PV.X, T1.Z,
+; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.x,
+; CM-NEXT: ADD * T0.W, KC0[2].W, T0.Y,
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T1.X, PV.X, T0.Z,
; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].X,
-; CM-NEXT: EXP_IEEE T1.Y, KC0[3].X,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].X,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].X,
-; CM-NEXT: EXP_IEEE T1.X, KC0[2].W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[2].W,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[2].W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[2].W,
%result = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
store <2 x float> %result, ptr addrspace(1) %out
ret void
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v4, s5, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v0, s6, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v6, s4, v6
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, s4
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v3, v7
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v6, v5
; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
; SI-GISEL-LABEL: s_exp2_v3f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, s6
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: s_mov_b32 s10, -1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, s5
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; SI-GISEL-NEXT: v_exp_f32_e32 v4, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; SI-GISEL-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_exp2_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; VI-SDAG-NEXT: v_exp_f32_e32 v0, s4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; VI-SDAG-NEXT: v_add_f32_e32 v4, s6, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v6, s5, v6
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5
; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT: s_endpgm
; VI-GISEL-LABEL: s_exp2_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, s6
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v4, v4
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s4
-; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s6, v4
+; GFX900-SDAG-NEXT: v_add_f32_e32 v6, s5, v6
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v1
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0
+; GFX900-SDAG-NEXT: global_store_dwordx3 v7, v[0:2], s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp2_v3f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, s6
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, s5
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v4, s5, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s6, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v4, v4
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp2_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.X, T2.X, 0
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 29, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].Z,
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
-; R600-NEXT: EXP_IEEE * T0.X, KC0[3].Y,
-; R600-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; R600-NEXT: LSHR T2.X, PV.W, literal.x,
-; R600-NEXT: EXP_IEEE * T3.X, KC0[3].W,
+; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].Y,
+; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T2.W, KC0[3].Z, PV.W,
+; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Y, KC0[3].Y, PS,
+; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].W,
+; R600-NEXT: CNDE T0.W, T0.W, 1.0, literal.y,
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: -1023672320(-1.260000e+02), 528482304(5.421011e-20)
+; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
+; R600-NEXT: CNDE T1.Z, PV.Z, 0.0, literal.x,
+; R600-NEXT: CNDE T0.W, T1.W, 1.0, literal.y,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; R600-NEXT: MUL_IEEE T1.X, PS, PV.W,
+; R600-NEXT: ADD T0.W, KC0[3].W, PV.Z,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT: CNDE T1.W, T0.Z, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T2.X, PS, PV.W,
+; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp2_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T1.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
+; CM-NEXT: ALU 35, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR * T0.X, PV.W, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W,
+; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T1.Y, PV.Z, 0.0, literal.x,
+; CM-NEXT: CNDE T1.Z, T0.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, KC0[3].W, PV.Y,
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y, T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE T1.X, PV.Y, T1.Z,
+; CM-NEXT: CNDE T0.Y, T0.Z, 1.0, literal.x,
+; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
+; CM-NEXT: ADD * T0.W, KC0[3].Z, T1.Y,
+; CM-NEXT: 528482304(5.421011e-20), 8(1.121039e-44)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W, T0.W,
+; CM-NEXT: LSHR T2.X, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, T0.Y,
+; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.y,
+; CM-NEXT: ADD * T0.W, KC0[3].Y, T0.X,
+; CM-NEXT: 2(2.802597e-45), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
+; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T2.X, KC0[3].W,
-; CM-NEXT: EXP_IEEE T2.Y (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE T2.Z (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE * T2.W (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE T3.X (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE T3.Y, KC0[3].Z,
-; CM-NEXT: EXP_IEEE T3.Z (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE * T3.W (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE T3.X, KC0[3].Y,
-; CM-NEXT: EXP_IEEE T3.Y (MASKED), KC0[3].Y,
-; CM-NEXT: EXP_IEEE T3.Z (MASKED), KC0[3].Y,
-; CM-NEXT: EXP_IEEE * T3.W (MASKED), KC0[3].Y,
%result = call <3 x float> @llvm.exp2.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
ret void
define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-SDAG-LABEL: s_exp2_v4f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_exp_f32_e32 v3, s7
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, s4
-; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc
+; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v6, s2, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v8, s1, v8
+; SI-SDAG-NEXT: v_add_f32_e32 v1, s0, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; SI-SDAG-NEXT: v_exp_f32_e32 v8, v8
+; SI-SDAG-NEXT: v_exp_f32_e32 v9, v1
+; SI-SDAG-NEXT: s_mov_b32 s6, -1
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v9, v0
+; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp2_v4f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, s6
-; SI-GISEL-NEXT: v_exp_f32_e32 v3, s7
-; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_add_f32_e32 v1, s5, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v5, s6, v5
+; SI-GISEL-NEXT: v_add_f32_e32 v2, s7, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; SI-GISEL-NEXT: s_mov_b32 s10, -1
+; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_exp2_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_exp_f32_e32 v3, s7
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; VI-SDAG-NEXT: v_exp_f32_e32 v0, s4
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc
+; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v4, s7, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v6, s6, v6
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v8, s5, v8
+; VI-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v8, v8
+; VI-SDAG-NEXT: v_exp_f32_e32 v9, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5
; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v9, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-SDAG-NEXT: s_endpgm
; VI-GISEL-LABEL: s_exp2_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, s6
-; VI-GISEL-NEXT: v_exp_f32_e32 v3, s7
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v1, s5, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v5, s6, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v2, s7, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, s7
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, s6
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s5
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s4
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX900-SDAG-NEXT: v_add_f32_e32 v5, s7, v5
+; GFX900-SDAG-NEXT: v_add_f32_e32 v7, s6, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v9, s5, v9
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s4, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v9, v9
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v10, v1
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v5, v2
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v7, v6
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v9, v8
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v10, v0
; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s5
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, s6
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, s7
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s4, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s5, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v5, s6, v5
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s7, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp2_v4f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: EXP_IEEE * T0.W, KC0[4].X,
-; R600-NEXT: EXP_IEEE * T0.Z, KC0[3].W,
-; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].Z,
-; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, KC0[3].Y,
+; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
+; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Z, KC0[4].X, PV.W,
+; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z,
+; R600-NEXT: CNDE * T3.W, T1.W, 0.0, literal.y,
+; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T0.X, KC0[3].W, PS,
+; R600-NEXT: CNDE T0.Y, T0.W, 1.0, literal.x,
+; R600-NEXT: SETGT T1.Z, literal.y, KC0[3].Y,
+; R600-NEXT: CNDE T0.W, PV.W, 0.0, literal.z,
+; R600-NEXT: EXP_IEEE * T0.Z, PV.Z,
+; R600-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
+; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
+; R600-NEXT: ADD T1.X, KC0[3].Z, PV.W,
+; R600-NEXT: CNDE T1.Y, T1.W, 1.0, literal.x,
+; R600-NEXT: CNDE T2.Z, PV.Z, 0.0, literal.y,
+; R600-NEXT: MUL_IEEE T0.W, PS, PV.Y,
+; R600-NEXT: EXP_IEEE * T0.X, PV.X,
+; R600-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
+; R600-NEXT: ADD T2.Y, KC0[3].Y, PV.Z,
+; R600-NEXT: MUL_IEEE T0.Z, PS, PV.Y,
+; R600-NEXT: CNDE T1.W, T2.W, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.X,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
+; R600-NEXT: CNDE T1.W, T1.Z, 1.0, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp2_v4f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 43, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[4].X,
+; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Z,
+; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
+; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: CNDE T1.Y, T0.W, 1.0, literal.y,
+; CM-NEXT: CNDE T1.Z, PV.Z, 0.0, literal.x,
+; CM-NEXT: ADD * T0.W, KC0[4].X, PV.Y,
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y, T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: CNDE T1.X, T0.Z, 1.0, literal.x,
+; CM-NEXT: SETGT T2.Y, literal.y, KC0[3].Y,
+; CM-NEXT: ADD T0.Z, KC0[3].W, T1.Z,
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, T1.Y,
+; CM-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.Z,
+; CM-NEXT: EXP_IEEE T0.Y, T0.Z,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.Z,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.Z,
+; CM-NEXT: CNDE T2.X, T2.Y, 0.0, literal.x,
+; CM-NEXT: CNDE T1.Y, T1.W, 1.0, literal.y,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, T1.X,
+; CM-NEXT: ADD * T1.W, KC0[3].Z, T0.X, BS:VEC_021/SCL_122
+; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.X, T1.Y,
+; CM-NEXT: CNDE T1.Z, T2.Y, 1.0, literal.x,
+; CM-NEXT: ADD * T1.W, KC0[3].Y, T2.X,
+; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MUL_IEEE * T0.X, PV.X, T1.Z,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[4].X,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[4].X,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[4].X,
-; CM-NEXT: EXP_IEEE * T1.W, KC0[4].X,
-; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE T1.Z, KC0[3].W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].W,
-; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE T1.Y, KC0[3].Z,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].Z,
-; CM-NEXT: EXP_IEEE T1.X, KC0[3].Y,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[3].Y,
-; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].Y,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].Y,
%result = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
store <4 x float> %result, ptr addrspace(1) %out
ret void
}
define float @v_exp2_f32(float %in) {
-; GCN-LABEL: v_exp2_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32:
; R600: ; %bb.0:
}
define float @v_exp2_fabs_f32(float %in) {
-; GCN-LABEL: v_exp2_fabs_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e64 v0, |v0|
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_fabs_f32:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_fabs_f32:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_fabs_f32:
; R600: ; %bb.0:
}
define float @v_exp2_fneg_fabs_f32(float %in) {
-; GCN-LABEL: v_exp2_fneg_fabs_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e64 v0, -|v0|
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_fneg_fabs_f32:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0|
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_fneg_fabs_f32:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_fneg_fabs_f32:
; R600: ; %bb.0:
}
define float @v_exp2_fneg_f32(float %in) {
-; GCN-LABEL: v_exp2_fneg_f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e64 v0, -v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_fneg_f32:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000
+; GCN-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_fneg_f32:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_fneg_f32:
; R600: ; %bb.0:
}
define float @v_exp2_f32_ninf(float %in) {
-; GCN-LABEL: v_exp2_f32_ninf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_ninf:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_ninf:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_ninf:
; R600: ; %bb.0:
}
define float @v_exp2_f32_nnan(float %in) {
-; GCN-LABEL: v_exp2_f32_nnan:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_nnan:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_nnan:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_nnan:
; R600: ; %bb.0:
}
define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp2_f32_nnan_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_nnan_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_nnan_dynamic:
; R600: ; %bb.0:
}
define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp2_f32_ninf_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_ninf_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_ninf_dynamic:
; R600: ; %bb.0:
}
define float @v_exp2_f32_nnan_ninf(float %in) {
-; GCN-LABEL: v_exp2_f32_nnan_ninf:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_nnan_ninf:
; R600: ; %bb.0:
}
define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp2_f32_nnan_ninf_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic:
; R600: ; %bb.0:
}
define float @v_exp2_f32_dynamic_mode(float %in) #1 {
-; GCN-LABEL: v_exp2_f32_dynamic_mode:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_dynamic_mode:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_dynamic_mode:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_dynamic_mode:
; R600: ; %bb.0:
}
define float @v_exp2_f32_undef() {
-; GCN-LABEL: v_exp2_f32_undef:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_exp_f32_e32 v0, s4
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_undef:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_undef:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GCN-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
+; GCN-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_undef:
; R600: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_f32_from_fpext_math_f16:
; GCN-SDAG-LABEL: v_exp2_f32_from_fpext_bf16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp2_f32_from_fpext_bf16:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_f16:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fneg_fabs_f16:
; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fneg_f16:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_f16_fast:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_v2f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fabs_v2f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fneg_fabs_v2f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_fneg_v2f16:
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_v2f16_fast:
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v3f16:
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_v3f16_afn:
attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SI: {{.*}}