return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
}
-/// Generate Min/Max node
-SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
- SDValue LHS, SDValue RHS,
- SDValue True, SDValue False,
- SDValue CC,
- DAGCombinerInfo &DCI) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
+// Look through one fneg: yields the operand being negated when Val is an
+// ISD::FNEG node, and Val itself otherwise.
+// TODO: Handle fabs too
+static SDValue peekFNeg(SDValue Val) {
+  const bool IsFNeg = Val.getOpcode() == ISD::FNEG;
+  return IsFNeg ? Val.getOperand(0) : Val;
+}
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
+ const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
+ SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
return SDValue();
}
+/// Generate Min/Max node
+///
+/// Entry point for matching a select of a compare against the legacy min/max
+/// patterns. \p LHS, \p RHS and \p CC describe the compare; \p True and
+/// \p False are the values being selected.
+///
+/// When the selected values are exactly the compared values (in either
+/// order) the match is delegated to combineFMinMaxLegacyImpl. Otherwise a
+/// single fneg-folded form is tried:
+///
+///   select (fcmp olt (lhs, K)), (fneg lhs), -K
+///     -> fneg (fmin_legacy lhs, K)
+///
+/// \returns the replacement node, or an empty SDValue if nothing matched.
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
+                                                   SDValue LHS, SDValue RHS,
+                                                   SDValue True, SDValue False,
+                                                   SDValue CC,
+                                                   DAGCombinerInfo &DCI) const {
+  if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+    return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
+
+  // If we can't directly match this, try to see if we can fold an fneg to
+  // match. This undoes the combine foldFreeOpFromSelect does if it helps us
+  // match the fmin/fmax.
+  //
+  // The true value must really be an fneg. peekFNeg hands back its argument
+  // unchanged for any other node, so without this check
+  //   select (fcmp olt (lhs, K)), lhs, -K
+  // would also satisfy LHS == NegTrue below and be rewritten to a negated
+  // min/max, which yields -lhs where the select yields lhs.
+  //
+  // TODO: Use getNegatedExpression
+  if (True.getOpcode() != ISD::FNEG)
+    return SDValue();
+
+  SDValue NegTrue = peekFNeg(True);
+  if (LHS != NegTrue)
+    return SDValue();
+
+  // Both the compared and the selected constant must be FP constants, and the
+  // selected one has to be the negation of the compared one.
+  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+  ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
+  if (!CRHS || !CFalse)
+    return SDValue();
+
+  APFloat NegRHS = neg(CRHS->getValueAPF());
+  if (NegRHS != CFalse->getValueAPF())
+    return SDValue();
+
+  // Match on the un-negated operand, then re-apply the fneg to the result.
+  SDValue Combined =
+      combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
+  if (!Combined)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+}
+
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ SDValue CC, DAGCombinerInfo &DCI) const;
+
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
Info.True = MI.getOperand(2).getReg();
Info.False = MI.getOperand(3).getReg();
+ // TODO: Handle case where the selected value is an fneg and the compared
+ // constant is the negation of the selected value.
if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
!(Info.LHS == Info.False && Info.RHS == Info.True))
return false;
; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
-; SI-SAFE-NEXT: v_bfrev_b32_e32 v1, 1
-; SI-SAFE-NEXT: v_mov_b32_e32 v2, 0x7fc00000
+; SI-SAFE-NEXT: s_brev_b32 s0, 1
+; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
+; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-SAFE-NEXT: ; return to shader part epilog
;
; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_bfrev_b32_e32 v0, 1
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e64 vcc, s0, 0
-; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0
+; SI-SAFE-NEXT: s_brev_b32 s0, 1
; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-SAFE-NEXT: ; return to shader part epilog
;