From 721f8f7530ec3b3f79c3a187fae177178a74c25c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 10 Jun 2020 13:01:57 -0400 Subject: [PATCH] AMDGPU: Stop using getSelectCC in division lowering This was promoting booleans to i32 to perform a comparison against them to feed to a select condition. Just use the booleans directly. This produces the same final code, since the combiner is unable to undo the mess this creates. I untangled this logic when I ported this code to GlobalISel, so port the cleanups back. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 63 +++++++++++++------------- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2db9af8..375b707 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1991,10 +1991,15 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RCP_LO); + const SDValue Zero = DAG.getConstant(0, DL, VT); + const EVT CCVT = getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), VT); + // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) - SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT), - NEG_RCP_LO, RCP_LO, - ISD::SETEQ); + SDValue CmpRcpHiZero = DAG.getSetCC(DL, CCVT, RCP_HI, Zero, ISD::SETEQ); + SDValue ABS_RCP_LO = DAG.getNode(ISD::SELECT, + DL, VT, CmpRcpHiZero, NEG_RCP_LO, RCP_LO); + // Calculate the rounding error from the URECIP instruction // E = mulhu(ABS_RCP_LO, RCP) SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); @@ -2006,9 +2011,9 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) - SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT), - RCP_A_E, RCP_S_E, - ISD::SETEQ); + SDValue Tmp0 = DAG.getNode(ISD::SELECT, DL, VT, + CmpRcpHiZero, RCP_A_E, RCP_S_E); + // Quotient = mulhu(Tmp0, Num) SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); @@ -2018,20 +2023,16 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, // Remainder = Num - Num_S_Remainder SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); - // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) - SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, - DAG.getConstant(-1, DL, VT), - DAG.getConstant(0, DL, VT), - ISD::SETUGE); - // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) - SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, - Num_S_Remainder, - DAG.getConstant(-1, DL, VT), - DAG.getConstant(0, DL, VT), - ISD::SETUGE); + // Remainder_GE_Den = (Remainder >= Den) + SDValue Remainder_GE_Den = DAG.getSetCC(DL, CCVT, Remainder, Den, ISD::SETUGE); + + // Remainder_GE_Zero = (Num >= Num_S_Remainder) + SDValue Remainder_GE_Zero = DAG.getSetCC(DL, CCVT, Num, Num_S_Remainder, + ISD::SETUGE); + // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero - SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, - Remainder_GE_Zero); + SDValue Tmp1 = DAG.getNode(ISD::AND, DL, CCVT, Remainder_GE_Den, + Remainder_GE_Zero); // Calculate Division result: @@ -2043,13 +2044,13 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, DAG.getConstant(1, DL, VT)); - // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) - SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT), - Quotient, Quotient_A_One, ISD::SETEQ); + // Div = (Tmp1 ? Quotient_A_One : Quotient) + SDValue Div = DAG.getNode(ISD::SELECT, DL, VT, Tmp1, + Quotient_A_One, Quotient); - // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) - Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT), - Quotient_S_One, Div, ISD::SETEQ); + // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) + Div = DAG.getNode(ISD::SELECT, DL, VT, Remainder_GE_Zero, + Div, Quotient_S_One); // Calculate Rem result: @@ -2059,13 +2060,13 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, // Remainder_A_Den = Remainder + Den SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); - // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) - SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT), - Remainder, Remainder_S_Den, ISD::SETEQ); + // Rem = (Tmp1 ? Remainder_S_Den : Remainder) + SDValue Rem = DAG.getNode(ISD::SELECT, DL, VT, Tmp1, + Remainder_S_Den, Remainder); - // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) - Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT), - Remainder_A_Den, Rem, ISD::SETEQ); + // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den) + Rem = DAG.getNode(ISD::SELECT, DL, VT, + Remainder_GE_Zero, Rem, Remainder_A_Den); SDValue Ops[2] = { Div, Rem diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f7cebcb..67f889f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2561,7 +2561,7 @@ void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B, // Quotient_S_One = Quotient - 1 auto Quotient_S_One = B.buildSub(S32, Quotient, One); - // Div = (Tmp1 == 0 ? Quotient_A_One : Quotient) + // Div = (Tmp1 ? Quotient : Quotient_A_One) auto Div = B.buildSelect(S32, Tmp1, Quotient, Quotient_A_One); // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) -- 2.7.4