From: Matt Arsenault Date: Thu, 2 Feb 2023 14:14:36 +0000 (-0400) Subject: CodeGen: Reorder case handling for is.fpclass legalization X-Git-Tag: upstream/17.0.6~14447 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9356ec1516cae84837629ef19b7158c0e1155852;p=platform%2Fupstream%2Fllvm.git CodeGen: Reorder case handling for is.fpclass legalization Subnormal and zero checks can be combined into one, so move the code closer to reduce the diff in a future change. --- diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1406fec..f0a5b2e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7438,6 +7438,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { AsInt, SignBitC)); } + if (FPClassTest PartialCheck = Mask & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set) + auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs; + auto OneC = MIRBuilder.buildConstant(IntTy, 1); + auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC); + auto SubnormalRes = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne, + MIRBuilder.buildConstant(IntTy, AllOneMantissa)); + if (PartialCheck == fcNegSubnormal) + SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign); + appendToRes(SubnormalRes); + } + if (FPClassTest PartialCheck = Mask & fcInf) { if (PartialCheck == fcPosInf) appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, @@ -7474,20 +7488,6 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { } } - if (FPClassTest PartialCheck = Mask & fcSubnormal) { - // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set) - // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set) - auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs; - auto OneC = MIRBuilder.buildConstant(IntTy, 1); - auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC); - auto SubnormalRes = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne, - MIRBuilder.buildConstant(IntTy, AllOneMantissa)); - if (PartialCheck == fcNegSubnormal) - SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign); - appendToRes(SubnormalRes); - } - if (FPClassTest PartialCheck = Mask & fcNormal) { // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u< // (max_exp-1)) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 362330c..6d0de755 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8149,6 +8149,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, appendResult(PartialRes); } + if (unsigned PartialCheck = Test & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) + SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV; + SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT); + SDValue VMinusOneV = + DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT)); + PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT); + if (PartialCheck == fcNegSubnormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + appendResult(PartialRes); + } + if (unsigned PartialCheck = Test & fcInf) { if (PartialCheck == fcPosInf) PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ); @@ -8193,19 +8206,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, appendResult(PartialRes); } - if (unsigned PartialCheck = Test & fcSubnormal) { - // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) - // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) - SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV; - SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT); - SDValue VMinusOneV = - DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT)); - PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT); - if (PartialCheck == fcNegSubnormal) - PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); - appendResult(PartialRes); - } - if (unsigned PartialCheck = Test & fcNormal) { // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index 03d9f77..35090ec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1758,16 +1758,16 @@ define i1 @not_isnormal_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00 +; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff ; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] @@ -1827,19 +1827,19 @@ define i1 @not_is_plus_normal_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 -; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v2 +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 +; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -1905,19 +1905,19 @@ define i1 @not_is_neg_normal_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16 -; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v2 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3 +; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc -; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v1 -; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2155,15 +2155,15 @@ define i1 @not_iszero_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2659,7 +2659,7 @@ define i1 @not_iszero_or_nan_f16(half %x) { ; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 @@ -2671,13 +2671,13 @@ define i1 @not_iszero_or_nan_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0 +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2735,7 +2735,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 { ; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 @@ -2747,13 +2747,13 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 { ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0 +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2811,7 +2811,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 @@ -2823,13 +2823,13 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_subrev_i32_e64 v1, s[4:5], 1, v0 +; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -3009,18 +3009,18 @@ define i1 @not_iszero_or_qnan_f16(half %x) { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7e00 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7c00 +; GFX7SELDAG-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 -; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s6, v0 -; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7SELDAG-NEXT: v_add_i32_e32 v1, vcc, -1, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v1 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e64 s[4:5], s8, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[6:7], s[4:5], vcc +; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 ; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 +; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 ; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 @@ -3032,22 +3032,22 @@ define i1 @not_iszero_or_qnan_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v1 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v1, v2 -; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s8, v1 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2 +; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -3094,15 +3094,15 @@ define i1 @not_iszero_or_snan_f16(half %x) { ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7dff -; GFX7SELDAG-NEXT: s_movk_i32 s5, 0x7c00 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 +; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7dff ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 -; GFX7SELDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s5, v0 +; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff +; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 ; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_add_i32_e32 v1, vcc, -1, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v1 +; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0 ; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -3116,16 +3116,16 @@ define i1 @not_iszero_or_snan_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; %entry ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 18a5b27..a12efe7 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -1922,6 +1922,9 @@ entry: define i1 @not_iszero_or_qnan_f(float %x) { ; CHECK-32-LABEL: not_iszero_or_qnan_f: ; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: pushl %esi +; CHECK-32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-32-NEXT: .cfi_offset %esi, -8 ; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 @@ -1931,15 +1934,17 @@ define i1 @not_iszero_or_qnan_f(float %x) { ; CHECK-32-NEXT: andb %cl, %dl ; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 ; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: orb %dl, %cl -; CHECK-32-NEXT: leal -1(%eax), %edx -; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %dl +; CHECK-32-NEXT: leal -1(%eax), %esi +; CHECK-32-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF +; CHECK-32-NEXT: setb %ch +; CHECK-32-NEXT: orb %cl, %ch ; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; CHECK-32-NEXT: setb %al ; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: orb %ch, %al +; CHECK-32-NEXT: popl %esi +; CHECK-32-NEXT: .cfi_def_cfa_offset 4 ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_or_qnan_f: @@ -1953,15 +1958,15 @@ define i1 @not_iszero_or_qnan_f(float %x) { ; CHECK-64-NEXT: andb %cl, %dl ; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 ; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: orb %dl, %cl -; CHECK-64-NEXT: leal -1(%rax), %edx -; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %dl +; CHECK-64-NEXT: leal -1(%rax), %esi +; CHECK-64-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF +; CHECK-64-NEXT: setb %sil +; CHECK-64-NEXT: orb %cl, %sil ; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; CHECK-64-NEXT: setb %al ; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: orb %sil, %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)" @@ -1973,38 +1978,38 @@ define i1 @not_iszero_or_snan_f(float %x) { ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %cl ; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: leal -1(%eax), %ecx -; CHECK-32-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %cl -; CHECK-32-NEXT: orb %dl, %cl +; CHECK-32-NEXT: sete %cl +; CHECK-32-NEXT: leal -1(%eax), %edx +; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF +; CHECK-32-NEXT: setb %dl +; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; CHECK-32-NEXT: setge %ch +; CHECK-32-NEXT: orb %cl, %ch +; CHECK-32-NEXT: orb %dl, %ch ; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: orb %ch, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_or_snan_f: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax ; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %cl ; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: leal -1(%rax), %ecx -; CHECK-64-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %cl -; CHECK-64-NEXT: orb %dl, %cl +; CHECK-64-NEXT: sete %cl +; CHECK-64-NEXT: leal -1(%rax), %edx +; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF +; CHECK-64-NEXT: setb %dl +; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; CHECK-64-NEXT: setge %sil +; CHECK-64-NEXT: orb %cl, %sil +; CHECK-64-NEXT: orb %dl, %sil ; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: orb %sil, %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 926) ; ~(0x60|0x1) = "~(zero|snan)"