From d0101a2dfd8f81f57989743afe812b0b47acab87 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 6 Jan 2015 23:00:46 +0000
Subject: [PATCH] R600/SI: Add combine for isinfinite pattern

llvm-svn: 225310
---
 llvm/lib/Target/R600/SIISelLowering.cpp | 64 +++++++++++++++++++++++++
 llvm/lib/Target/R600/SIISelLowering.h   |  1 +
 llvm/test/CodeGen/R600/fp-classify.ll   | 85 +++++++++++++++++++++++++++++++++
 3 files changed, 150 insertions(+)

diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 0690792..a211504 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -218,6 +218,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::FMAXNUM);
   setTargetDAGCombine(ISD::SELECT_CC);
   setTargetDAGCombine(ISD::SETCC);
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::UINT_TO_FP);
 
@@ -1302,6 +1303,67 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
   return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
 }
 
+/// Try to fold an "is finite" check spelled as two compares into a single
+/// FP_CLASS node. Returns an empty SDValue if \p N does not match, or if the
+/// combine is running before legalization.
+SDValue SITargetLowering::performAndCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+  // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  if (LHS.getOpcode() == ISD::SETCC &&
+      RHS.getOpcode() == ISD::SETCC) {
+    ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+    ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+    // The right-hand compare must test fabs of the same value that the
+    // left-hand compare tests.
+    SDValue X = LHS.getOperand(0);
+    SDValue Y = RHS.getOperand(0);
+    if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+      return SDValue();
+
+    if (LCC == ISD::SETO) {
+      // Only the self-compare form (x ord x) is matched.
+      if (X != LHS.getOperand(1))
+        return SDValue();
+
+      if (RCC == ISD::SETUNE) {
+        // The compared constant must be positive infinity.
+        const auto *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
+        if (!C1 || !C1->isInfinity() || C1->isNegative())
+          return SDValue();
+
+        // Every class bit except the NaN and infinity ones.
+        const uint32_t Mask = SIInstrFlags::N_NORMAL |
+                              SIInstrFlags::N_SUBNORMAL |
+                              SIInstrFlags::N_ZERO |
+                              SIInstrFlags::P_ZERO |
+                              SIInstrFlags::P_SUBNORMAL |
+                              SIInstrFlags::P_NORMAL;
+
+        static_assert(((~(SIInstrFlags::S_NAN |
+                          SIInstrFlags::Q_NAN |
+                          SIInstrFlags::N_INFINITY |
+                          SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
+                      "mask not equal");
+
+        return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
+                           X, DAG.getConstant(Mask, MVT::i32));
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue SITargetLowering::performOrCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -1607,6 +1669,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     }
     break;
   }
+  case ISD::AND:
+    return performAndCombine(N, DCI);
   case ISD::OR:
     return performOrCombine(N, DCI);
   case AMDGPUISD::FP_CLASS:
diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h
index 44b25dc..8b41245 100644
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h
@@ -58,6 +58,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue performSHLPtrCombine(SDNode *N,
                                unsigned AS,
                                DAGCombinerInfo &DCI) const;
+  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
diff --git a/llvm/test/CodeGen/R600/fp-classify.ll b/llvm/test/CodeGen/R600/fp-classify.ll
index e6ca5ef..a1b2f08 100644
--- a/llvm/test/CodeGen/R600/fp-classify.ll
+++ b/llvm/test/CodeGen/R600/fp-classify.ll
@@ -41,5 +41,90 @@ define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x
   ret void
 }
 
+; SI-LABEL: {{^}}test_isfinite_pattern_0:
+; SI-NOT: v_cmp
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Use negative infinity
+; SI-LABEL: {{^}}test_isfinite_not_pattern_0:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; No fabs
+; SI-LABEL: {{^}}test_isfinite_not_pattern_1:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %ninf = fcmp une float %x, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; fabs of different value
+; SI-LABEL: {{^}}test_isfinite_not_pattern_2:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong ordered compare type
+; SI-LABEL: {{^}}test_isfinite_not_pattern_3:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp uno float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong unordered compare
+; SI-LABEL: {{^}}test_isfinite_not_pattern_4:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-- 
2.7.4