From 1cffa4c191856746ad20850949558c8f96b029ee Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 13 Nov 2014 19:49:04 +0000 Subject: [PATCH] R600/SI: Get rid of FCLAMP_SI pseudo It's not necessary. Also use complex patterns to allow src modifier usage. llvm-svn: 221916 --- llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 12 +++++++++ llvm/lib/Target/R600/SIISelLowering.cpp | 14 ----------- llvm/lib/Target/R600/SIInstrInfo.td | 1 + llvm/lib/Target/R600/SIInstructions.td | 14 +++-------- llvm/test/CodeGen/R600/llvm.AMDGPU.clamp.ll | 38 +++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 59f395b..90b6672 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -111,6 +111,9 @@ private: bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Omod) const; + SDNode *SelectADD_SUB_I64(SDNode *N); SDNode *SelectDIV_SCALE(SDNode *N); @@ -1129,6 +1132,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, + SDValue &SrcMods, + SDValue &Omod) const { + // FIXME: Handle Omod + Omod = CurDAG->getTargetConstant(0, MVT::i32); + + return SelectVOP3Mods(In, Src, SrcMods); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 207539f..25acaa5 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -635,20 +635,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - 
case AMDGPU::FCLAMP_SI: { - const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( - getTargetMachine().getSubtargetImpl()->getInstrInfo()); - DebugLoc DL = MI->getDebugLoc(); - unsigned DestReg = MI->getOperand(0).getReg(); - BuildMI(*BB, I, DL, TII->get(AMDGPU::V_ADD_F32_e64), DestReg) - .addImm(0) // SRC0 modifiers - .addOperand(MI->getOperand(1)) - .addImm(0) // SRC1 modifiers - .addImm(0) // SRC1 - .addImm(1) // CLAMP - .addImm(0); // OMOD - MI->eraseFromParent(); - } } return BB; } diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index ec4ac9a..0ea08ea 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -245,6 +245,7 @@ def MUBUFOffset : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; +def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">; def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 33d2d07..2d730b5 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -2387,18 +2387,10 @@ def : BitConvert ; /********** Src & Dst modifiers **********/ /********** =================== **********/ -def FCLAMP_SI : AMDGPUShaderInst < - (outs VReg_32:$dst), - (ins VSrc_32:$src0), - "fclamp_si $dst, $src0", - [] -> { - let usesCustomInserter = 1; -} - def : Pat < - (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), - (FCLAMP_SI f32:$src) + (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod), + (f32 FP_ZERO), (f32 FP_ONE)), + (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod) >; /********** ================================ **********/ diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.clamp.ll index 9cf7ca8..c6efdb9 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.clamp.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.clamp.ll 
@@ -1,6 +1,7 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +declare float @llvm.fabs.f32(float) nounwind readnone declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone @@ -17,6 +18,43 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { ret void } +; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32: +; SI: s_load_dword [[ARG:s[0-9]+]], +; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, |[[ARG]]| clamp{{$}} +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fabs, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32: +; SI: s_load_dword [[ARG:s[0-9]+]], +; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -[[ARG]] clamp{{$}} +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %src.fneg = fsub float -0.0, %src + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32: +; SI: s_load_dword [[ARG:s[0-9]+]], +; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -|[[ARG]]| clamp{{$}} +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone + %src.fneg.fabs = fsub float -0.0, %src.fabs + %clamp = call 
float @llvm.AMDGPU.clamp.f32(float %src.fneg.fabs, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + ; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32: ; SI: s_load_dword [[ARG:s[0-9]+]], ; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}} -- 2.7.4