From: Matt Arsenault Date: Thu, 13 Nov 2014 23:03:09 +0000 (+0000) Subject: R600/SI: Fix fmin_legacy / fmax_legacy matching for SI X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=da59f3de450d31b8bc9f21e217d45f6f9ceeadd7;p=platform%2Fupstream%2Fllvm.git R600/SI: Fix fmin_legacy / fmax_legacy matching for SI select_cc is expanded on SI, so this was never matched. llvm-svn: 221941 --- diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 5561bf8..6d608d1 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -378,6 +378,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::STORE); @@ -999,21 +1000,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, +SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, SelectionDAG &DAG) const { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue True = N->getOperand(2); - SDValue False = N->getOperand(3); - SDValue CC = N->getOperand(4); + if (VT != MVT::f32 && + (VT != MVT::f64 || + Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)) + return SDValue(); - if (VT != MVT::f32 || - !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); - } ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); switch (CCOpcode) { @@ -1029,14 +1030,15 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETTRUE2: case ISD::SETUO: case ISD::SETO: - 
llvm_unreachable("Operation should already be optimised!"); + break; case ISD::SETULE: case ISD::SETULT: case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMIN_LEGACY : AMDGPUISD::FMAX_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: @@ -1045,7 +1047,8 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETOGE: case ISD::SETUGT: case ISD::SETOGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMAX_LEGACY : AMDGPUISD::FMIN_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: @@ -2110,9 +2113,37 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT_CC: { - return CombineMinMax(N, DAG); + case ISD::SELECT_CC: { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + SDValue CC = N->getOperand(4); + + return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); + } + case ISD::SELECT: { + SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() == ISD::SETCC) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + + return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); } + + break; + } case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2289,10 +2320,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FRACT) NODE_NAME_CASE(CLAMP) NODE_NAME_CASE(MAD) - NODE_NAME_CASE(FMAX) + NODE_NAME_CASE(FMAX_LEGACY) NODE_NAME_CASE(SMAX) 
NODE_NAME_CASE(UMAX) - NODE_NAME_CASE(FMIN) + NODE_NAME_CASE(FMIN_LEGACY) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) NODE_NAME_CASE(URECIP) diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index ea60c1b..793c847 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -140,7 +140,14 @@ public: SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const; + SDValue CombineMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + SelectionDAG &DAG) const; const char* getTargetNodeName(unsigned Opcode) const override; virtual SDNode *PostISelFolding(MachineSDNode *N, @@ -188,10 +195,10 @@ enum { // Denormals handled on some parts. COS_HW, SIN_HW, - FMAX, + FMAX_LEGACY, SMAX, UMAX, - FMIN, + FMIN_LEGACY, SMIN, UMIN, URECIP, diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td index 3d70791..037767d 100644 --- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td @@ -58,9 +58,12 @@ def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; -// out = max(a, b) a and b are floats -def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] +// out = max(a, b) a and b are floats, where a nan comparison fails. +// This is not commutative because this gives the second operand: +// x < nan ? x : nan -> nan +// nan < x ? 
nan : x -> x +def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp, + [SDNPAssociative] >; def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>; @@ -76,9 +79,9 @@ def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; -// out = min(a, b) a and b are floats -def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] +// out = min(a, b) a and b are floats, where a nan comparison fails. +def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, + [SDNPAssociative] >; // out = min(a, b) a snd b are signed ints @@ -137,7 +140,7 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", // MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src) // // src0: vec4(src, 0, 0, mask) -// src1: dst - rat offset (aka pointer) in dwords +// src1: dst - rat offset (aka pointer) in dwords def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", SDTypeProfile<0, 2, []>, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index 7ce05a2..69695a3 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; // Non-IEEE MUL: 0 * anything = 0 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; -def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; -def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; +// TODO: Do these actually match the regular fmin/fmax behavior? +def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; +def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string. 
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 14ad452..9d4b8bb 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1398,11 +1398,11 @@ defm V_MUL_U32_U24 : VOP2Inst , "v_mul_u32_u24", defm V_MIN_LEGACY_F32 : VOP2Inst , "v_min_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmin + VOP_F32_F32_F32, AMDGPUfmin_legacy >; defm V_MAX_LEGACY_F32 : VOP2Inst , "v_max_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmax + VOP_F32_F32_F32, AMDGPUfmax_legacy >; defm V_MIN_F32 : VOP2Inst , "v_min_f32", VOP_F32_F32_F32, fminnum>; diff --git a/llvm/test/CodeGen/R600/fcmp64.ll b/llvm/test/CodeGen/R600/fcmp64.ll index 3cf4a3b..dc24443 100644 --- a/llvm/test/CodeGen/R600/fcmp64.ll +++ b/llvm/test/CodeGen/R600/fcmp64.ll @@ -1,60 +1,55 @@ ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; CHECK: {{^}}flt_f64: +; CHECK-LABEL: {{^}}flt_f64: ; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - -define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 %r1 = load double addrspace(1)* %in2 %r2 = fcmp ult double %r0, %r1 - %r3 = select i1 %r2, double %r0, double %r1 - store double %r3, double addrspace(1)* %out + %r3 = zext i1 %r2 to i32 + store i32 %r3, i32 addrspace(1)* %out ret void } -; CHECK: {{^}}fle_f64: +; CHECK-LABEL: {{^}}fle_f64: ; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - -define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 %r1 = load double addrspace(1)* %in2 %r2 = fcmp ule double %r0, %r1 - %r3 = select i1 %r2, double %r0, double %r1 - store double %r3, double addrspace(1)* %out + 
%r3 = zext i1 %r2 to i32 + store i32 %r3, i32 addrspace(1)* %out ret void } -; CHECK: {{^}}fgt_f64: +; CHECK-LABEL: {{^}}fgt_f64: ; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - -define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 %r1 = load double addrspace(1)* %in2 %r2 = fcmp ugt double %r0, %r1 - %r3 = select i1 %r2, double %r0, double %r1 - store double %r3, double addrspace(1)* %out + %r3 = zext i1 %r2 to i32 + store i32 %r3, i32 addrspace(1)* %out ret void } -; CHECK: {{^}}fge_f64: +; CHECK-LABEL: {{^}}fge_f64: ; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - -define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 %r1 = load double addrspace(1)* %in2 %r2 = fcmp uge double %r0, %r1 - %r3 = select i1 %r2, double %r0, double %r1 - store double %r3, double addrspace(1)* %out + %r3 = zext i1 %r2 to i32 + store i32 %r3, i32 addrspace(1)* %out ret void } -; CHECK: {{^}}fne_f64: +; CHECK-LABEL: {{^}}fne_f64: ; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 @@ -65,9 +60,8 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ret void } -; CHECK: {{^}}feq_f64: +; CHECK-LABEL: {{^}}feq_f64: ; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} - define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 diff --git a/llvm/test/CodeGen/R600/fmax_legacy.ll 
b/llvm/test/CodeGen/R600/fmax_legacy.ll new file mode 100644 index 0000000..9bfff8a7 --- /dev/null +++ b/llvm/test/CodeGen/R600/fmax_legacy.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: @test_fmax_legacy_uge_f32 +; SI: v_max_legacy_f32_e32 +; EG: MAX +define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_oge_f32 +; SI: v_max_legacy_f32_e32 +; EG: MAX +define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp oge float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ugt_f32 +; SI: v_max_legacy_f32_e32 +; EG: MAX +define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp ugt float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ogt_f32 +; SI: v_max_legacy_f32_e32 +; EG: MAX +define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp ogt float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/R600/fmin_legacy.ll b/llvm/test/CodeGen/R600/fmin_legacy.ll new file mode 100644 index 0000000..5840e7e --- /dev/null +++ b/llvm/test/CodeGen/R600/fmin_legacy.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG 
-check-prefix=FUNC %s + +; FUNC-LABEL: @test_fmin_legacy_f32 +; EG: MIN * +; SI: v_min_legacy_f32_e32 +define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) nounwind { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r1, float %r0 + %vec = insertelement <4 x float> undef, float %r3, i32 0 + store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ule_f32 +; SI: v_min_legacy_f32_e32 +define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp ule float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ole_f32 +; SI: v_min_legacy_f32_e32 +define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp ole float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_olt_f32 +; SI: v_min_legacy_f32_e32 +define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp olt float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ult_f32 +; SI: v_min_legacy_f32_e32 +define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %cmp = fcmp ult float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out, align 4 + ret void +}