From: Tom Stellard Date: Wed, 14 Jan 2015 01:13:19 +0000 (+0000) Subject: R600/SI: Define a schedule model X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ae38f30d7b79319510c15a0413cccd65474f12ac;p=platform%2Fupstream%2Fllvm.git R600/SI: Define a schedule model The machine scheduler is still disabled by default. The schedule model is not complete yet, and could be improved. llvm-svn: 225913 --- diff --git a/llvm/lib/Target/R600/Processors.td b/llvm/lib/Target/R600/Processors.td index fa78ffc..cff97cdb3 100644 --- a/llvm/lib/Target/R600/Processors.td +++ b/llvm/lib/Target/R600/Processors.td @@ -83,34 +83,38 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; -def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>; -def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>; +def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : Proc<"tonga", SI_Itin, [FeatureVolcanicIslands]>; +//===----------------------------------------------------------------------===// +// Volcanic Islands +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>; -def : Proc<"iceland", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; -def : Proc<"carrizo", SI_Itin, [FeatureVolcanicIslands]>; +def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>; diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td index ff8db67..99a1df3 100644 --- a/llvm/lib/Target/R600/SIInstrFormats.td +++ b/llvm/lib/Target/R600/SIInstrFormats.td @@ -68,6 +68,7 @@ class InstSI pattern> : // Most instructions require adjustments after selection to satisfy // operand requirements. let hasPostISelHook = 1; + let SchedRW = [Write32Bit]; } class Enc32 { @@ -214,9 +215,9 @@ class SMRDe op, bits<1> imm> : Enc32 { let Inst{31-27} = 0x18; //encoding } +let SchedRW = [WriteSALU] in { class SOP1 pattern> : InstSI { - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -274,6 +275,8 @@ class SOPP op, dag ins, string asm, list pattern = []> : let UseNamedOperandTable = 1; } +} // let SchedRW = [WriteSALU] + class SMRD pattern> : InstSI { @@ -283,6 +286,7 @@ class SMRD pattern> : let mayLoad = 1; let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteSMEM]; } //===----------------------------------------------------------------------===// @@ -588,6 +592,7 @@ class DS pattern> : let DS = 1; let UseNamedOperandTable = 1; let DisableEncoding = "$m0"; + let SchedRW = [WriteLDS]; } class DS_si op, dag outs, dag ins, string asm, list pattern> : @@ -602,6 +607,7 @@ class MUBUF pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class MTBUF pattern> : @@ -613,6 +619,7 @@ class MTBUF pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; } class FLAT op, dag outs, dag ins, string asm, list pattern> : @@ -641,5 +648,4 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : } - } // End Uses = [EXEC] diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 980dba7..dd0ee40 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1191,6 +1191,8 @@ defm V_MOV_B32 : VOP1Inst , "v_mov_b32", VOP_I32_I32>; let Uses = [EXEC] in { +// FIXME: Specify SchedRW for READFIRSTLANE_B32 + def V_READFIRSTLANE_B32 : VOP1 < 0x00000002, (outs SReg_32:$vdst), @@ -1201,6 +1203,8 @@ def V_READFIRSTLANE_B32 : VOP1 < } +let SchedRW = [WriteQuarterRate32] in { + defm V_CVT_I32_F64 : VOP1Inst , "v_cvt_i32_f64", VOP_I32_F64, fp_to_sint >; @@ -1253,6 +1257,9 @@ defm V_CVT_U32_F64 : VOP1Inst , "v_cvt_u32_f64", defm V_CVT_F64_U32 : VOP1Inst , "v_cvt_f64_u32", VOP_F64_I32, uint_to_fp >; + +} // let SchedRW = [WriteQuarterRate32] + defm V_FRACT_F32 : VOP1Inst , "v_fract_f32", VOP_F32_F32, AMDGPUfract >; @@ -1271,6 +1278,9 @@ defm V_FLOOR_F32 : VOP1Inst , "v_floor_f32", defm V_EXP_F32 : VOP1Inst , "v_exp_f32", VOP_F32_F32, fexp2 >; + +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_F32 : VOP1Inst , "v_log_f32", VOP_F32_F32, flog2 >; @@ -1283,18 +1293,32 @@ defm V_RCP_IFLAG_F32 : VOP1Inst , "v_rcp_iflag_f32", defm V_RSQ_F32 : VOP1Inst , "v_rsq_f32", VOP_F32_F32, AMDGPUrsq >; + +} //let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_F64 : VOP1Inst , "v_rcp_f64", VOP_F64_F64, AMDGPUrcp >; defm V_RSQ_F64 : VOP1Inst , "v_rsq_f64", VOP_F64_F64, AMDGPUrsq >; + +} // let SchedRW = [WriteDouble]; + defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32", VOP_F32_F32, fsqrt >; + +let SchedRW = [WriteDouble] in { + defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", VOP_F64_F64, fsqrt >; + +} // let SchedRW = [WriteDouble] + defm V_SIN_F32 : VOP1Inst , "v_sin_f32", VOP_F32_F32, AMDGPUsin >; @@ -1323,6 +1347,8 @@ defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_I32_I32> // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { +let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32", VOP_F32_F32>; defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32", VOP_F32_F32>; @@ -1332,17 +1358,25 @@ defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32", defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; + +} // End let SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { + defm V_RCP_CLAMP_F64 : VOP1InstSI , "v_rcp_clamp_f64", VOP_F64_F64>; defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamped >; +} // End SchedRW = [WriteDouble] + } // End SubtargetPredicate = isSICI //===----------------------------------------------------------------------===// // VINTRP Instructions //===----------------------------------------------------------------------===// +// FIXME: Specify SchedRW for VINTRP insturctions. defm V_INTERP_P1_F32 : VINTRP_m < 0x00000000, "v_interp_p1_f32", (outs VGPR_32:$dst), @@ -1656,11 +1690,15 @@ defm V_SAD_U32 : VOP3Inst , "v_sad_u32", defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; + +let SchedRW = [WriteDouble] in { + defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -// Only on SI +} // let SchedRW = [WriteDouble] + defm V_LSHL_B64 : VOP3Inst , "v_lshl_b64", VOP_I64_I64_I32, shl >; @@ -1675,6 +1713,7 @@ defm V_ASHR_I64 : VOP3Inst , "v_ashr_i64", VOP_I64_I64_I32, sra >; +let SchedRW = [WriteDouble] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst , "v_add_f64", @@ -1697,7 +1736,9 @@ defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -let isCommutable = 1 in { +} // let SchedRW = [WriteDouble] + +let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { defm V_MUL_LO_U32 : VOP3Inst , "v_mul_lo_u32", VOP_I32_I32_I32 @@ -1713,30 +1754,37 @@ defm V_MUL_HI_I32 : VOP3Inst , "v_mul_hi_i32", VOP_I32_I32_I32 >; -} // isCommutable = 1 +} // isCommutable = 1, SchedRW = [WriteQuarterRate32] defm V_DIV_SCALE_F32 : VOP3b_32 , "v_div_scale_f32", []>; +let SchedRW = [WriteDouble] in { // Double precision division pre-scale. defm V_DIV_SCALE_F64 : VOP3b_64 , "v_div_scale_f64", []>; +} // let SchedRW = [WriteDouble] let isCommutable = 1 in { defm V_DIV_FMAS_F32 : VOP3Inst , "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +let SchedRW = [WriteDouble] in { defm V_DIV_FMAS_F64 : VOP3Inst , "v_div_fmas_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fmas >; +} // End SchedRW = [WriteDouble] } // End isCommutable = 1 //def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; +let SchedRW = [WriteDouble] in { defm V_TRIG_PREOP_F64 : VOP3Inst < vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop >; +} // let SchedRW = [WriteDouble] + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/R600/SISchedule.td b/llvm/lib/Target/R600/SISchedule.td index 28b65b8..9b1f676 100644 --- a/llvm/lib/Target/R600/SISchedule.td +++ b/llvm/lib/Target/R600/SISchedule.td @@ -7,9 +7,85 @@ // //===----------------------------------------------------------------------===// // -// TODO: This is just a place holder for now. +// MachineModel definitions for Southern Islands (SI) // //===----------------------------------------------------------------------===// +def WriteBranch : SchedWrite; +def WriteExport : SchedWrite; +def WriteLDS : SchedWrite; +def WriteSALU : SchedWrite; +def WriteSMEM : SchedWrite; +def WriteVMEM : SchedWrite; -def SI_Itin : ProcessorItineraries <[], [], []>; +// Vector ALU instructions +def Write32Bit : SchedWrite; +def WriteQuarterRate32 : SchedWrite; + +def WriteFloatFMA : SchedWrite; + +def WriteDouble : SchedWrite; +def WriteDoubleAdd : SchedWrite; + +def SIFullSpeedModel : SchedMachineModel; +def SIQuarterSpeedModel : SchedMachineModel; + +// BufferSize = 0 means the processors are in-order. +let BufferSize = 0 in { + +// XXX: Are the resource counts correct? +def HWBranch : ProcResource<1>; +def HWExport : ProcResource<7>; // Taken from S_WAITCNT +def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT +def HWSALU : ProcResource<1>; +def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT +def HWVALU : ProcResource<1>; + +} + +class HWWriteRes resources, + int latency> : WriteRes { + let Latency = latency; +} + +class HWVALUWriteRes : + HWWriteRes; + + +// The latency numbers are taken from AMD Accelerated Parallel Processing +// guide. They may not be acurate. + +// The latency values are 1 / (operations / cycle) / 4. +multiclass SICommonWriteRes { + + def : HWWriteRes; // XXX: Guessed ??? + def : HWWriteRes; // XXX: Guessed ??? + def : HWWriteRes; // 2 - 64 + def : HWWriteRes; + def : HWWriteRes; // XXX: Guessed ??? + def : HWWriteRes; // 300 - 600 + + def : HWVALUWriteRes; + def : HWVALUWriteRes; +} + + +let SchedModel = SIFullSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; + +} // End SchedModel = SIFullSpeedModel + +let SchedModel = SIQuarterSpeedModel in { + +defm : SICommonWriteRes; + +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; + +} // End SchedModel = SIQuarterSpeedModel