From 61738cbcb6c40cdaf6a6c560787b07373415114c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 27 Feb 2016 08:53:46 +0000 Subject: [PATCH] AMDGPU: Implement readcyclecounter This matches the behavior of the HSAIL clock instruction. s_memrealtime is used if the subtarget supports it; otherwise this falls back to s_memtime. Also introduces new intrinsics for each of s_memtime / s_memrealtime. llvm-svn: 262119 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7 +++++ llvm/lib/Target/AMDGPU/AMDGPU.td | 8 +++++- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 3 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 32 ++++++++++++++++++---- llvm/lib/Target/AMDGPU/SIInstructions.td | 17 +++++++++++- llvm/lib/Target/AMDGPU/VIInstructions.td | 14 +++++++++- .../CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll | 22 +++++++++++++++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll | 23 ++++++++++++++++ llvm/test/CodeGen/AMDGPU/readcyclecounter.ll | 25 +++++++++++++++++ llvm/test/MC/AMDGPU/smrd.s | 3 ++ 12 files changed, 148 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll create mode 100644 llvm/test/CodeGen/AMDGPU/readcyclecounter.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 54725c5..425f34d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -188,6 +188,10 @@ def int_amdgcn_s_dcache_inv : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, Intrinsic<[], [], []>; +def int_amdgcn_s_memtime : + GCCBuiltin<"__builtin_amdgcn_s_memtime">, + Intrinsic<[llvm_i64_ty], [], []>; + def int_amdgcn_dispatch_ptr : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; @@ -246,4 +250,7 @@ def int_amdgcn_s_dcache_wb_vol : 
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, Intrinsic<[], [], []>; +def int_amdgcn_s_memrealtime : + GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, + Intrinsic<[llvm_i64_ty], [], []>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5321fe1..145fadc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -149,6 +149,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts", "Additional intstructions for CI+" >; +def FeatureVIInsts : SubtargetFeature<"vi-insts", + "VIInsts", + "true", + "Additional intstructions for VI+" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -308,7 +314,7 @@ def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, - FeatureGCN3Encoding, FeatureCIInsts] + FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts] >; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 8e58aae..91d1aec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -81,7 +81,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), MaxPrivateElementSize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), + GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), VIInsts(false), + LDSBankCount(0), IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), EnableSIScheduler(false), FrameLowering(nullptr), 
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c943b2c..787c04a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -88,6 +88,7 @@ private: bool GCN1Encoding; bool GCN3Encoding; bool CIInsts; + bool VIInsts; bool FeatureDisable; int LDSBankCount; unsigned IsaVersion; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ada827d..46b73f7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -135,6 +135,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); + // On SI this is s_memtime and s_memrealtime on VI. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + for (MVT VT : MVT::integer_valuetypes()) { if (VT == MVT::i64) continue; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index d99028f..a63df88 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1077,23 +1077,31 @@ multiclass SMRD_m { - let hasSideEffects = 1, mayStore = 1 in { - def "" : SMRD_Pseudo ; +multiclass SMRD_Special pattern = []> { + let hasSideEffects = 1 in { + def "" : SMRD_Pseudo ; let sbase = 0, offset = 0 in { let sdst = 0 in { - def _si : SMRD_Real_si ; + def _si : SMRD_Real_si ; } let glc = 0, sdata = 0 in { - def _vi : SMRD_Real_vi ; + def _vi : SMRD_Real_vi ; } } } } +multiclass SMRD_Inval { + let mayStore = 1 in { + defm : SMRD_Special; + } +} + class SMEM_Inval op, string opName, SDPatternOperator node> : SMRD_Real_vi { let hasSideEffects = 1; @@ -1104,6 +1112,18 @@ class SMEM_Inval op, string opName, SDPatternOperator node> : let offset = 0; } +class SMEM_Ret op, string opName, SDPatternOperator node> : + 
SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = ?; + let mayLoad = ?; + let sbase = 0; + let sdata = 0; + let glc = 0; + let offset = 0; +} + multiclass SMRD_Helper { defm _IMM : SMRD_m < diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index debf9ae..def2f26 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -88,7 +88,15 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; -//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; +let mayStore = ? in { +// FIXME: mayStore = ? is a workaround for tablegen bug for different +// inferred mayStore flags for the instruction pattern vs. standalone +// Pat. Each considers the other contradictory. + +defm S_MEMTIME : SMRD_Special , "s_memtime", + (outs SReg_64:$dst), " $dst", [(set i64:$dst, (int_amdgcn_s_memtime))] +>; +} defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -3151,6 +3159,13 @@ defm : BFMPatterns ; def : BFEPattern ; +let Predicates = [isSICI] in { +def : Pat < + (i64 (readcyclecounter)), + (S_MEMTIME) +>; +} + //===----------------------------------------------------------------------===// // Fract Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index 807d461..b998b8a 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -103,6 +103,9 @@ def S_DCACHE_WB : SMEM_Inval <0x21, def S_DCACHE_WB_VOL : SMEM_Inval <0x23, "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; +def S_MEMREALTIME : SMEM_Ret<0x25, + "s_memrealtime", int_amdgcn_s_memrealtime>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI let Predicates = [isVI] in { @@ -114,7 +117,7 @@ def : Pat < >; 
//===----------------------------------------------------------------------===// -// DPP Paterns +// DPP Patterns //===----------------------------------------------------------------------===// def : Pat < @@ -124,4 +127,13 @@ def : Pat < (as_i32imm $bank_mask), (as_i32imm $row_mask)) >; +//===----------------------------------------------------------------------===// +// Misc Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (i64 (readcyclecounter)), + (S_MEMREALTIME) +>; + } // End Predicates = [isVI] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll new file mode 100644 index 0000000..372cba6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.s.memrealtime() #0 + +; GCN-LABEL: {{^}}test_s_memrealtime: +; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_s_memrealtime(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.amdgcn.s.memrealtime() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.amdgcn.s.memrealtime() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll new file mode 100644 index 0000000..8ce2d48 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=VI -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.s.memtime() #0 + +; GCN-LABEL: {{^}}test_s_memtime: +; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_s_memtime(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.amdgcn.s.memtime() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.amdgcn.s.memtime() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll new file mode 100644 index 0000000..e6d0efd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +declare i64 @llvm.readcyclecounter() #0 + +; GCN-LABEL: {{^}}test_readcyclecounter: +; SI-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; VI-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; SI: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; VI: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_readcyclecounter(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.readcyclecounter() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.readcyclecounter() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index 5684191..1eb9e39 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -67,3 +67,6 @@ s_dcache_inv s_dcache_inv_vol // CI: 
s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] // NOSI: error: instruction not supported on this GPU + +s_memtime s[0:1] +// GCN: s_memtime s[0:1] ; encoding: [0x00,0x00,0x80,0xc7] -- 2.7.4