From acfeebf883d44d9592d7beb78a809a7f72ae9917 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 23 Jul 2013 01:48:05 +0000 Subject: [PATCH] R600: Use the same compute kernel calling convention for all GPUs A side-effect of this is that now the compiler expects kernel arguments to be 4-byte aligned. Reviewed-by: Vincent Lejeune llvm-svn: 186916 --- llvm/lib/Target/R600/AMDGPUCallingConv.td | 16 +++++++++++----- llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 1 + llvm/lib/Target/R600/R600ISelLowering.cpp | 21 +++++++++++++++------ llvm/test/CodeGen/R600/128bit-kernel-args.ll | 4 ++-- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPUCallingConv.td b/llvm/lib/Target/R600/AMDGPUCallingConv.td index 29a0326..5c9a3e4 100644 --- a/llvm/lib/Target/R600/AMDGPUCallingConv.td +++ b/llvm/lib/Target/R600/AMDGPUCallingConv.td @@ -36,9 +36,9 @@ def CC_SI : CallingConv<[ ]>; -// Calling convention for SI compute kernels -def CC_SI_Kernel : CallingConv<[ - CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>, +// Calling convention for compute kernels +def CC_AMDGPU_Kernel : CallingConv<[ + CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>, CCIfType<[i64, f64], CCAssignToStack < 8, 4>>, CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, CCIfType<[i16], CCAssignToStack < 2, 4>>, @@ -46,8 +46,14 @@ def CC_SI_Kernel : CallingConv<[ ]>; def CC_AMDGPU : CallingConv<[ - CCIf<"State.getMachineFunction().getInfo()->"# - "ShaderType == ShaderType::COMPUTE", CCDelegateTo>, + CCIf<"State.getTarget().getSubtarget().getGeneration() == " + "AMDGPUSubtarget::SOUTHERN_ISLANDS && " + "State.getMachineFunction().getInfo()->"# + "ShaderType == ShaderType::COMPUTE", CCDelegateTo>, + CCIf<"State.getTarget().getSubtarget().getGeneration() < " + "AMDGPUSubtarget::SOUTHERN_ISLANDS && " + "State.getMachineFunction().getInfo()->" + "ShaderType == ShaderType::COMPUTE", CCDelegateTo>, CCIf<"State.getTarget().getSubtarget()"# ".getGeneration() == 
AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo> ]>; diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index d74d9f8..c90176b 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -18,6 +18,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" +#include "R600MachineFunctionInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp index ac4a81c..7f93f23 100644 --- a/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -16,6 +16,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -1212,11 +1213,17 @@ SDValue R600TargetLowering::LowerFormalArguments( const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { - unsigned ParamOffsetBytes = 36; + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + AnalyzeFormalArguments(CCInfo, Ins); + Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) { - EVT VT = Ins[i].VT; + CCValAssign &VA = ArgLocs[i]; + EVT VT = VA.getLocVT(); Type *ArgType = FuncArg->getType(); unsigned ArgSizeInBits = ArgType->isPointerTy() ? 
32 : ArgType->getPrimitiveSizeInBits(); @@ -1239,12 +1246,14 @@ SDValue R600TargetLowering::LowerFormalArguments( PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::PARAM_I_ADDRESS); + + // The first 36 bytes of the input buffer contain information about + // thread group and global sizes. SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(), - DAG.getConstant(ParamOffsetBytes, MVT::i32), - MachinePointerInfo(UndefValue::get(PtrTy)), - ArgVT, false, false, ArgBytes); + DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), + MachinePointerInfo(UndefValue::get(PtrTy)), + ArgVT, false, false, ArgBytes); InVals.push_back(Arg); - ParamOffsetBytes += ArgBytes; } return Chain; } diff --git a/llvm/test/CodeGen/R600/128bit-kernel-args.ll b/llvm/test/CodeGen/R600/128bit-kernel-args.ll index bd60385..2fc8381 100644 --- a/llvm/test/CodeGen/R600/128bit-kernel-args.ll +++ b/llvm/test/CodeGen/R600/128bit-kernel-args.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @v4i32_kernel_arg -; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 +; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52 ; SI-CHECK: @v4i32_kernel_arg ; SI-CHECK: BUFFER_STORE_DWORDX4 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { @@ -12,7 +12,7 @@ entry: } ; R600-CHECK: @v4f32_kernel_arg -; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 +; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52 ; SI-CHECK: @v4f32_kernel_arg ; SI-CHECK: BUFFER_STORE_DWORDX4 define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { -- 2.7.4