From fa7b080218a92aaa2165669410f5ef31f2d9ad11 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 25 Jul 2016 09:16:01 +0000 Subject: [PATCH] GPGPU: initialize GPU context and simplify the corresponding GPURuntime interface. There is no need to expose the selected device at the moment. We also pass back pointers as return values, as this simplifies the interface. llvm-svn: 276623 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 116 +++++++++++++++++++++++++++++++ polly/test/GPGPU/double-parallel-loop.ll | 3 + polly/tools/GPURuntime/GPUJIT.c | 39 +++++------ polly/tools/GPURuntime/GPUJIT.h | 13 ++-- 4 files changed, 142 insertions(+), 29 deletions(-) diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 9bc6fde..012ae34 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -135,6 +135,12 @@ public: getExprBuilder().setIDToSAI(&IDToSAI); } + /// Create after-run-time-check initialization code. + void initializeAfterRTH(); + + /// Finalize the generated scop. + virtual void finalize(); + private: /// A vector of array base pointers for which a new ScopArrayInfo was created. /// @@ -142,6 +148,9 @@ private: /// more. std::vector LocalArrays; + /// The current GPU context. + Value *GPUContext; + /// A module containing GPU code. /// /// This pointer is only set in case we are currently generating GPU code. @@ -256,8 +265,113 @@ private: /// Free the LLVM-IR module corresponding to the kernel and -- if requested -- /// dump its IR to stderr. void finalizeKernelFunction(); + + void allocateDeviceArrays(); + + /// Create a call to initialize the GPU context. + /// + /// @returns A pointer to the newly initialized context. + Value *createCallInitContext(); + + /// Create a call to free the GPU context. + /// + /// @param Context A pointer to an initialized GPU context. 
+ void createCallFreeContext(Value *Context); + + Value *createCallAllocateMemoryForDevice(Value *Size); }; +void GPUNodeBuilder::initializeAfterRTH() { + GPUContext = createCallInitContext(); + allocateDeviceArrays(); +} + +void GPUNodeBuilder::finalize() { + createCallFreeContext(GPUContext); + IslNodeBuilder::finalize(); +} + +void GPUNodeBuilder::allocateDeviceArrays() { + isl_ast_build *Build = isl_ast_build_from_context(S.getContext()); + + for (int i = 0; i < Prog->n_array; ++i) { + gpu_array_info *Array = &Prog->array[i]; + std::string DevPtrName("p_devptr_"); + DevPtrName.append(Array->name); + + Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); + + if (!gpu_array_is_scalar(Array)) { + auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]); + isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); + + for (unsigned int i = 1; i < Array->n_index; i++) { + isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]); + isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I); + Res = isl_ast_expr_mul(Res, Expr); + } + + Value *NumElements = ExprBuilder.create(Res); + ArraySize = Builder.CreateMul(ArraySize, NumElements); + } + + Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize); + DevPtr->setName(DevPtrName); + } + + isl_ast_build_free(Build); +} + +Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) { + const char *Name = "polly_allocateMemoryForDevice"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. 
+ if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt64Ty()); + FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + return Builder.CreateCall(F, {Size}); +} + +Value *GPUNodeBuilder::createCallInitContext() { + const char *Name = "polly_initContext"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + return Builder.CreateCall(F, {}); +} + +void GPUNodeBuilder::createCallFreeContext(Value *Context) { + const char *Name = "polly_freeContext"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {Context}); +} + /// Check if one string is a prefix of another. /// /// @param String The string in which to look for the prefix. 
@@ -1325,6 +1439,8 @@ public: Builder.SetInsertPoint(SplitBlock->getTerminator()); NodeBuilder.addParameters(S->getContext()); Builder.SetInsertPoint(&*StartBlock->begin()); + + NodeBuilder.initializeAfterRTH(); NodeBuilder.create(Root); NodeBuilder.finalize(); } diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 0cea456..33153ae 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -92,6 +92,9 @@ ; IR-NEXT: br i1 true, label %polly.start, label %bb2 ; IR: polly.start: +; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext() +; IR-NEXT: %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) +; IR-NEXT: call void @polly_freeContext(i8* [[GPUContext]]) ; IR-NEXT: br label %polly.exiting ; IR: polly.exiting: diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c index 0cec97f..3b3ef9d 100644 --- a/polly/tools/GPURuntime/GPUJIT.c +++ b/polly/tools/GPURuntime/GPUJIT.c @@ -44,10 +44,6 @@ struct PollyGPUFunctionT { CUfunction Cuda; }; -struct PollyGPUDeviceT { - CUdevice Cuda; -}; - struct PollyGPUDevicePtrT { CUdeviceptr Cuda; }; @@ -219,10 +215,12 @@ static int initialDeviceAPIs() { return 1; } -void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) { +PollyGPUContext *polly_initContext() { DebugMode = getenv("POLLY_DEBUG") != 0; dump_function(); + PollyGPUContext *Context; + CUdevice Device; int Major = 0, Minor = 0, DeviceID = 0; char DeviceName[256]; @@ -246,26 +244,22 @@ void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) { exit(-1); } - /* We select the 1st device as default. */ - *Device = malloc(sizeof(PollyGPUDevice)); - if (*Device == 0) { - fprintf(stdout, "Allocate memory for Polly GPU device failed.\n"); - exit(-1); - } - CuDeviceGetFcnPtr(&((*Device)->Cuda), 0); + CuDeviceGetFcnPtr(&Device, 0); /* Get compute capabilities and the device name. 
*/ - CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, (*Device)->Cuda); - CuDeviceGetNameFcnPtr(DeviceName, 256, (*Device)->Cuda); + CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, Device); + CuDeviceGetNameFcnPtr(DeviceName, 256, Device); debug_print("> Running on GPU device %d : %s.\n", DeviceID, DeviceName); /* Create context on the device. */ - *Context = malloc(sizeof(PollyGPUContext)); - if (*Context == 0) { + Context = (PollyGPUContext *)malloc(sizeof(PollyGPUContext)); + if (Context == 0) { fprintf(stdout, "Allocate memory for Polly GPU context failed.\n"); exit(-1); } - CuCtxCreateFcnPtr(&((*Context)->Cuda), 0, (*Device)->Cuda); + CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device); + + return Context; } void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) { @@ -347,7 +341,6 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData, PollyGPUModule *Module, - PollyGPUContext *Context, PollyGPUFunction *Kernel) { dump_function(); @@ -365,16 +358,18 @@ void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData, CuModuleUnloadFcnPtr(Module->Cuda); free(Module); } + if (Kernel) { + free(Kernel); + } +} + +void polly_freeContext(PollyGPUContext *Context) { if (Context->Cuda) { CuCtxDestroyFcnPtr(Context->Cuda); free(Context); } - if (Kernel) { - free(Kernel); - } - dlclose(HandleCuda); dlclose(HandleCudaRT); } diff --git a/polly/tools/GPURuntime/GPUJIT.h b/polly/tools/GPURuntime/GPUJIT.h index 0114acd..68fdc49 100644 --- a/polly/tools/GPURuntime/GPUJIT.h +++ b/polly/tools/GPURuntime/GPUJIT.h @@ -44,10 +44,9 @@ * const char *Entry = "_Z8myKernelPi"; * * int main() { - * PollyGPUContext *Context; * PollyGPUModule *Module; * PollyGPUFunction *Kernel; - * PollyGPUDevice *Device; + * PollyGPUContext *Context; * PollyGPUDevicePtr *PtrDevData; * int *HostData; * int MemSize; @@ -57,13 +56,14 @@ * int GridHeight = 8; * * MemSize = 256*64*sizeof(int); - * 
polly_initDevice(&Context, &Device); + * Context = polly_initContext(); * polly_getPTXModule(KernelString, &Module); * polly_getPTXKernelEntry(Entry, Module, &Kernel); * polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData); * polly_launchKernel(Kernel, GridWidth, GridHeight); * polly_copyFromDeviceToHost(HostData, DevData, MemSize); - * polly_cleanupGPGPUResources(HostData, DevData, Module, Context, Kernel); + * polly_cleanupGPGPUResources(HostData, DevData, Module, Kernel); + * polly_freeContext(Context); * } * */ @@ -71,10 +71,9 @@ typedef struct PollyGPUContextT PollyGPUContext; typedef struct PollyGPUModuleT PollyGPUModule; typedef struct PollyGPUFunctionT PollyGPUFunction; -typedef struct PollyGPUDeviceT PollyGPUDevice; typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr; -void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device); +PollyGPUContext *polly_initContext(); void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module); void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module, PollyGPUFunction **Kernel); @@ -88,6 +87,6 @@ void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth, int GridHeight); void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData, PollyGPUModule *Module, - PollyGPUContext *Context, PollyGPUFunction *Kernel); +void free_Context(PollyGPUContext *Context); #endif /* GPUJIT_H_ */ -- 2.7.4