getExprBuilder().setIDToSAI(&IDToSAI);
}
+ /// Create after-run-time-check initialization code.
+ void initializeAfterRTH();
+
+ /// Finalize the generated scop.
+ virtual void finalize();
+
private:
/// A vector of array base pointers for which a new ScopArrayInfo was created.
///
/// more.
std::vector<Value *> LocalArrays;
+ /// The current GPU context.
+ Value *GPUContext;
+
/// A module containing GPU code.
///
/// This pointer is only set in case we are currently generating GPU code.
/// Free the LLVM-IR module corresponding to the kernel and -- if requested --
/// dump its IR to stderr.
void finalizeKernelFunction();
+
+ /// Create, for every array of the GPU program, a call that allocates
+ /// device memory for it.
+ void allocateDeviceArrays();
+
+ /// Create a call to initialize the GPU context.
+ ///
+ /// @returns A pointer to the newly initialized context.
+ Value *createCallInitContext();
+
+ /// Create a call to free the GPU context.
+ ///
+ /// @param Context A pointer to an initialized GPU context.
+ void createCallFreeContext(Value *Context);
+
+ /// Create a call to allocate memory on the device.
+ ///
+ /// @param Size The value passed as the single argument of the call.
+ ///
+ /// @returns The value of the created call.
+ Value *createCallAllocateMemoryForDevice(Value *Size);
};
+/// Create the initialization code that is placed after the run-time check.
+///
+/// A call that initializes the GPU context is created and its result is
+/// stored in GPUContext; afterwards the device array allocations are created.
+void GPUNodeBuilder::initializeAfterRTH() {
+ GPUContext = createCallInitContext();
+ allocateDeviceArrays();
+}
+
+/// Create a call that frees the GPU context stored in GPUContext, then run
+/// IslNodeBuilder::finalize().
+///
+/// NOTE(review): GPUContext is only assigned in initializeAfterRTH(); this
+/// presumably relies on that function having been called first -- confirm.
+void GPUNodeBuilder::finalize() {
+ createCallFreeContext(GPUContext);
+ IslNodeBuilder::finalize();
+}
+
+/// Create device memory allocations for all arrays of the GPU program.
+///
+/// For every array in Prog a call to polly_allocateMemoryForDevice is
+/// created. The requested size starts as Array->size and, for non-scalar
+/// arrays, is multiplied by the product of the bounds of all array
+/// dimensions. The value of each call is named "p_devptr_<array name>".
+void GPUNodeBuilder::allocateDeviceArrays() {
+  isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+
+  for (int i = 0; i < Prog->n_array; ++i) {
+    gpu_array_info *Array = &Prog->array[i];
+    std::string DevPtrName("p_devptr_");
+    DevPtrName.append(Array->name);
+
+    Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
+
+    if (!gpu_array_is_scalar(Array)) {
+      auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
+      isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
+
+      // A separate index name keeps the outer array index 'i' unshadowed.
+      for (unsigned int Dim = 1; Dim < Array->n_index; Dim++) {
+        isl_pw_aff *DimBound = isl_pw_aff_copy(Array->bound[Dim]);
+        isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, DimBound);
+        Res = isl_ast_expr_mul(Res, Expr);
+      }
+
+      Value *NumElements = ExprBuilder.create(Res);
+
+      // CreateMul needs both operands to have the same type; ArraySize is an
+      // i64 constant, so extend the element count when its type differs.
+      // NOTE(review): assumes the generated expression is never wider than
+      // 64 bit -- confirm.
+      if (NumElements->getType() != ArraySize->getType())
+        NumElements = Builder.CreateSExt(NumElements, ArraySize->getType());
+
+      ArraySize = Builder.CreateMul(ArraySize, NumElements);
+    }
+
+    Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize);
+    DevPtr->setName(DevPtrName);
+  }
+
+  isl_ast_build_free(Build);
+}
+
+/// Build a call to the runtime function polly_allocateMemoryForDevice.
+///
+/// The callee is looked up in the module that contains the current insert
+/// block. If the module has no function of that name, an external declaration
+/// taking one i64 and returning an i8 pointer is added first.
+///
+/// @param Size The value passed as the only call argument.
+///
+/// @returns The created call.
+Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
+  const char *Name = "polly_allocateMemoryForDevice";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+  Function *Callee = M->getFunction(Name);
+  if (Callee == nullptr) {
+    // Not declared yet: add the external declaration to the module.
+    std::vector<Type *> ParamTypes{Builder.getInt64Ty()};
+    FunctionType *CalleeTy =
+        FunctionType::get(Builder.getInt8PtrTy(), ParamTypes, false);
+    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
+  }
+
+  return Builder.CreateCall(Callee, {Size});
+}
+
+/// Build a call to the runtime function polly_initContext.
+///
+/// The callee is looked up in the module that contains the current insert
+/// block. If the module has no function of that name, an external declaration
+/// without parameters that returns an i8 pointer is added first.
+///
+/// @returns The created call.
+Value *GPUNodeBuilder::createCallInitContext() {
+  const char *Name = "polly_initContext";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+  Function *Callee = M->getFunction(Name);
+  if (Callee == nullptr) {
+    // Not declared yet: add the parameterless external declaration.
+    FunctionType *CalleeTy = FunctionType::get(Builder.getInt8PtrTy(), false);
+    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
+  }
+
+  return Builder.CreateCall(Callee, {});
+}
+
+/// Build a call to the runtime function polly_freeContext.
+///
+/// The callee is looked up in the module that contains the current insert
+/// block. If the module has no function of that name, an external declaration
+/// taking one i8 pointer and returning void is added first.
+///
+/// @param Context The value passed as the only call argument.
+void GPUNodeBuilder::createCallFreeContext(Value *Context) {
+  const char *Name = "polly_freeContext";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+  Function *Callee = M->getFunction(Name);
+  if (Callee == nullptr) {
+    // Not declared yet: add the external declaration to the module.
+    std::vector<Type *> ParamTypes{Builder.getInt8PtrTy()};
+    FunctionType *CalleeTy =
+        FunctionType::get(Builder.getVoidTy(), ParamTypes, false);
+    Callee = Function::Create(CalleeTy, Function::ExternalLinkage, Name, M);
+  }
+
+  Builder.CreateCall(Callee, {Context});
+}
+
/// Check if one string is a prefix of another.
///
/// @param String The string in which to look for the prefix.
Builder.SetInsertPoint(SplitBlock->getTerminator());
NodeBuilder.addParameters(S->getContext());
Builder.SetInsertPoint(&*StartBlock->begin());
+
+ NodeBuilder.initializeAfterRTH();
NodeBuilder.create(Root);
NodeBuilder.finalize();
}
CUfunction Cuda;
};
-struct PollyGPUDeviceT {
- CUdevice Cuda;
-};
-
struct PollyGPUDevicePtrT {
CUdeviceptr Cuda;
};
return 1;
}
-void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
+PollyGPUContext *polly_initContext() {
DebugMode = getenv("POLLY_DEBUG") != 0;
dump_function();
+ PollyGPUContext *Context;
+ CUdevice Device;
int Major = 0, Minor = 0, DeviceID = 0;
char DeviceName[256];
exit(-1);
}
- /* We select the 1st device as default. */
- *Device = malloc(sizeof(PollyGPUDevice));
- if (*Device == 0) {
- fprintf(stdout, "Allocate memory for Polly GPU device failed.\n");
- exit(-1);
- }
- CuDeviceGetFcnPtr(&((*Device)->Cuda), 0);
+ CuDeviceGetFcnPtr(&Device, 0);
/* Get compute capabilities and the device name. */
- CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, (*Device)->Cuda);
- CuDeviceGetNameFcnPtr(DeviceName, 256, (*Device)->Cuda);
+ CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, Device);
+ CuDeviceGetNameFcnPtr(DeviceName, 256, Device);
debug_print("> Running on GPU device %d : %s.\n", DeviceID, DeviceName);
/* Create context on the device. */
- *Context = malloc(sizeof(PollyGPUContext));
- if (*Context == 0) {
+ Context = (PollyGPUContext *)malloc(sizeof(PollyGPUContext));
+ if (Context == 0) {
fprintf(stdout, "Allocate memory for Polly GPU context failed.\n");
exit(-1);
}
- CuCtxCreateFcnPtr(&((*Context)->Cuda), 0, (*Device)->Cuda);
+ CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
+
+ return Context;
}
void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) {
void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
PollyGPUModule *Module,
- PollyGPUContext *Context,
PollyGPUFunction *Kernel) {
dump_function();
CuModuleUnloadFcnPtr(Module->Cuda);
free(Module);
}
+ if (Kernel) {
+ free(Kernel);
+ }
+}
+
+/* Destroy the CUDA context held by Context, free the Context object and
+ * close the HandleCuda and HandleCudaRT library handles.
+ *
+ * NOTE(review): Context is dereferenced without a null check, and the
+ * Context object is only freed when Context->Cuda is non-zero -- confirm
+ * that callers always pass a fully initialized context.
+ */
+void polly_freeContext(PollyGPUContext *Context) {
if (Context->Cuda) {
CuCtxDestroyFcnPtr(Context->Cuda);
free(Context);
}
- if (Kernel) {
- free(Kernel);
- }
-
dlclose(HandleCuda);
dlclose(HandleCudaRT);
}
* const char *Entry = "_Z8myKernelPi";
*
* int main() {
- * PollyGPUContext *Context;
* PollyGPUModule *Module;
* PollyGPUFunction *Kernel;
- * PollyGPUDevice *Device;
+ * PollyGPUContext *Context;
* PollyGPUDevicePtr *PtrDevData;
* int *HostData;
* int MemSize;
* int GridHeight = 8;
*
* MemSize = 256*64*sizeof(int);
- * polly_initDevice(&Context, &Device);
+ * Context = polly_initContext();
* polly_getPTXModule(KernelString, &Module);
* polly_getPTXKernelEntry(Entry, Module, &Kernel);
* polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData);
* polly_launchKernel(Kernel, GridWidth, GridHeight);
* polly_copyFromDeviceToHost(HostData, DevData, MemSize);
- * polly_cleanupGPGPUResources(HostData, DevData, Module, Context, Kernel);
+ * polly_cleanupGPGPUResources(HostData, DevData, Module, Kernel);
+ * polly_freeContext(Context);
* }
*
*/
typedef struct PollyGPUContextT PollyGPUContext;
typedef struct PollyGPUModuleT PollyGPUModule;
typedef struct PollyGPUFunctionT PollyGPUFunction;
-typedef struct PollyGPUDeviceT PollyGPUDevice;
typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;
-void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device);
+PollyGPUContext *polly_initContext();
void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module);
void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
PollyGPUFunction **Kernel);
int GridHeight);
void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
PollyGPUModule *Module,
- PollyGPUContext *Context,
PollyGPUFunction *Kernel);
+/* Free a context returned by polly_initContext(). */
+void polly_freeContext(PollyGPUContext *Context);
#endif /* GPUJIT_H_ */