int16_t RequiresDataSharing);
EXTERN void __kmpc_spmd_kernel_deinit();
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
- void ***SharedArgs, int32_t nArgs,
int16_t IsOMPRuntimeInitialized);
-EXTERN bool __kmpc_kernel_parallel(void **WorkFn, void ***SharedArgs,
+EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_kernel_end_parallel();
EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
// Scratchpad for teams reduction.
////////////////////////////////////////////////////////////////////////////////
__device__ __shared__ void *ReductionScratchpadPtr;
-
-////////////////////////////////////////////////////////////////////////////////
-// Data sharing related variables.
-////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs;
PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n",
OMPTARGET_NVPTX_VERSION);
- // init parallel work arguments
- omptarget_nvptx_sharedArgs.Init();
-
if (!RequiresOMPRuntime) {
// If OMP runtime is not required don't initialize OMP state.
setExecutionParameters(Generic, RuntimeUninitialized);
}
// Done with work. Kill the workers.
omptarget_nvptx_workFn = 0;
-
- // Deinit parallel work arguments
- omptarget_nvptx_sharedArgs.DeInit();
}
EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
#define __ACTIVEMASK() __ballot(1)
#endif
-// arguments needed for L0 parallelism only.
-class omptarget_nvptx_SharedArgs {
-public:
- // All these methods must be called by the master thread only.
- INLINE void Init() {
- args = buffer;
- nArgs = MAX_SHARED_ARGS;
- }
- INLINE void DeInit() {
- // Free any memory allocated for outlined parallel function with a large
- // number of arguments.
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, (char *)"new extended args");
- Init();
- }
- }
- INLINE void EnsureSize(int size) {
- if (size > nArgs) {
- if (nArgs > MAX_SHARED_ARGS) {
- SafeFree(args, (char *)"new extended args");
- }
- args = (void **) SafeMalloc(size * sizeof(void *),
- (char *)"new extended args");
- nArgs = size;
- }
- }
- // Called by all threads.
- INLINE void **GetArgs() { return args; };
-private:
- // buffer of pre-allocated arguments.
- void *buffer[MAX_SHARED_ARGS];
- // pointer to arguments buffer.
- // starts off as a pointer to 'buffer' but can be dynamically allocated.
- void **args;
- // starts off as MAX_SHARED_ARGS but can increase in size.
- uint32_t nArgs;
-};
-
-extern __device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_sharedArgs;
-
// Data sharing related quantities, need to match what is used in the compiler.
enum DATA_SHARING_SIZES {
// The maximum number of workers in a kernel.
// region to synchronize with each other.
#define L1_BARRIER (1)
-// Maximum number of preallocated arguments to an outlined parallel/simd function.
-// Anything more requires dynamic memory allocation.
-#define MAX_SHARED_ARGS 20
-
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#if __CUDA_ARCH__ >= 600
//
// This routine is always called by the team master..
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
- void ***SharedArgs, int32_t nArgs,
int16_t IsOMPRuntimeInitialized) {
PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n");
omptarget_nvptx_workFn = WorkFn;
- if (nArgs > 0) {
- omptarget_nvptx_sharedArgs.EnsureSize(nArgs);
- *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs();
- }
-
if (!IsOMPRuntimeInitialized)
return;
//
// Only the worker threads call this routine.
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
- void ***SharedArgs,
int16_t IsOMPRuntimeInitialized) {
PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n");
// Work function and arguments for L1 parallel region.
*WorkFn = omptarget_nvptx_workFn;
- *SharedArgs = omptarget_nvptx_sharedArgs.GetArgs();
if (!IsOMPRuntimeInitialized)
return true;