2 Copyright (C) 2010 Sony Computer Entertainment Inc.
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
18 #include "MiniCL/cl.h"
19 #define __PHYSICS_COMMON_H__ 1
21 #include "BulletMultiThreaded/Win32ThreadSupport.h"
24 #include "BulletMultiThreaded/PlatformDefinitions.h"
26 #include "BulletMultiThreaded/PosixThreadSupport.h"
30 #include "BulletMultiThreaded/SequentialThreadSupport.h"
31 #include "MiniCLTaskScheduler.h"
32 #include "MiniCLTask/MiniCLTask.h"
33 #include "LinearMath/btMinMax.h"
36 //#define DEBUG_MINICL_KERNELS 1
38 static const char* spPlatformID = "MiniCL, SCEA";
39 static const char* spDriverVersion= "1.0";
41 CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(
43 cl_platform_id * platforms,
44 cl_uint * num_platforms ) CL_API_SUFFIX__VERSION_1_0
50 return CL_INVALID_VALUE;
52 *((const char**)platforms) = spPlatformID;
54 if(num_platforms != NULL)
62 CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(
63 cl_platform_id platform,
64 cl_platform_info param_name,
65 size_t param_value_size,
67 size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
69 char* pId = (char*)platform;
70 if(strcmp(pId, spPlatformID))
72 return CL_INVALID_PLATFORM;
76 case CL_PLATFORM_VERSION:
78 if(param_value_size < (strlen(spDriverVersion) + 1))
80 return CL_INVALID_VALUE;
82 strcpy((char*)param_value, spDriverVersion);
83 if(param_value_size_ret != NULL)
85 *param_value_size_ret = strlen(spDriverVersion) + 1;
89 case CL_PLATFORM_NAME:
90 case CL_PLATFORM_VENDOR :
91 if(param_value_size < (strlen(spPlatformID) + 1))
93 return CL_INVALID_VALUE;
95 strcpy((char*)param_value, spPlatformID);
96 if(param_value_size_ret != NULL)
98 *param_value_size_ret = strlen(spPlatformID) + 1;
102 return CL_INVALID_VALUE;
110 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
111 cl_device_id device ,
112 cl_device_info param_name ,
113 size_t param_value_size ,
115 size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
122 char deviceName[] = "MiniCL CPU";
123 unsigned int nameLen = (unsigned int)strlen(deviceName)+1;
124 btAssert(param_value_size>strlen(deviceName));
125 if (nameLen < param_value_size)
127 const char* cpuName = "MiniCL CPU";
128 sprintf((char*)param_value,"%s",cpuName);
131 printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
132 return CL_INVALID_VALUE;
138 if (param_value_size>=sizeof(cl_device_type))
140 cl_device_type* deviceType = (cl_device_type*)param_value;
141 *deviceType = CL_DEVICE_TYPE_CPU;
144 printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
145 return CL_INVALID_VALUE;
149 case CL_DEVICE_MAX_COMPUTE_UNITS:
151 if (param_value_size>=sizeof(cl_uint))
153 cl_uint* numUnits = (cl_uint*)param_value;
157 printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
158 return CL_INVALID_VALUE;
163 case CL_DEVICE_MAX_WORK_ITEM_SIZES:
165 size_t workitem_size[3];
167 if (param_value_size>=sizeof(workitem_size))
169 size_t* workItemSize = (size_t*)param_value;
170 workItemSize[0] = 64;
171 workItemSize[1] = 24;
172 workItemSize[2] = 16;
175 printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
176 return CL_INVALID_VALUE;
180 case CL_DEVICE_MAX_CLOCK_FREQUENCY:
182 cl_uint* clock_frequency = (cl_uint*)param_value;
183 *clock_frequency = 3*1024;
187 case CL_DEVICE_VENDOR :
189 if(param_value_size < (strlen(spPlatformID) + 1))
191 return CL_INVALID_VALUE;
193 strcpy((char*)param_value, spPlatformID);
194 if(param_value_size_ret != NULL)
196 *param_value_size_ret = strlen(spPlatformID) + 1;
200 case CL_DRIVER_VERSION:
202 if(param_value_size < (strlen(spDriverVersion) + 1))
204 return CL_INVALID_VALUE;
206 strcpy((char*)param_value, spDriverVersion);
207 if(param_value_size_ret != NULL)
209 *param_value_size_ret = strlen(spDriverVersion) + 1;
214 case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
216 cl_uint* maxDimensions = (cl_uint*)param_value;
220 case CL_DEVICE_MAX_WORK_GROUP_SIZE:
222 cl_uint* maxWorkGroupSize = (cl_uint*)param_value;
223 *maxWorkGroupSize = 128;//1;
226 case CL_DEVICE_ADDRESS_BITS:
228 cl_uint* addressBits = (cl_uint*)param_value;
229 *addressBits= 32; //@todo: should this be 64 for 64bit builds?
232 case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
234 cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
235 *maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ?
238 case CL_DEVICE_GLOBAL_MEM_SIZE:
240 cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
241 *maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ?
245 case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
247 cl_bool* error_correction_support = (cl_bool*)param_value;
248 *error_correction_support = CL_FALSE;
252 case CL_DEVICE_LOCAL_MEM_TYPE:
254 cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value;
255 *local_mem_type = CL_GLOBAL;
258 case CL_DEVICE_LOCAL_MEM_SIZE:
260 cl_ulong* localmem = (cl_ulong*) param_value;
265 case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
267 cl_ulong* localmem = (cl_ulong*) param_value;
271 case CL_DEVICE_QUEUE_PROPERTIES:
273 cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value;
274 memset(queueProp,0,param_value_size);
278 case CL_DEVICE_IMAGE_SUPPORT:
280 cl_bool* imageSupport = (cl_bool*) param_value;
281 *imageSupport = CL_FALSE;
285 case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
286 case CL_DEVICE_MAX_READ_IMAGE_ARGS:
288 cl_uint* imageArgs = (cl_uint*) param_value;
292 case CL_DEVICE_IMAGE3D_MAX_DEPTH:
293 case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
294 case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
295 case CL_DEVICE_IMAGE3D_MAX_WIDTH:
296 case CL_DEVICE_IMAGE2D_MAX_WIDTH:
298 size_t* maxSize = (size_t*) param_value;
303 case CL_DEVICE_EXTENSIONS:
305 char* extensions = (char*) param_value;
310 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
311 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
312 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
313 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
314 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
315 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
317 cl_uint* width = (cl_uint*) param_value;
324 printf("error: unsupported param_name:%d\n",param_name);
332 CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
339 CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
344 CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
349 CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
355 // Enqueued Commands APIs
356 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
358 cl_bool /* blocking_read */,
362 cl_uint /* num_events_in_wait_list */,
363 const cl_event * /* event_wait_list */,
364 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
366 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
368 ///wait for all work items to be completed
371 memcpy(ptr,(char*)buffer + offset,cb);
376 CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */,
377 cl_device_id /* device */,
378 cl_program_build_info /* param_name */,
379 size_t /* param_value_size */,
380 void * /* param_value */,
381 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
388 // Program Object APIs
389 CL_API_ENTRY cl_program
390 clCreateProgramWithSource(cl_context context ,
392 const char ** /* strings */,
393 const size_t * /* lengths */,
394 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
396 *errcode_ret = CL_SUCCESS;
397 return (cl_program)context;
400 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue ,
402 cl_bool /* blocking_read */,
406 cl_uint /* num_events_in_wait_list */,
407 const cl_event * /* event_wait_list */,
408 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
410 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
412 ///wait for all work items to be completed
415 memcpy((char*)buffer + offset, ptr,cb);
419 CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
421 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
422 ///wait for all work items to be completed
428 CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
431 const size_t * /* global_work_offset */,
432 const size_t * global_work_size ,
433 const size_t * /* local_work_size */,
434 cl_uint /* num_events_in_wait_list */,
435 const cl_event * /* event_wait_list */,
436 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
440 MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
441 for (unsigned int ii=0;ii<work_dim;ii++)
443 int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
444 int numWorkItems = global_work_size[ii];
446 // //at minimum 64 work items per task
447 // int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
448 int numWorkItemsPerTask = numWorkItems / maxTask;
449 if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
451 for (int t=0;t<numWorkItems;)
453 //Performance Hint: tweak this number during benchmarking
454 int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
455 kernel->m_scheduler->issueTask(t, endIndex, kernel);
463 scheduler->issueTask(bla,2,3);
471 #define LOCAL_BUF_SIZE 32768
472 static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
473 static int* spLocalBufCurr = NULL;
474 static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
475 static void* localBufMalloc(int size)
477 int size16 = (size + 15) >> 4; // in 16-byte units
478 if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
480 spLocalBufCurr = sLocalMemBuf;
481 while((unsigned long)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
484 void* ret = spLocalBufCurr;
485 spLocalBufCurr += size16 * 4;
486 sLocalBufUsed += size;
492 CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
495 const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
497 MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
498 btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
499 if (arg_index>MINI_CL_MAX_ARG)
501 printf("error: clSetKernelArg arg_index (%u) exceeds %u\n",arg_index,MINI_CL_MAX_ARG);
504 if (arg_size>MINICL_MAX_ARGLENGTH)
505 //if (arg_size != MINICL_MAX_ARGLENGTH)
507 printf("error: clSetKernelArg argdata too large: %zu (maximum is %zu)\n",arg_size,MINICL_MAX_ARGLENGTH);
511 if(arg_value == NULL)
512 { // this is only for __local memory qualifier
513 void* ptr = localBufMalloc(arg_size);
514 kernel->m_argData[arg_index] = ptr;
518 memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
520 kernel->m_argSizes[arg_index] = arg_size;
521 if(arg_index >= kernel->m_numArgs)
523 kernel->m_numArgs = arg_index + 1;
524 kernel->updateLauncher();
531 // Kernel Object APIs
532 CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
533 const char * kernel_name ,
534 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
536 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
537 MiniCLKernel* kernel = new MiniCLKernel();
538 int nameLen = strlen(kernel_name);
539 if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
541 *errcode_ret = CL_INVALID_KERNEL_NAME;
544 strcpy(kernel->m_name, kernel_name);
545 kernel->m_numArgs = 0;
547 //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
548 //if (kernel->m_kernelProgramCommandId>=0)
550 // *errcode_ret = CL_SUCCESS;
553 // *errcode_ret = CL_INVALID_KERNEL_NAME;
555 kernel->m_scheduler = scheduler;
556 if(kernel->registerSelf() == NULL)
558 *errcode_ret = CL_INVALID_KERNEL_NAME;
563 *errcode_ret = CL_SUCCESS;
566 return (cl_kernel)kernel;
571 CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
572 cl_uint /* num_devices */,
573 const cl_device_id * /* device_list */,
574 const char * /* options */,
575 void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
576 void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
581 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
582 cl_uint /* num_devices */,
583 const cl_device_id * /* device_list */,
584 const size_t * /* lengths */,
585 const unsigned char ** /* binaries */,
586 cl_int * /* binary_status */,
587 cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
589 return (cl_program)context;
593 // Memory Object APIs
594 CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
598 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
600 cl_mem buf = (cl_mem)malloc(size);
601 if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
603 memcpy(buf,host_ptr,size);
609 // Command Queue APIs
610 CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
611 cl_device_id /* device */,
612 cl_command_queue_properties /* properties */,
613 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
616 return (cl_command_queue) context;
619 extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
620 cl_context_info param_name ,
621 size_t param_value_size ,
623 size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
628 case CL_CONTEXT_DEVICES:
630 if (!param_value_size)
632 *param_value_size_ret = 13;
635 const char* testName = "MiniCL_Test.";
636 sprintf((char*)param_value,"%s",testName);
642 printf("unsupported\n");
651 CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */,
652 cl_device_type device_type ,
653 void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
654 void * /* user_data */,
655 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
657 int maxNumOutstandingTasks = 4;
658 // int maxNumOutstandingTasks = 2;
659 // int maxNumOutstandingTasks = 1;
660 gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
661 const int maxNumOfThreadSupports = 8;
662 static int sUniqueThreadSupportIndex = 0;
663 static const char* sUniqueThreadSupportName[maxNumOfThreadSupports] =
665 "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
668 btThreadSupportInterface* threadSupport = 0;
670 if (device_type==CL_DEVICE_TYPE_DEBUG)
672 SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
673 threadSupport = new SequentialThreadSupport(stc);
678 btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
679 const char* bla = "MiniCL";
680 threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
682 sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
683 processMiniCLTask, //processCollisionTask,
684 createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
685 maxNumOutstandingTasks));
689 PosixThreadSupport::ThreadConstructionInfo constructionInfo("PosixThreads",
691 createMiniCLLocalStoreMemory,
692 maxNumOutstandingTasks);
693 threadSupport = new PosixThreadSupport(constructionInfo);
696 ///todo: add posix thread support for other platforms
697 SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
698 threadSupport = new SequentialThreadSupport(stc);
699 #endif //USE_PTHREADS
705 MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
708 return (cl_context)scheduler;
711 CL_API_ENTRY cl_int CL_API_CALL
712 clGetDeviceIDs(cl_platform_id /* platform */,
713 cl_device_type /* device_type */,
714 cl_uint /* num_entries */,
715 cl_device_id * /* devices */,
716 cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0
721 CL_API_ENTRY cl_context CL_API_CALL
722 clCreateContext(const cl_context_properties * properties ,
723 cl_uint num_devices ,
724 const cl_device_id * devices ,
725 void (*pfn_notify)(const char *, const void *, size_t, void *),
727 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
730 return clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL,pfn_notify,user_data,errcode_ret);
733 CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
736 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
738 btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
740 delete threadSupport;
744 extern CL_API_ENTRY cl_int CL_API_CALL
745 clFinish(cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0
747 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
748 ///wait for all work items to be completed
753 extern CL_API_ENTRY cl_int CL_API_CALL
754 clGetProgramInfo(cl_program /* program */,
755 cl_program_info /* param_name */,
756 size_t /* param_value_size */,
757 void * /* param_value */,
758 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
763 extern CL_API_ENTRY cl_int CL_API_CALL
764 clGetKernelWorkGroupInfo(cl_kernel kernel ,
765 cl_device_id /* device */,
766 cl_kernel_work_group_info wgi/* param_name */,
767 size_t sz /* param_value_size */,
768 void * ptr /* param_value */,
769 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
771 if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
772 &&(sz == sizeof(size_t))
775 MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
776 MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
777 *((size_t*)ptr) = scheduler->getMaxNumOutstandingTasks();
782 return CL_INVALID_VALUE;