2 Copyright (C) 2010 Sony Computer Entertainment Inc.
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
18 #include "MiniCL/cl.h"
19 #define __PHYSICS_COMMON_H__ 1
21 #include "BulletMultiThreaded/Win32ThreadSupport.h"
24 #include "BulletMultiThreaded/PlatformDefinitions.h"
26 #include "BulletMultiThreaded/PosixThreadSupport.h"
30 #include "BulletMultiThreaded/SequentialThreadSupport.h"
31 #include "MiniCLTaskScheduler.h"
32 #include "MiniCLTask/MiniCLTask.h"
33 #include "LinearMath/btMinMax.h"
37 //#define DEBUG_MINICL_KERNELS 1
39 static const char* spPlatformID = "MiniCL, SCEA";
40 static const char* spDriverVersion= "1.0";
42 CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(
44 cl_platform_id * platforms,
45 cl_uint * num_platforms ) CL_API_SUFFIX__VERSION_1_0
51 return CL_INVALID_VALUE;
53 *((const char**)platforms) = spPlatformID;
55 if(num_platforms != NULL)
63 CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(
64 cl_platform_id platform,
65 cl_platform_info param_name,
66 size_t param_value_size,
68 size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
70 char* pId = (char*)platform;
71 if(strcmp(pId, spPlatformID))
73 return CL_INVALID_PLATFORM;
77 case CL_PLATFORM_VERSION:
79 if(param_value_size < (strlen(spDriverVersion) + 1))
81 return CL_INVALID_VALUE;
83 strcpy((char*)param_value, spDriverVersion);
84 if(param_value_size_ret != NULL)
86 *param_value_size_ret = strlen(spDriverVersion) + 1;
90 case CL_PLATFORM_NAME:
91 case CL_PLATFORM_VENDOR :
92 if(param_value_size < (strlen(spPlatformID) + 1))
94 return CL_INVALID_VALUE;
96 strcpy((char*)param_value, spPlatformID);
97 if(param_value_size_ret != NULL)
99 *param_value_size_ret = strlen(spPlatformID) + 1;
103 return CL_INVALID_VALUE;
111 CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
112 cl_device_id device ,
113 cl_device_info param_name ,
114 size_t param_value_size ,
116 size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
123 char deviceName[] = "MiniCL CPU";
124 unsigned int nameLen = (unsigned int)strlen(deviceName)+1;
125 btAssert(param_value_size>strlen(deviceName));
126 if (nameLen < param_value_size)
128 const char* cpuName = "MiniCL CPU";
129 sprintf((char*)param_value,"%s",cpuName);
132 printf("error: param_value_size should be at least %d, but it is %zu\n",nameLen,param_value_size);
133 return CL_INVALID_VALUE;
139 if (param_value_size>=sizeof(cl_device_type))
141 cl_device_type* deviceType = (cl_device_type*)param_value;
142 *deviceType = CL_DEVICE_TYPE_CPU;
145 printf("error: param_value_size should be at least %zu\n",sizeof(cl_device_type));
146 return CL_INVALID_VALUE;
150 case CL_DEVICE_MAX_COMPUTE_UNITS:
152 if (param_value_size>=sizeof(cl_uint))
154 cl_uint* numUnits = (cl_uint*)param_value;
158 printf("error: param_value_size should be at least %zu\n",sizeof(cl_uint));
159 return CL_INVALID_VALUE;
164 case CL_DEVICE_MAX_WORK_ITEM_SIZES:
166 size_t workitem_size[3];
168 if (param_value_size>=sizeof(workitem_size))
170 size_t* workItemSize = (size_t*)param_value;
171 workItemSize[0] = 64;
172 workItemSize[1] = 24;
173 workItemSize[2] = 16;
176 printf("error: param_value_size should be at least %zu\n",sizeof(cl_uint));
177 return CL_INVALID_VALUE;
181 case CL_DEVICE_MAX_CLOCK_FREQUENCY:
183 cl_uint* clock_frequency = (cl_uint*)param_value;
184 *clock_frequency = 3*1024;
188 case CL_DEVICE_VENDOR :
190 if(param_value_size < (strlen(spPlatformID) + 1))
192 return CL_INVALID_VALUE;
194 strcpy((char*)param_value, spPlatformID);
195 if(param_value_size_ret != NULL)
197 *param_value_size_ret = strlen(spPlatformID) + 1;
201 case CL_DRIVER_VERSION:
203 if(param_value_size < (strlen(spDriverVersion) + 1))
205 return CL_INVALID_VALUE;
207 strcpy((char*)param_value, spDriverVersion);
208 if(param_value_size_ret != NULL)
210 *param_value_size_ret = strlen(spDriverVersion) + 1;
215 case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
217 cl_uint* maxDimensions = (cl_uint*)param_value;
221 case CL_DEVICE_MAX_WORK_GROUP_SIZE:
223 cl_uint* maxWorkGroupSize = (cl_uint*)param_value;
224 *maxWorkGroupSize = 128;//1;
227 case CL_DEVICE_ADDRESS_BITS:
229 cl_uint* addressBits = (cl_uint*)param_value;
230 *addressBits= 32; //@todo: should this be 64 for 64bit builds?
233 case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
235 cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
236 *maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ?
239 case CL_DEVICE_GLOBAL_MEM_SIZE:
241 cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
242 *maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ?
246 case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
248 cl_bool* error_correction_support = (cl_bool*)param_value;
249 *error_correction_support = CL_FALSE;
253 case CL_DEVICE_LOCAL_MEM_TYPE:
255 cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value;
256 *local_mem_type = CL_GLOBAL;
259 case CL_DEVICE_LOCAL_MEM_SIZE:
261 cl_ulong* localmem = (cl_ulong*) param_value;
266 case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
268 cl_ulong* localmem = (cl_ulong*) param_value;
272 case CL_DEVICE_QUEUE_PROPERTIES:
274 cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value;
275 memset(queueProp,0,param_value_size);
279 case CL_DEVICE_IMAGE_SUPPORT:
281 cl_bool* imageSupport = (cl_bool*) param_value;
282 *imageSupport = CL_FALSE;
286 case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
287 case CL_DEVICE_MAX_READ_IMAGE_ARGS:
289 cl_uint* imageArgs = (cl_uint*) param_value;
293 case CL_DEVICE_IMAGE3D_MAX_DEPTH:
294 case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
295 case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
296 case CL_DEVICE_IMAGE3D_MAX_WIDTH:
297 case CL_DEVICE_IMAGE2D_MAX_WIDTH:
299 size_t* maxSize = (size_t*) param_value;
304 case CL_DEVICE_EXTENSIONS:
306 char* extensions = (char*) param_value;
311 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
312 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
313 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
314 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
315 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
316 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
318 cl_uint* width = (cl_uint*) param_value;
325 printf("error: unsupported param_name:%d\n",param_name);
333 CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
340 CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
345 CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
350 CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0
356 // Enqueued Commands APIs
357 CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue ,
359 cl_bool /* blocking_read */,
363 cl_uint /* num_events_in_wait_list */,
364 const cl_event * /* event_wait_list */,
365 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
367 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
369 ///wait for all work items to be completed
372 memcpy(ptr,(char*)buffer + offset,cb);
377 CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program /* program */,
378 cl_device_id /* device */,
379 cl_program_build_info /* param_name */,
380 size_t /* param_value_size */,
381 void * /* param_value */,
382 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
389 // Program Object APIs
390 CL_API_ENTRY cl_program
391 clCreateProgramWithSource(cl_context context ,
393 const char ** /* strings */,
394 const size_t * /* lengths */,
395 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
397 *errcode_ret = CL_SUCCESS;
398 return (cl_program)context;
401 CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue ,
403 cl_bool /* blocking_read */,
407 cl_uint /* num_events_in_wait_list */,
408 const cl_event * /* event_wait_list */,
409 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
411 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
413 ///wait for all work items to be completed
416 memcpy((char*)buffer + offset, ptr,cb);
420 CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue)
422 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
423 ///wait for all work items to be completed
429 CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
432 const size_t * /* global_work_offset */,
433 const size_t * global_work_size ,
434 const size_t * /* local_work_size */,
435 cl_uint /* num_events_in_wait_list */,
436 const cl_event * /* event_wait_list */,
437 cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0
441 MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
442 for (unsigned int ii=0;ii<work_dim;ii++)
444 int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
445 int numWorkItems = global_work_size[ii];
447 // //at minimum 64 work items per task
448 // int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
449 int numWorkItemsPerTask = numWorkItems / maxTask;
450 if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
452 for (int t=0;t<numWorkItems;)
454 //Performance Hint: tweak this number during benchmarking
455 int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
456 kernel->m_scheduler->issueTask(t, endIndex, kernel);
464 scheduler->issueTask(bla,2,3);
472 #define LOCAL_BUF_SIZE 32768
473 static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];
474 static int* spLocalBufCurr = NULL;
475 static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
476 static void* localBufMalloc(int size)
478 int size16 = (size + 15) >> 4; // in 16-byte units
479 if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
481 spLocalBufCurr = sLocalMemBuf;
482 while((size_t)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
485 void* ret = spLocalBufCurr;
486 spLocalBufCurr += size16 * 4;
487 sLocalBufUsed += size;
493 CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel clKernel ,
496 const void * arg_value ) CL_API_SUFFIX__VERSION_1_0
498 MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
499 btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
500 if (arg_index>MINI_CL_MAX_ARG)
502 printf("error: clSetKernelArg arg_index (%u) exceeds %u\n",arg_index,MINI_CL_MAX_ARG);
505 if (arg_size>MINICL_MAX_ARGLENGTH)
506 //if (arg_size != MINICL_MAX_ARGLENGTH)
508 printf("error: clSetKernelArg argdata too large: %zu (maximum is %zu)\n",arg_size,MINICL_MAX_ARGLENGTH);
512 if(arg_value == NULL)
513 { // this is only for __local memory qualifier
514 void* ptr = localBufMalloc(arg_size);
515 kernel->m_argData[arg_index] = ptr;
519 memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
521 kernel->m_argSizes[arg_index] = arg_size;
522 if(arg_index >= kernel->m_numArgs)
524 kernel->m_numArgs = arg_index + 1;
525 kernel->updateLauncher();
532 // Kernel Object APIs
533 CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program ,
534 const char * kernel_name ,
535 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
537 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
538 int nameLen = strlen(kernel_name);
539 if(nameLen >= MINI_CL_MAX_KERNEL_NAME)
541 *errcode_ret = CL_INVALID_KERNEL_NAME;
545 MiniCLKernel* kernel = new MiniCLKernel();
547 strcpy(kernel->m_name, kernel_name);
548 kernel->m_numArgs = 0;
550 //kernel->m_kernelProgramCommandId = scheduler->findProgramCommandIdByName(kernel_name);
551 //if (kernel->m_kernelProgramCommandId>=0)
553 // *errcode_ret = CL_SUCCESS;
556 // *errcode_ret = CL_INVALID_KERNEL_NAME;
558 kernel->m_scheduler = scheduler;
559 if(kernel->registerSelf() == NULL)
561 *errcode_ret = CL_INVALID_KERNEL_NAME;
567 *errcode_ret = CL_SUCCESS;
570 return (cl_kernel)kernel;
575 CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */,
576 cl_uint /* num_devices */,
577 const cl_device_id * /* device_list */,
578 const char * /* options */,
579 void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
580 void * /* user_data */) CL_API_SUFFIX__VERSION_1_0
585 CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context ,
586 cl_uint /* num_devices */,
587 const cl_device_id * /* device_list */,
588 const size_t * /* lengths */,
589 const unsigned char ** /* binaries */,
590 cl_int * /* binary_status */,
591 cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
593 return (cl_program)context;
597 // Memory Object APIs
598 CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */,
602 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
604 cl_mem buf = (cl_mem)malloc(size);
605 if ((flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
607 memcpy(buf,host_ptr,size);
613 // Command Queue APIs
614 CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context ,
615 cl_device_id /* device */,
616 cl_command_queue_properties /* properties */,
617 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
620 return (cl_command_queue) context;
623 extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */,
624 cl_context_info param_name ,
625 size_t param_value_size ,
627 size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
632 case CL_CONTEXT_DEVICES:
634 if (!param_value_size)
636 *param_value_size_ret = 13;
639 const char* testName = "MiniCL_Test.";
640 sprintf((char*)param_value,"%s",testName);
646 printf("unsupported\n");
655 CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */,
656 cl_device_type device_type ,
657 void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
658 void * /* user_data */,
659 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
661 int maxNumOutstandingTasks = 4;
662 // int maxNumOutstandingTasks = 2;
663 // int maxNumOutstandingTasks = 1;
664 gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
665 const int maxNumOfThreadSupports = 8;
666 static int sUniqueThreadSupportIndex = 0;
667 static const char* sUniqueThreadSupportName[maxNumOfThreadSupports] =
669 "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
672 btThreadSupportInterface* threadSupport = 0;
674 if (device_type==CL_DEVICE_TYPE_DEBUG)
676 SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
677 threadSupport = new SequentialThreadSupport(stc);
682 btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
683 const char* bla = "MiniCL";
684 threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
686 sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
687 processMiniCLTask, //processCollisionTask,
688 createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
689 maxNumOutstandingTasks));
693 PosixThreadSupport::ThreadConstructionInfo constructionInfo("PosixThreads",
695 createMiniCLLocalStoreMemory,
696 maxNumOutstandingTasks);
697 threadSupport = new PosixThreadSupport(constructionInfo);
700 ///todo: add posix thread support for other platforms
701 SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
702 threadSupport = new SequentialThreadSupport(stc);
703 #endif //USE_PTHREADS
709 MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
712 return (cl_context)scheduler;
715 CL_API_ENTRY cl_int CL_API_CALL
716 clGetDeviceIDs(cl_platform_id /* platform */,
717 cl_device_type /* device_type */,
718 cl_uint /* num_entries */,
719 cl_device_id * /* devices */,
720 cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0
725 CL_API_ENTRY cl_context CL_API_CALL
726 clCreateContext(const cl_context_properties * properties ,
727 cl_uint num_devices ,
728 const cl_device_id * devices ,
729 void (*pfn_notify)(const char *, const void *, size_t, void *),
731 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
734 return clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL,pfn_notify,user_data,errcode_ret);
737 CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context ) CL_API_SUFFIX__VERSION_1_0
740 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
742 btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
744 delete threadSupport;
748 extern CL_API_ENTRY cl_int CL_API_CALL
749 clFinish(cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0
751 MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
752 ///wait for all work items to be completed
757 extern CL_API_ENTRY cl_int CL_API_CALL
758 clGetProgramInfo(cl_program /* program */,
759 cl_program_info /* param_name */,
760 size_t /* param_value_size */,
761 void * /* param_value */,
762 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
767 extern CL_API_ENTRY cl_int CL_API_CALL
768 clGetKernelWorkGroupInfo(cl_kernel kernel ,
769 cl_device_id /* device */,
770 cl_kernel_work_group_info wgi/* param_name */,
771 size_t sz /* param_value_size */,
772 void * ptr /* param_value */,
773 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
775 if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
776 &&(sz == sizeof(size_t))
779 MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
780 MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
781 *((size_t*)ptr) = scheduler->getMaxNumOutstandingTasks();
786 return CL_INVALID_VALUE;