Tizen 2.1 base
[platform/upstream/libbullet.git] / Extras / RigidBodyGpuPipeline / opencl / gpu_rigidbody_pipeline2 / CLPhysicsDemo.cpp
1 /*\r
2 Copyright (c) 2012 Advanced Micro Devices, Inc.  \r
3 \r
4 This software is provided 'as-is', without any express or implied warranty.\r
5 In no event will the authors be held liable for any damages arising from the use of this software.\r
6 Permission is granted to anyone to use this software for any purpose, \r
7 including commercial applications, and to alter it and redistribute it freely, \r
8 subject to the following restrictions:\r
9 \r
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\r
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\r
12 3. This notice may not be removed or altered from any source distribution.\r
13 */\r
14 //Originally written by Erwin Coumans\r
15 \r
16 #include "OpenGLInclude.h"\r
17 \r
18 #include "CLPhysicsDemo.h"\r
19 #include "LinearMath/btAlignedObjectArray.h"\r
20 #include "DemoSettings.h"\r
21 #include "../basic_initialize/btOpenCLUtils.h"\r
22 #include "../opengl_interop/btOpenCLGLInteropBuffer.h"\r
23 #include "../broadphase_benchmark/findPairsOpenCL.h"\r
24 #include "LinearMath/btVector3.h"\r
25 #include "LinearMath/btQuaternion.h"\r
26 #include "LinearMath/btMatrix3x3.h"\r
27 #include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h"\r
28 #include "../../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"\r
29 #include "../../dynamics/basic_demo/ConvexHeightFieldShape.h"\r
30 #include "../broadphase_benchmark/btGridBroadphaseCl.h"\r
31 #include "LinearMath/btQuickprof.h"\r
32 \r
33 \r
34 #define MSTRINGIFY(A) #A\r
35 static char* interopKernelString = \r
36 #include "../broadphase_benchmark/integrateKernel.cl"\r
37 \r
38 #define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl"\r
39         \r
40 cl_kernel g_integrateTransformsKernel;\r
41 \r
42 \r
43 \r
44 bool runOpenCLKernels = true;\r
45 \r
46 \r
47 btGpuNarrowphaseAndSolver* narrowphaseAndSolver = 0;\r
48 ConvexHeightField* s_convexHeightField = 0 ;\r
49 btOpenCLGLInteropBuffer* g_interopBuffer = 0;\r
50 \r
51 extern GLuint               cube_vbo;\r
52 extern int VBOsize;\r
53 \r
54 cl_mem clBuffer=0;\r
55 char* hostPtr=0;\r
56 cl_bool blocking=  CL_TRUE;\r
57 \r
58 \r
59 \r
60 btFindPairsIO gFpIO;\r
61 \r
62 cl_context                      g_cxMainContext;\r
63 cl_command_queue        g_cqCommandQue;\r
64 cl_device_id            g_device;\r
65 \r
66 cl_mem                          gLinVelMem=0;\r
67 cl_mem                          gAngVelMem=0;\r
68 cl_mem                          gBodyTimes=0;\r
69 \r
70 #include <Adl/Adl.h>\r
71 \r
72 adl::DeviceCL* g_deviceCL=0;\r
73 \r
74 struct  btAABBHost //keep this in sync with btAABBCL!\r
75 {\r
76         float                   fx;\r
77         float                   fy;\r
78         float                   fz;\r
79         unsigned int    uw;\r
80 };\r
81 \r
82 struct InternalData\r
83 {\r
84         adl::Buffer<btVector3>* m_linVelBuf;\r
85         adl::Buffer<btVector3>* m_angVelBuf;\r
86         adl::Buffer<float>* m_bodyTimes;\r
87         bool    m_useInterop;\r
88         btGridBroadphaseCl* m_Broadphase;\r
89 \r
90         adl::Buffer<btAABBHost>* m_localShapeAABB;\r
91 \r
92         btVector3*      m_linVelHost;\r
93         btVector3*      m_angVelHost;\r
94         float*          m_bodyTimesHost;\r
95 \r
96         InternalData():m_linVelBuf(0),m_angVelBuf(0),m_bodyTimes(0),m_useInterop(0),m_Broadphase(0)\r
97         {\r
98                 m_linVelHost= new btVector3[MAX_CONVEX_BODIES_CL];\r
99                 m_angVelHost = new btVector3[MAX_CONVEX_BODIES_CL];\r
100                 m_bodyTimesHost = new float[MAX_CONVEX_BODIES_CL];\r
101         }\r
102         ~InternalData()\r
103         {\r
104                 delete[] m_linVelHost;\r
105                 delete[] m_angVelHost;\r
106                 delete[] m_bodyTimesHost;\r
107 \r
108         }\r
109 };\r
110 \r
111 \r
112 void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop)\r
113 {\r
114         void* glCtx=0;\r
115         void* glDC = 0;\r
116 \r
117 #ifdef _WIN32\r
118         glCtx = wglGetCurrentContext();\r
119 #else //!_WIN32\r
120         GLXContext glCtx = glXGetCurrentContext();\r
121 #endif //!_WIN32\r
122         glDC = wglGetCurrentDC();\r
123 \r
124         int ciErrNum = 0;\r
125 #ifdef CL_PLATFORM_INTEL\r
126         cl_device_type deviceType = CL_DEVICE_TYPE_ALL;\r
127 #else\r
128         cl_device_type deviceType = CL_DEVICE_TYPE_GPU;\r
129 #endif\r
130 \r
131         \r
132 \r
133         if (useInterop)\r
134         {\r
135                 g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);\r
136         } else\r
137         {\r
138                 g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);\r
139         }\r
140 \r
141 \r
142         oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
143 \r
144         int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);\r
145 \r
146         if (numDev>0)\r
147         {\r
148                 g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);\r
149                 btOpenCLDeviceInfo clInfo;\r
150                 btOpenCLUtils::getDeviceInfo(g_device,clInfo);\r
151                 btOpenCLUtils::printDeviceInfo(g_device);\r
152                 g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);\r
153                 oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
154         }\r
155 \r
156 }\r
157 \r
158 \r
159 \r
160 \r
161 CLPhysicsDemo::CLPhysicsDemo(Win32OpenGLWindow* renderer)\r
162 {\r
163         m_numCollisionShapes=0;\r
164         m_numPhysicsInstances=0;\r
165 \r
166         m_data = new InternalData;\r
167 }\r
168 \r
169 CLPhysicsDemo::~CLPhysicsDemo()\r
170 {\r
171 \r
172 }\r
173 \r
174 \r
175 void CLPhysicsDemo::writeBodiesToGpu()\r
176 {\r
177         if (narrowphaseAndSolver)\r
178                 narrowphaseAndSolver->writeAllBodiesToGpu();\r
179 }\r
180 \r
181 int             CLPhysicsDemo::registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)\r
182 {\r
183         btAlignedObjectArray<btVector3> verts;\r
184         \r
185         unsigned char* vts = (unsigned char*) vertices;\r
186         for (int i=0;i<numVertices;i++)\r
187         {\r
188                 float* vertex = (float*) &vts[i*strideInBytes];\r
189                 verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));\r
190         }\r
191 \r
192         btConvexUtility util;\r
193         bool merge = true;\r
194         util.initializePolyhedralFeatures(verts,merge);\r
195 \r
196         int numFaces= util.m_faces.size();\r
197         float4* eqn = new float4[numFaces];\r
198         for (int i=0;i<numFaces;i++)\r
199         {\r
200                 eqn[i].x = util.m_faces[i].m_plane[0];\r
201                 eqn[i].y = util.m_faces[i].m_plane[1];\r
202                 eqn[i].z = util.m_faces[i].m_plane[2];\r
203                 eqn[i].w = util.m_faces[i].m_plane[3];\r
204         }\r
205         printf("numFaces = %d\n", numFaces);\r
206 \r
207 \r
208         s_convexHeightField = new ConvexHeightField(eqn,numFaces);\r
209 \r
210         int shapeIndex=-1;\r
211 \r
212         if (narrowphaseAndSolver)\r
213                 shapeIndex = narrowphaseAndSolver->registerShape(s_convexHeightField);\r
214 \r
215         if (shapeIndex>=0)\r
216         {\r
217                 btAABBHost aabbMin, aabbMax;\r
218                 aabbMin.fx = s_convexHeightField->m_aabb.m_min.x;\r
219                 aabbMin.fy = s_convexHeightField->m_aabb.m_min.y;\r
220                 aabbMin.fz= s_convexHeightField->m_aabb.m_min.z;\r
221                 aabbMin.uw = shapeIndex;\r
222 \r
223                 aabbMax.fx = s_convexHeightField->m_aabb.m_max.x;\r
224                 aabbMax.fy = s_convexHeightField->m_aabb.m_max.y;\r
225                 aabbMax.fz= s_convexHeightField->m_aabb.m_max.z;\r
226                 aabbMax.uw = shapeIndex;\r
227 \r
228                 m_data->m_localShapeAABB->write(&aabbMin,1,shapeIndex*2);\r
229                 m_data->m_localShapeAABB->write(&aabbMax,1,shapeIndex*2+1);\r
230                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
231         }\r
232 \r
233         m_numCollisionShapes++;\r
234         delete[] eqn;\r
235         return shapeIndex;\r
236 }\r
237 \r
238 int             CLPhysicsDemo::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer)\r
239 {\r
240         btVector3 aabbMin(position[0],position[0],position[0]);\r
241         btVector3 aabbMax = aabbMin;\r
242         aabbMin -= btVector3(1.f,1.f,1.f);\r
243         aabbMax += btVector3(1.f,1.f,1.f);\r
244 \r
245         if (collisionShapeIndex>=0)\r
246         {\r
247                 btBroadphaseProxy* proxy = m_data->m_Broadphase->createProxy(aabbMin,aabbMax,collisionShapeIndex,userPointer,1,1,0,0);//m_dispatcher);\r
248         }\r
249                         \r
250         bool writeToGpu = false;\r
251         int bodyIndex = -1;\r
252 \r
253         if (narrowphaseAndSolver)\r
254                 bodyIndex = narrowphaseAndSolver->registerRigidBody(collisionShapeIndex,mass,position,orientation,writeToGpu);\r
255 \r
256         m_numPhysicsInstances++;\r
257         return bodyIndex;\r
258 }\r
259 \r
260 \r
261 \r
262 void    CLPhysicsDemo::init(int preferredDevice, int preferredPlatform, bool useInterop)\r
263 {\r
264         \r
265         InitCL(-1,-1,useInterop);\r
266 \r
267 #define CUSTOM_CL_INITIALIZATION\r
268 #ifdef CUSTOM_CL_INITIALIZATION\r
269         g_deviceCL = new adl::DeviceCL();\r
270         g_deviceCL->m_deviceIdx = g_device;\r
271         g_deviceCL->m_context = g_cxMainContext;\r
272         g_deviceCL->m_commandQueue = g_cqCommandQue;\r
273         g_deviceCL->m_kernelManager = new adl::KernelManager;\r
274 \r
275 #else\r
276         DeviceUtils::Config cfg;\r
277         cfg.m_type = DeviceUtils::Config::DEVICE_CPU;\r
278         g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg );\r
279 #endif\r
280 \r
281         //adl::Solver<adl::TYPE_CL>::allocate(g_deviceCL->allocate(\r
282         m_data->m_linVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);\r
283         m_data->m_angVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);\r
284         m_data->m_bodyTimes = new adl::Buffer<float>(g_deviceCL,MAX_CONVEX_BODIES_CL);\r
285 \r
286         m_data->m_localShapeAABB = new adl::Buffer<btAABBHost>(g_deviceCL,MAX_CONVEX_SHAPES_CL);\r
287         \r
288         gLinVelMem = (cl_mem)m_data->m_linVelBuf->m_ptr;\r
289         gAngVelMem = (cl_mem)m_data->m_angVelBuf->m_ptr;\r
290         gBodyTimes = (cl_mem)m_data->m_bodyTimes->m_ptr;\r
291 \r
292         \r
293 \r
294 \r
295         narrowphaseAndSolver = new btGpuNarrowphaseAndSolver(g_deviceCL);\r
296 \r
297         \r
298         \r
299         int maxObjects = btMax(256,MAX_CONVEX_BODIES_CL);\r
300         int maxPairsSmallProxy = 32;\r
301         btOverlappingPairCache* overlappingPairCache=0;\r
302 \r
303         m_data->m_Broadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,\r
304                 g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL);\r
305 \r
306         \r
307 \r
308         cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH);\r
309         g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog);\r
310         \r
311 \r
312         initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, MAX_CONVEX_BODIES_CL);\r
313 \r
314         \r
315 \r
316 \r
317 }\r
318         \r
319 \r
320 \r
321 void CLPhysicsDemo::writeVelocitiesToGpu()\r
322 {\r
323         m_data->m_linVelBuf->write(m_data->m_linVelHost,MAX_CONVEX_BODIES_CL);\r
324         m_data->m_angVelBuf->write(m_data->m_angVelHost,MAX_CONVEX_BODIES_CL);\r
325         m_data->m_bodyTimes->write(m_data->m_bodyTimesHost,MAX_CONVEX_BODIES_CL);\r
326         adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
327 }\r
328 \r
329 \r
330 void CLPhysicsDemo::setupInterop()\r
331 {\r
332         m_data->m_useInterop = true;\r
333 \r
334         g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);\r
335         clFinish(g_cqCommandQue);\r
336 }\r
337 \r
338 void    CLPhysicsDemo::cleanup()\r
339 {\r
340         delete narrowphaseAndSolver;\r
341 \r
342         delete m_data->m_linVelBuf;\r
343         delete m_data->m_angVelBuf;\r
344         delete m_data->m_bodyTimes;\r
345         delete m_data->m_localShapeAABB;\r
346 \r
347         delete m_data->m_Broadphase;\r
348         delete m_data;\r
349 \r
350         delete g_deviceCL->m_kernelManager;\r
351         delete g_deviceCL;\r
352 \r
353         m_data=0;\r
354         g_deviceCL=0;\r
355         delete g_interopBuffer;\r
356         delete s_convexHeightField;\r
357 }\r
358 \r
359 \r
360 \r
361 \r
362 \r
363 void    CLPhysicsDemo::stepSimulation()\r
364 {\r
365         BT_PROFILE("simulationLoop");\r
366         \r
367         {\r
368                 BT_PROFILE("glFinish");\r
369                 glFinish();\r
370         }\r
371         cl_int ciErrNum = CL_SUCCESS;\r
372 \r
373 \r
374         if(m_data->m_useInterop)\r
375         {\r
376                 clBuffer = g_interopBuffer->getCLBUffer();\r
377                 BT_PROFILE("clEnqueueAcquireGLObjects");\r
378                 ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);\r
379                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
380         } else\r
381         {\r
382 \r
383                 glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);\r
384                 glFlush();\r
385 \r
386                 BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");\r
387 \r
388                 blocking=  CL_TRUE;\r
389                 hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY\r
390                 if (!clBuffer)\r
391                 {\r
392                         clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);\r
393                 } \r
394                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
395                         oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
396 \r
397                 ciErrNum = clEnqueueWriteBuffer (       g_cqCommandQue,\r
398                         clBuffer,\r
399                         blocking,\r
400                         0,\r
401                         VBOsize,\r
402                         hostPtr,0,0,0\r
403                 );\r
404                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
405         }\r
406 \r
407 \r
408 \r
409         oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
410         if (runOpenCLKernels && m_numPhysicsInstances)\r
411         {\r
412 \r
413                 gFpIO.m_numObjects = m_numPhysicsInstances;\r
414                 gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;\r
415                 gFpIO.m_clObjectsBuffer = clBuffer;\r
416                 gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;\r
417                 gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;\r
418                 gFpIO.m_numOverlap = 0;\r
419                 {\r
420                         BT_PROFILE("setupGpuAabbs");\r
421                         setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );\r
422                 }\r
423                 if (1)\r
424                 {\r
425                         BT_PROFILE("calculateOverlappingPairs");\r
426                         m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);\r
427                         gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;\r
428                         gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;\r
429                 }\r
430                 \r
431                 //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );\r
432                 if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)\r
433                 {\r
434                         colorPairsOpenCL(gFpIO);\r
435 \r
436                         if (1)\r
437                         {\r
438                                 {\r
439                                         //BT_PROFILE("setupBodies");\r
440                                         if (narrowphaseAndSolver)\r
441                                                 setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());\r
442                                 }\r
443                                 if (gFpIO.m_numOverlap)\r
444                                 {\r
445                                         BT_PROFILE("computeContactsAndSolver");\r
446                                         if (narrowphaseAndSolver)\r
447                                                 narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);\r
448                                 }\r
449 \r
450                                 {\r
451                                         BT_PROFILE("copyBodyVelocities");\r
452                                         if (narrowphaseAndSolver)\r
453                                                 copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());\r
454                                 }\r
455                         }\r
456 \r
457                 } else\r
458                 {\r
459                         printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);\r
460                         btAssert(0);\r
461                 }\r
462 \r
463 \r
464                 {\r
465                         BT_PROFILE("integrateTransforms");\r
466 \r
467                         if (runOpenCLKernels)\r
468                         {\r
469                                 int numObjects = m_numPhysicsInstances;\r
470                                 int offset = SHAPE_VERTEX_BUFFER_SIZE/4;\r
471 \r
472                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);\r
473                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);\r
474                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );\r
475 \r
476                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);\r
477                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);\r
478                                 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);\r
479                                         \r
480                                         \r
481                                         \r
482 \r
483                                 size_t workGroupSize = 64;\r
484                                 size_t  numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);\r
485                                 \r
486                                 if (workGroupSize>numWorkItems)\r
487                                         workGroupSize=numWorkItems;\r
488 \r
489                                 ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);\r
490                                 oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
491                         }\r
492                 }\r
493                         \r
494 \r
495         }\r
496 \r
497         if(m_data->m_useInterop)\r
498         {\r
499                 BT_PROFILE("clEnqueueReleaseGLObjects");\r
500                 ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);\r
501                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
502         }\r
503         else\r
504         {\r
505                 BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");\r
506                 ciErrNum = clEnqueueReadBuffer (        g_cqCommandQue,\r
507                 clBuffer,\r
508                 blocking,\r
509                 0,\r
510                 VBOsize,\r
511                 hostPtr,0,0,0);\r
512 \r
513                 //clReleaseMemObject(clBuffer);\r
514                 adl::DeviceUtils::waitForCompletion( g_deviceCL );\r
515                 glUnmapBuffer( GL_ARRAY_BUFFER);\r
516                 glFlush();\r
517         }\r
518 \r
519         oclCHECKERROR(ciErrNum, CL_SUCCESS);\r
520 \r
521 \r
522         if (runOpenCLKernels)\r
523         {\r
524                 BT_PROFILE("clFinish");\r
525                 clFinish(g_cqCommandQue);\r
526         }\r
527 \r
528         \r
529 }\r