2 Copyright (c) 2012 Advanced Micro Devices, Inc.
\r
4 This software is provided 'as-is', without any express or implied warranty.
\r
5 In no event will the authors be held liable for any damages arising from the use of this software.
\r
6 Permission is granted to anyone to use this software for any purpose,
\r
7 including commercial applications, and to alter it and redistribute it freely,
\r
8 subject to the following restrictions:
\r
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
\r
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
\r
12 3. This notice may not be removed or altered from any source distribution.
\r
14 //Originally written by Erwin Coumans
\r
16 #include "OpenGLInclude.h"
\r
18 #include "CLPhysicsDemo.h"
\r
19 #include "LinearMath/btAlignedObjectArray.h"
\r
20 #include "DemoSettings.h"
\r
21 #include "../basic_initialize/btOpenCLUtils.h"
\r
22 #include "../opengl_interop/btOpenCLGLInteropBuffer.h"
\r
23 #include "../broadphase_benchmark/findPairsOpenCL.h"
\r
24 #include "LinearMath/btVector3.h"
\r
25 #include "LinearMath/btQuaternion.h"
\r
26 #include "LinearMath/btMatrix3x3.h"
\r
27 #include "../../opencl/gpu_rigidbody_pipeline/btGpuNarrowPhaseAndSolver.h"
\r
28 #include "../../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"
\r
29 #include "../../dynamics/basic_demo/ConvexHeightFieldShape.h"
\r
30 #include "../broadphase_benchmark/btGridBroadphaseCl.h"
\r
31 #include "LinearMath/btQuickprof.h"
\r
34 #define MSTRINGIFY(A) #A
\r
35 static char* interopKernelString =
\r
36 #include "../broadphase_benchmark/integrateKernel.cl"
\r
38 #define INTEROPKERNEL_SRC_PATH "../../opencl/broadphase_benchmark/integrateKernel.cl"
\r
40 cl_kernel g_integrateTransformsKernel;
\r
44 bool runOpenCLKernels = true;
\r
47 btGpuNarrowphaseAndSolver* narrowphaseAndSolver = 0;
\r
48 ConvexHeightField* s_convexHeightField = 0 ;
\r
49 btOpenCLGLInteropBuffer* g_interopBuffer = 0;
\r
51 extern GLuint cube_vbo;
\r
56 cl_bool blocking= CL_TRUE;
\r
60 btFindPairsIO gFpIO;
\r
62 cl_context g_cxMainContext;
\r
63 cl_command_queue g_cqCommandQue;
\r
64 cl_device_id g_device;
\r
66 cl_mem gLinVelMem=0;
\r
67 cl_mem gAngVelMem=0;
\r
68 cl_mem gBodyTimes=0;
\r
70 #include <Adl/Adl.h>
\r
72 adl::DeviceCL* g_deviceCL=0;
\r
74 struct btAABBHost //keep this in sync with btAABBCL!
\r
84 adl::Buffer<btVector3>* m_linVelBuf;
\r
85 adl::Buffer<btVector3>* m_angVelBuf;
\r
86 adl::Buffer<float>* m_bodyTimes;
\r
88 btGridBroadphaseCl* m_Broadphase;
\r
90 adl::Buffer<btAABBHost>* m_localShapeAABB;
\r
92 btVector3* m_linVelHost;
\r
93 btVector3* m_angVelHost;
\r
94 float* m_bodyTimesHost;
\r
96 InternalData():m_linVelBuf(0),m_angVelBuf(0),m_bodyTimes(0),m_useInterop(0),m_Broadphase(0)
\r
98 m_linVelHost= new btVector3[MAX_CONVEX_BODIES_CL];
\r
99 m_angVelHost = new btVector3[MAX_CONVEX_BODIES_CL];
\r
100 m_bodyTimesHost = new float[MAX_CONVEX_BODIES_CL];
\r
104 delete[] m_linVelHost;
\r
105 delete[] m_angVelHost;
\r
106 delete[] m_bodyTimesHost;
\r
112 void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop)
\r
118 glCtx = wglGetCurrentContext();
\r
120 GLXContext glCtx = glXGetCurrentContext();
\r
122 glDC = wglGetCurrentDC();
\r
125 #ifdef CL_PLATFORM_INTEL
\r
126 cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
\r
128 cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
\r
135 g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
\r
138 g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
\r
142 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
144 int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
\r
148 g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
\r
149 btOpenCLDeviceInfo clInfo;
\r
150 btOpenCLUtils::getDeviceInfo(g_device,clInfo);
\r
151 btOpenCLUtils::printDeviceInfo(g_device);
\r
152 g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
\r
153 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
161 CLPhysicsDemo::CLPhysicsDemo(Win32OpenGLWindow* renderer)
\r
163 m_numCollisionShapes=0;
\r
164 m_numPhysicsInstances=0;
\r
166 m_data = new InternalData;
\r
169 CLPhysicsDemo::~CLPhysicsDemo()
\r
175 void CLPhysicsDemo::writeBodiesToGpu()
\r
177 if (narrowphaseAndSolver)
\r
178 narrowphaseAndSolver->writeAllBodiesToGpu();
\r
181 int CLPhysicsDemo::registerCollisionShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
\r
183 btAlignedObjectArray<btVector3> verts;
\r
185 unsigned char* vts = (unsigned char*) vertices;
\r
186 for (int i=0;i<numVertices;i++)
\r
188 float* vertex = (float*) &vts[i*strideInBytes];
\r
189 verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
\r
192 btConvexUtility util;
\r
194 util.initializePolyhedralFeatures(verts,merge);
\r
196 int numFaces= util.m_faces.size();
\r
197 float4* eqn = new float4[numFaces];
\r
198 for (int i=0;i<numFaces;i++)
\r
200 eqn[i].x = util.m_faces[i].m_plane[0];
\r
201 eqn[i].y = util.m_faces[i].m_plane[1];
\r
202 eqn[i].z = util.m_faces[i].m_plane[2];
\r
203 eqn[i].w = util.m_faces[i].m_plane[3];
\r
205 printf("numFaces = %d\n", numFaces);
\r
208 s_convexHeightField = new ConvexHeightField(eqn,numFaces);
\r
212 if (narrowphaseAndSolver)
\r
213 shapeIndex = narrowphaseAndSolver->registerShape(s_convexHeightField);
\r
217 btAABBHost aabbMin, aabbMax;
\r
218 aabbMin.fx = s_convexHeightField->m_aabb.m_min.x;
\r
219 aabbMin.fy = s_convexHeightField->m_aabb.m_min.y;
\r
220 aabbMin.fz= s_convexHeightField->m_aabb.m_min.z;
\r
221 aabbMin.uw = shapeIndex;
\r
223 aabbMax.fx = s_convexHeightField->m_aabb.m_max.x;
\r
224 aabbMax.fy = s_convexHeightField->m_aabb.m_max.y;
\r
225 aabbMax.fz= s_convexHeightField->m_aabb.m_max.z;
\r
226 aabbMax.uw = shapeIndex;
\r
228 m_data->m_localShapeAABB->write(&aabbMin,1,shapeIndex*2);
\r
229 m_data->m_localShapeAABB->write(&aabbMax,1,shapeIndex*2+1);
\r
230 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
233 m_numCollisionShapes++;
\r
238 int CLPhysicsDemo::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, void* userPointer)
\r
240 btVector3 aabbMin(position[0],position[0],position[0]);
\r
241 btVector3 aabbMax = aabbMin;
\r
242 aabbMin -= btVector3(1.f,1.f,1.f);
\r
243 aabbMax += btVector3(1.f,1.f,1.f);
\r
245 if (collisionShapeIndex>=0)
\r
247 btBroadphaseProxy* proxy = m_data->m_Broadphase->createProxy(aabbMin,aabbMax,collisionShapeIndex,userPointer,1,1,0,0);//m_dispatcher);
\r
250 bool writeToGpu = false;
\r
251 int bodyIndex = -1;
\r
253 if (narrowphaseAndSolver)
\r
254 bodyIndex = narrowphaseAndSolver->registerRigidBody(collisionShapeIndex,mass,position,orientation,writeToGpu);
\r
256 m_numPhysicsInstances++;
\r
262 void CLPhysicsDemo::init(int preferredDevice, int preferredPlatform, bool useInterop)
\r
265 InitCL(-1,-1,useInterop);
\r
267 #define CUSTOM_CL_INITIALIZATION
\r
268 #ifdef CUSTOM_CL_INITIALIZATION
\r
269 g_deviceCL = new adl::DeviceCL();
\r
270 g_deviceCL->m_deviceIdx = g_device;
\r
271 g_deviceCL->m_context = g_cxMainContext;
\r
272 g_deviceCL->m_commandQueue = g_cqCommandQue;
\r
273 g_deviceCL->m_kernelManager = new adl::KernelManager;
\r
276 DeviceUtils::Config cfg;
\r
277 cfg.m_type = DeviceUtils::Config::DEVICE_CPU;
\r
278 g_deviceCL = DeviceUtils::allocate( TYPE_CL, cfg );
\r
281 //adl::Solver<adl::TYPE_CL>::allocate(g_deviceCL->allocate(
\r
282 m_data->m_linVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
\r
283 m_data->m_angVelBuf = new adl::Buffer<btVector3>(g_deviceCL,MAX_CONVEX_BODIES_CL);
\r
284 m_data->m_bodyTimes = new adl::Buffer<float>(g_deviceCL,MAX_CONVEX_BODIES_CL);
\r
286 m_data->m_localShapeAABB = new adl::Buffer<btAABBHost>(g_deviceCL,MAX_CONVEX_SHAPES_CL);
\r
288 gLinVelMem = (cl_mem)m_data->m_linVelBuf->m_ptr;
\r
289 gAngVelMem = (cl_mem)m_data->m_angVelBuf->m_ptr;
\r
290 gBodyTimes = (cl_mem)m_data->m_bodyTimes->m_ptr;
\r
295 narrowphaseAndSolver = new btGpuNarrowphaseAndSolver(g_deviceCL);
\r
299 int maxObjects = btMax(256,MAX_CONVEX_BODIES_CL);
\r
300 int maxPairsSmallProxy = 32;
\r
301 btOverlappingPairCache* overlappingPairCache=0;
\r
303 m_data->m_Broadphase = new btGridBroadphaseCl(overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
\r
304 g_cxMainContext ,g_device,g_cqCommandQue, g_deviceCL);
\r
308 cl_program prog = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,interopKernelString,0,"",INTEROPKERNEL_SRC_PATH);
\r
309 g_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,interopKernelString, "integrateTransformsKernel" ,0,prog);
\r
312 initFindPairs(gFpIO, g_cxMainContext, g_device, g_cqCommandQue, MAX_CONVEX_BODIES_CL);
\r
321 void CLPhysicsDemo::writeVelocitiesToGpu()
\r
323 m_data->m_linVelBuf->write(m_data->m_linVelHost,MAX_CONVEX_BODIES_CL);
\r
324 m_data->m_angVelBuf->write(m_data->m_angVelHost,MAX_CONVEX_BODIES_CL);
\r
325 m_data->m_bodyTimes->write(m_data->m_bodyTimesHost,MAX_CONVEX_BODIES_CL);
\r
326 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
330 void CLPhysicsDemo::setupInterop()
\r
332 m_data->m_useInterop = true;
\r
334 g_interopBuffer = new btOpenCLGLInteropBuffer(g_cxMainContext,g_cqCommandQue,cube_vbo);
\r
335 clFinish(g_cqCommandQue);
\r
338 void CLPhysicsDemo::cleanup()
\r
340 delete narrowphaseAndSolver;
\r
342 delete m_data->m_linVelBuf;
\r
343 delete m_data->m_angVelBuf;
\r
344 delete m_data->m_bodyTimes;
\r
345 delete m_data->m_localShapeAABB;
\r
347 delete m_data->m_Broadphase;
\r
350 delete g_deviceCL->m_kernelManager;
\r
355 delete g_interopBuffer;
\r
356 delete s_convexHeightField;
\r
363 void CLPhysicsDemo::stepSimulation()
\r
365 BT_PROFILE("simulationLoop");
\r
368 BT_PROFILE("glFinish");
\r
371 cl_int ciErrNum = CL_SUCCESS;
\r
374 if(m_data->m_useInterop)
\r
376 clBuffer = g_interopBuffer->getCLBUffer();
\r
377 BT_PROFILE("clEnqueueAcquireGLObjects");
\r
378 ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
\r
379 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
383 glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
\r
386 BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");
\r
389 hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
\r
392 clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
\r
394 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
395 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
397 ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue,
\r
404 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
409 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
410 if (runOpenCLKernels && m_numPhysicsInstances)
\r
413 gFpIO.m_numObjects = m_numPhysicsInstances;
\r
414 gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
\r
415 gFpIO.m_clObjectsBuffer = clBuffer;
\r
416 gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
\r
417 gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
\r
418 gFpIO.m_numOverlap = 0;
\r
420 BT_PROFILE("setupGpuAabbs");
\r
421 setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
\r
425 BT_PROFILE("calculateOverlappingPairs");
\r
426 m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
\r
427 gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
\r
428 gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
\r
431 //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
\r
432 if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
\r
434 colorPairsOpenCL(gFpIO);
\r
439 //BT_PROFILE("setupBodies");
\r
440 if (narrowphaseAndSolver)
\r
441 setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
\r
443 if (gFpIO.m_numOverlap)
\r
445 BT_PROFILE("computeContactsAndSolver");
\r
446 if (narrowphaseAndSolver)
\r
447 narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
\r
451 BT_PROFILE("copyBodyVelocities");
\r
452 if (narrowphaseAndSolver)
\r
453 copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
\r
459 printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
\r
465 BT_PROFILE("integrateTransforms");
\r
467 if (runOpenCLKernels)
\r
469 int numObjects = m_numPhysicsInstances;
\r
470 int offset = SHAPE_VERTEX_BUFFER_SIZE/4;
\r
472 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
\r
473 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
\r
474 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
\r
476 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
\r
477 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
\r
478 ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
\r
483 size_t workGroupSize = 64;
\r
484 size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
\r
486 if (workGroupSize>numWorkItems)
\r
487 workGroupSize=numWorkItems;
\r
489 ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
\r
490 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
497 if(m_data->m_useInterop)
\r
499 BT_PROFILE("clEnqueueReleaseGLObjects");
\r
500 ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
\r
501 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
505 BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
\r
506 ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue,
\r
513 //clReleaseMemObject(clBuffer);
\r
514 adl::DeviceUtils::waitForCompletion( g_deviceCL );
\r
515 glUnmapBuffer( GL_ARRAY_BUFFER);
\r
519 oclCHECKERROR(ciErrNum, CL_SUCCESS);
\r
522 if (runOpenCLKernels)
\r
524 BT_PROFILE("clFinish");
\r
525 clFinish(g_cqCommandQue);
\r