[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / RigidBody / b3GpuRigidBodyPipeline.cpp
1 /*
2 Copyright (c) 2013 Advanced Micro Devices, Inc.  
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose, 
7 including commercial applications, and to alter it and redistribute it freely, 
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Erwin Coumans
15
16 #include "b3GpuRigidBodyPipeline.h"
17 #include "b3GpuRigidBodyPipelineInternalData.h"
18 #include "kernels/integrateKernel.h"
19 #include "kernels/updateAabbsKernel.h"
20
21 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
22 #include "b3GpuNarrowPhase.h"
23 #include "Bullet3Geometry/b3AabbUtil.h"
24 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
25 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
26 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
27 #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
28 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h"
29 #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
30
31 //#define TEST_OTHER_GPU_SOLVER
32
33 #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
34 #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
35
36 bool useBullet2CpuSolver = true;
37
38 //choice of contact solver
39 bool gUseJacobi = false;
40 bool gUseDbvt = false;
41 bool gDumpContactStats = false;
42 bool gCalcWorldSpaceAabbOnCpu = false;
43 bool gUseCalculateOverlappingPairsHost = false;
44 bool gIntegrateOnCpu = false;
45 bool gClearPairsOnGpu = true;
46
47 #define TEST_OTHER_GPU_SOLVER 1
48 #ifdef TEST_OTHER_GPU_SOLVER
49 #include "b3GpuJacobiContactSolver.h"
50 #endif  //TEST_OTHER_GPU_SOLVER
51
52 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
53 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
54 #include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h"
55
56 #include "b3GpuPgsContactSolver.h"
57 #include "b3Solver.h"
58
59 #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
60 #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
61
62 #include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
63 #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
64
65 b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
66 {
67         m_data = new b3GpuRigidBodyPipelineInternalData;
68         m_data->m_constraintUid = 0;
69         m_data->m_config = config;
70         m_data->m_context = ctx;
71         m_data->m_device = device;
72         m_data->m_queue = q;
73
74         m_data->m_solver = new b3PgsJacobiSolver(true);                            //new b3PgsJacobiSolver(true);
75         m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true);  //new b3PgsJacobiSolver(true);
76
77         m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx, q, config.m_maxConvexBodies);
78         m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx, q, config.m_maxBroadphasePairs);
79
80         m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx, q);
81 #ifdef TEST_OTHER_GPU_SOLVER
82         m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
83 #endif  //      TEST_OTHER_GPU_SOLVER
84
85         m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
86
87         m_data->m_raycaster = new b3GpuRaycast(ctx, device, q);
88
89         m_data->m_broadphaseDbvt = broadphaseDbvt;
90         m_data->m_broadphaseSap = broadphaseSap;
91         m_data->m_narrowphase = narrowphase;
92         m_data->m_gravity.setValue(0.f, -9.8f, 0.f);
93
94         cl_int errNum = 0;
95
96         {
97                 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH);
98                 b3Assert(errNum == CL_SUCCESS);
99                 m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog);
100                 b3Assert(errNum == CL_SUCCESS);
101                 clReleaseProgram(prog);
102         }
103         {
104                 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH);
105                 b3Assert(errNum == CL_SUCCESS);
106                 m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog);
107                 b3Assert(errNum == CL_SUCCESS);
108
109                 m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog);
110                 b3Assert(errNum == CL_SUCCESS);
111
112                 clReleaseProgram(prog);
113         }
114 }
115
116 b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
117 {
118         if (m_data->m_integrateTransformsKernel)
119                 clReleaseKernel(m_data->m_integrateTransformsKernel);
120
121         if (m_data->m_updateAabbsKernel)
122                 clReleaseKernel(m_data->m_updateAabbsKernel);
123
124         if (m_data->m_clearOverlappingPairsKernel)
125                 clReleaseKernel(m_data->m_clearOverlappingPairsKernel);
126         delete m_data->m_raycaster;
127         delete m_data->m_solver;
128         delete m_data->m_allAabbsGPU;
129         delete m_data->m_gpuConstraints;
130         delete m_data->m_overlappingPairsGPU;
131
132 #ifdef TEST_OTHER_GPU_SOLVER
133         delete m_data->m_solver3;
134 #endif  //TEST_OTHER_GPU_SOLVER
135
136         delete m_data->m_solver2;
137
138         delete m_data;
139 }
140
141 void b3GpuRigidBodyPipeline::reset()
142 {
143         m_data->m_gpuConstraints->resize(0);
144         m_data->m_cpuConstraints.resize(0);
145         m_data->m_allAabbsGPU->resize(0);
146         m_data->m_allAabbsCPU.resize(0);
147 }
148
149 void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint)
150 {
151         m_data->m_joints.push_back(constraint);
152 }
153
154 void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint)
155 {
156         m_data->m_joints.remove(constraint);
157 }
158
159 void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid)
160 {
161         m_data->m_gpuSolver->recomputeBatches();
162         //slow linear search
163         m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
164         //remove
165         for (int i = 0; i < m_data->m_cpuConstraints.size(); i++)
166         {
167                 if (m_data->m_cpuConstraints[i].m_uid == uid)
168                 {
169                         //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]);
170                         m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1);
171                         m_data->m_cpuConstraints.pop_back();
172
173                         break;
174                 }
175         }
176
177         if (m_data->m_cpuConstraints.size())
178         {
179                 m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
180         }
181         else
182         {
183                 m_data->m_gpuConstraints->resize(0);
184         }
185 }
186 int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold)
187 {
188         m_data->m_gpuSolver->recomputeBatches();
189         b3GpuGenericConstraint c;
190         c.m_uid = m_data->m_constraintUid;
191         m_data->m_constraintUid++;
192         c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
193         c.m_rbA = bodyA;
194         c.m_rbB = bodyB;
195         c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
196         c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
197         c.m_breakingImpulseThreshold = breakingThreshold;
198         c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE;
199         m_data->m_cpuConstraints.push_back(c);
200         return c.m_uid;
201 }
202 int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold)
203 {
204         m_data->m_gpuSolver->recomputeBatches();
205         b3GpuGenericConstraint c;
206         c.m_uid = m_data->m_constraintUid;
207         m_data->m_constraintUid++;
208         c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
209         c.m_rbA = bodyA;
210         c.m_rbB = bodyB;
211         c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
212         c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
213         c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]);
214         c.m_breakingImpulseThreshold = breakingThreshold;
215         c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE;
216
217         m_data->m_cpuConstraints.push_back(c);
218         return c.m_uid;
219 }
220
221 void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
222 {
223         //update worldspace AABBs from local AABB/worldtransform
224         {
225                 B3_PROFILE("setupGpuAabbs");
226                 setupGpuAabbsFull();
227         }
228
229         int numPairs = 0;
230
231         //compute overlapping pairs
232         {
233                 if (gUseDbvt)
234                 {
235                         {
236                                 B3_PROFILE("setAabb");
237                                 m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU);
238                                 for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++)
239                                 {
240                                         b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]);
241                                         b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]);
242                                         m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0);
243                                 }
244                         }
245
246                         {
247                                 B3_PROFILE("calculateOverlappingPairs");
248                                 m_data->m_broadphaseDbvt->calculateOverlappingPairs();
249                         }
250                         numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
251                 }
252                 else
253                 {
254                         if (gUseCalculateOverlappingPairsHost)
255                         {
256                                 m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
257                         }
258                         else
259                         {
260                                 m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
261                         }
262                         numPairs = m_data->m_broadphaseSap->getNumOverlap();
263                 }
264         }
265
266         //compute contact points
267         //      printf("numPairs=%d\n",numPairs);
268
269         int numContacts = 0;
270
271         int numBodies = m_data->m_narrowphase->getNumRigidBodies();
272
273         if (numPairs)
274         {
275                 cl_mem pairs = 0;
276                 cl_mem aabbsWS = 0;
277                 if (gUseDbvt)
278                 {
279                         B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
280                         m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
281                         pairs = m_data->m_overlappingPairsGPU->getBufferCL();
282                         aabbsWS = m_data->m_allAabbsGPU->getBufferCL();
283                 }
284                 else
285                 {
286                         pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer();
287                         aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS();
288                 }
289
290                 m_data->m_overlappingPairsGPU->resize(numPairs);
291
292                 //mark the contacts for each pair as 'unused'
293                 if (numPairs)
294                 {
295                         b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context, m_data->m_queue);
296                         gpuPairs.setFromOpenCLBuffer(pairs, numPairs);
297
298                         if (gClearPairsOnGpu)
299                         {
300                                 //b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging
301                                 //gpuPairs.copyToHost(hostPairs);
302
303                                 b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel");
304                                 launcher.setBuffer(pairs);
305                                 launcher.setConst(numPairs);
306                                 launcher.launch1D(numPairs);
307
308                                 //gpuPairs.copyToHost(hostPairs);
309                         }
310                         else
311                         {
312                                 b3AlignedObjectArray<b3BroadphasePair> hostPairs;
313                                 gpuPairs.copyToHost(hostPairs);
314
315                                 for (int i = 0; i < hostPairs.size(); i++)
316                                 {
317                                         hostPairs[i].z = 0xffffffff;
318                                 }
319
320                                 gpuPairs.copyFromHost(hostPairs);
321                         }
322                 }
323
324                 m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies);
325                 numContacts = m_data->m_narrowphase->getNumContactsGpu();
326
327                 if (gUseDbvt)
328                 {
329                         ///store the cached information (contact locations in the 'z' component)
330                         B3_PROFILE("m_overlappingPairsGPU->copyToHost");
331                         m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
332                 }
333                 if (gDumpContactStats && numContacts)
334                 {
335                         m_data->m_narrowphase->getContactsGpu();
336
337                         printf("numContacts = %d\n", numContacts);
338
339                         int totalPoints = 0;
340                         const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU();
341
342                         for (int i = 0; i < numContacts; i++)
343                         {
344                                 totalPoints += contacts->getNPoints();
345                         }
346                         printf("totalPoints=%d\n", totalPoints);
347                 }
348         }
349
350         //convert contact points to contact constraints
351
352         //solve constraints
353
354         b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context, m_data->m_queue, 0, true);
355         gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies());
356         b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context, m_data->m_queue, 0, true);
357         gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies());
358         b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context, m_data->m_queue, 0, true);
359         gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu());
360
361         int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size();
362         if (useBullet2CpuSolver && numJoints)
363         {
364                 //      b3AlignedObjectArray<b3Contact4> hostContacts;
365                 //gpuContacts.copyToHost(hostContacts);
366                 {
367                         bool useGpu = m_data->m_joints.size() == 0;
368
369                         //                      b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
370                         //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
371                         if (useGpu)
372                         {
373                                 m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints);
374                         }
375                         else
376                         {
377                                 b3AlignedObjectArray<b3RigidBodyData> hostBodies;
378                                 gpuBodies.copyToHost(hostBodies);
379                                 b3AlignedObjectArray<b3InertiaData> hostInertias;
380                                 gpuInertias.copyToHost(hostInertias);
381
382                                 b3TypedConstraint** joints = numJoints ? &m_data->m_joints[0] : 0;
383                                 m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints);
384                                 gpuBodies.copyFromHost(hostBodies);
385                         }
386                 }
387         }
388
389         if (numContacts)
390         {
391 #ifdef TEST_OTHER_GPU_SOLVER
392
393                 if (gUseJacobi)
394                 {
395                         bool useGpu = true;
396                         if (useGpu)
397                         {
398                                 bool forceHost = false;
399                                 if (forceHost)
400                                 {
401                                         b3AlignedObjectArray<b3RigidBodyData> hostBodies;
402                                         b3AlignedObjectArray<b3InertiaData> hostInertias;
403                                         b3AlignedObjectArray<b3Contact4> hostContacts;
404
405                                         {
406                                                 B3_PROFILE("copyToHost");
407                                                 gpuBodies.copyToHost(hostBodies);
408                                                 gpuInertias.copyToHost(hostInertias);
409                                                 gpuContacts.copyToHost(hostContacts);
410                                         }
411
412                                         {
413                                                 b3JacobiSolverInfo solverInfo;
414                                                 m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo);
415                                         }
416                                         {
417                                                 B3_PROFILE("copyFromHost");
418                                                 gpuBodies.copyFromHost(hostBodies);
419                                         }
420                                 }
421                                 else
422
423                                 {
424                                         int static0Index = m_data->m_narrowphase->getStatic0Index();
425                                         b3JacobiSolverInfo solverInfo;
426                                         //m_data->m_solver3->solveContacts(    >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo);
427                                         //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
428                                         m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
429                                 }
430                         }
431                         else
432                         {
433                                 b3AlignedObjectArray<b3RigidBodyData> hostBodies;
434                                 gpuBodies.copyToHost(hostBodies);
435                                 b3AlignedObjectArray<b3InertiaData> hostInertias;
436                                 gpuInertias.copyToHost(hostInertias);
437                                 b3AlignedObjectArray<b3Contact4> hostContacts;
438                                 gpuContacts.copyToHost(hostContacts);
439                                 {
440                                         //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
441                                 }
442                                 gpuBodies.copyFromHost(hostBodies);
443                         }
444                 }
445                 else
446 #endif  //TEST_OTHER_GPU_SOLVER
447                 {
448                         int static0Index = m_data->m_narrowphase->getStatic0Index();
449                         m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
450
451                         //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
452
453                         /*m_data->m_solver3->solveContactConstraintHost(
454                         (b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
455                         (b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
456                         (b3OpenCLArray<Constraint4>*) &gpuContacts,
457                         0,numContacts,256);
458                         */
459                 }
460         }
461
462         integrate(deltaTime);
463 }
464
465 void b3GpuRigidBodyPipeline::integrate(float timeStep)
466 {
467         //integrate
468         int numBodies = m_data->m_narrowphase->getNumRigidBodies();
469         float angularDamp = 0.99f;
470
471         if (gIntegrateOnCpu)
472         {
473                 if (numBodies)
474                 {
475                         b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData();
476                         npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
477
478                         b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0);
479
480                         for (int nodeID = 0; nodeID < numBodies; nodeID++)
481                         {
482                                 integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity);
483                         }
484                         npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU);
485                 }
486         }
487         else
488         {
489                 b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel");
490                 launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
491
492                 launcher.setConst(numBodies);
493                 launcher.setConst(timeStep);
494                 launcher.setConst(angularDamp);
495                 launcher.setConst(m_data->m_gravity);
496                 launcher.launch1D(numBodies);
497         }
498 }
499
500 void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
501 {
502         cl_int ciErrNum = 0;
503
504         int numBodies = m_data->m_narrowphase->getNumRigidBodies();
505         if (!numBodies)
506                 return;
507
508         if (gCalcWorldSpaceAabbOnCpu)
509         {
510                 if (numBodies)
511                 {
512                         if (gUseDbvt)
513                         {
514                                 m_data->m_allAabbsCPU.resize(numBodies);
515                                 m_data->m_narrowphase->readbackAllBodiesToCpu();
516                                 for (int i = 0; i < numBodies; i++)
517                                 {
518                                         b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]);
519                                 }
520                                 m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
521                         }
522                         else
523                         {
524                                 m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies);
525                                 m_data->m_narrowphase->readbackAllBodiesToCpu();
526                                 for (int i = 0; i < numBodies; i++)
527                                 {
528                                         b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]);
529                                 }
530                                 m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU());
531                                 //m_data->m_broadphaseSap->writeAabbsToGpu();
532                         }
533                 }
534         }
535         else
536         {
537                 //__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
538                 b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel");
539                 launcher.setConst(numBodies);
540                 cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
541                 launcher.setBuffer(bodies);
542                 cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu();
543                 launcher.setBuffer(collidables);
544                 cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu();
545                 launcher.setBuffer(localAabbs);
546
547                 cl_mem worldAabbs = 0;
548                 if (gUseDbvt)
549                 {
550                         worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
551                 }
552                 else
553                 {
554                         worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS();
555                 }
556                 launcher.setBuffer(worldAabbs);
557                 launcher.launch1D(numBodies);
558
559                 oclCHECKERROR(ciErrNum, CL_SUCCESS);
560         }
561
562         /*
563         b3AlignedObjectArray<b3SapAabb> aabbs;
564         m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs);
565
566         printf("numAabbs = %d\n",  aabbs.size());
567
568         for (int i=0;i<aabbs.size();i++)
569         {
570                 printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]);
571                 printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]);
572
573         };
574         */
575 }
576
577 cl_mem b3GpuRigidBodyPipeline::getBodyBuffer()
578 {
579         return m_data->m_narrowphase->getBodiesGpu();
580 }
581
582 int b3GpuRigidBodyPipeline::getNumBodies() const
583 {
584         return m_data->m_narrowphase->getNumRigidBodies();
585 }
586
587 void b3GpuRigidBodyPipeline::setGravity(const float* grav)
588 {
589         m_data->m_gravity.setValue(grav[0], grav[1], grav[2]);
590 }
591
592 void b3GpuRigidBodyPipeline::copyConstraintsToHost()
593 {
594         m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
595 }
596
597 void b3GpuRigidBodyPipeline::writeAllInstancesToGpu()
598 {
599         m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
600         m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
601 }
602
603 int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu)
604 {
605         b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0);
606
607         if (collidableIndex >= 0)
608         {
609                 b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
610                 b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]);
611                 b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]);
612
613                 b3Scalar margin = 0.01f;
614                 b3Transform t;
615                 t.setIdentity();
616                 t.setOrigin(b3MakeVector3(position[0], position[1], position[2]));
617                 t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3]));
618                 b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax);
619         }
620         else
621         {
622                 b3Error("registerPhysicsInstance using invalid collidableIndex\n");
623                 return -1;
624         }
625
626         bool writeToGpu = false;
627         int bodyIndex = m_data->m_narrowphase->getNumRigidBodies();
628         bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu);
629
630         if (bodyIndex >= 0)
631         {
632                 if (gUseDbvt)
633                 {
634                         m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1);
635                         b3SapAabb aabb;
636                         for (int i = 0; i < 3; i++)
637                         {
638                                 aabb.m_min[i] = aabbMin[i];
639                                 aabb.m_max[i] = aabbMax[i];
640                                 aabb.m_minIndices[3] = bodyIndex;
641                         }
642                         m_data->m_allAabbsCPU.push_back(aabb);
643                         if (writeInstanceToGpu)
644                         {
645                                 m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
646                         }
647                 }
648                 else
649                 {
650                         if (mass)
651                         {
652                                 m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1);  //m_dispatcher);
653                         }
654                         else
655                         {
656                                 m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1);  //m_dispatcher);
657                         }
658                 }
659         }
660
661         /*
662         if (mass>0.f)
663                 m_numDynamicPhysicsInstances++;
664
665         m_numPhysicsInstances++;
666         */
667
668         return bodyIndex;
669 }
670
671 void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults)
672 {
673         this->m_data->m_raycaster->castRays(rays, hitResults,
674                                                                                 getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(),
675                                                                                 m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),
676                                                                                 m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap);
677 }