2 #include "b3GpuJacobiContactSolver.h"
3 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
4 #include "Bullet3Common/b3AlignedObjectArray.h"
5 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
7 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
8 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
9 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
10 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
11 #include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h"
12 #include "Bullet3Common/b3Logging.h"
13 #include "b3GpuConstraint4.h"
14 #include "Bullet3Common/shared/b3Int2.h"
15 #include "Bullet3Common/shared/b3Int4.h"
16 #define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl"
18 struct b3GpuJacobiSolverInternalData
20 //btRadixSort32CL* m_sort32;
21 //btBoundSearchCL* m_search;
22 b3PrefixScanCL* m_scan;
24 b3OpenCLArray<unsigned int>* m_bodyCount;
25 b3OpenCLArray<b3Int2>* m_contactConstraintOffsets;
26 b3OpenCLArray<unsigned int>* m_offsetSplitBodies;
28 b3OpenCLArray<b3Vector3>* m_deltaLinearVelocities;
29 b3OpenCLArray<b3Vector3>* m_deltaAngularVelocities;
31 b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocitiesCPU;
32 b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocitiesCPU;
34 b3OpenCLArray<b3GpuConstraint4>* m_contactConstraints;
38 cl_kernel m_countBodiesKernel;
39 cl_kernel m_contactToConstraintSplitKernel;
40 cl_kernel m_clearVelocitiesKernel;
41 cl_kernel m_averageVelocitiesKernel;
42 cl_kernel m_updateBodyVelocitiesKernel;
43 cl_kernel m_solveContactKernel;
44 cl_kernel m_solveFrictionKernel;
47 b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
52 m_data = new b3GpuJacobiSolverInternalData;
53 m_data->m_scan = new b3PrefixScanCL(m_context, m_device, m_queue);
54 m_data->m_bodyCount = new b3OpenCLArray<unsigned int>(m_context, m_queue);
55 m_data->m_filler = new b3FillCL(m_context, m_device, m_queue);
56 m_data->m_contactConstraintOffsets = new b3OpenCLArray<b3Int2>(m_context, m_queue);
57 m_data->m_offsetSplitBodies = new b3OpenCLArray<unsigned int>(m_context, m_queue);
58 m_data->m_contactConstraints = new b3OpenCLArray<b3GpuConstraint4>(m_context, m_queue);
59 m_data->m_deltaLinearVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
60 m_data->m_deltaAngularVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
63 const char* additionalMacros = "";
64 const char* solverUtilsSource = solverUtilsCL;
66 cl_program solverUtilsProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverUtilsSource, &pErrNum, additionalMacros, SOLVER_UTILS_KERNEL_PATH);
67 b3Assert(solverUtilsProg);
68 m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
69 b3Assert(m_data->m_countBodiesKernel);
71 m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg, additionalMacros);
72 b3Assert(m_data->m_contactToConstraintSplitKernel);
73 m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
74 b3Assert(m_data->m_clearVelocitiesKernel);
76 m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
77 b3Assert(m_data->m_averageVelocitiesKernel);
79 m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
80 b3Assert(m_data->m_updateBodyVelocitiesKernel);
82 m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
83 b3Assert(m_data->m_solveContactKernel);
85 m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
86 b3Assert(m_data->m_solveFrictionKernel);
90 b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver()
92 clReleaseKernel(m_data->m_solveContactKernel);
93 clReleaseKernel(m_data->m_solveFrictionKernel);
94 clReleaseKernel(m_data->m_countBodiesKernel);
95 clReleaseKernel(m_data->m_contactToConstraintSplitKernel);
96 clReleaseKernel(m_data->m_averageVelocitiesKernel);
97 clReleaseKernel(m_data->m_updateBodyVelocitiesKernel);
98 clReleaseKernel(m_data->m_clearVelocitiesKernel);
100 delete m_data->m_deltaLinearVelocities;
101 delete m_data->m_deltaAngularVelocities;
102 delete m_data->m_contactConstraints;
103 delete m_data->m_offsetSplitBodies;
104 delete m_data->m_contactConstraintOffsets;
105 delete m_data->m_bodyCount;
106 delete m_data->m_filler;
107 delete m_data->m_scan;
111 b3Vector3 make_float4(float v)
113 return b3MakeVector3(v, v, v);
116 b3Vector4 make_float4(float x, float y, float z, float w)
118 return b3MakeVector4(x, y, z, w);
121 static inline float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
122 const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
124 return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
127 static inline void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
128 b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
131 angular0 = b3Cross(r0, n);
132 angular1 = -b3Cross(r1, n);
135 static __inline void solveContact(b3GpuConstraint4& cs,
136 const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
137 const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
138 float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
140 for (int ic = 0; ic < 4; ic++)
142 // dont necessary because this makes change to 0
143 if (cs.m_jacCoeffInv[ic] == 0.f) continue;
146 b3Vector3 angular0, angular1, linear;
147 b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
148 b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
149 setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1);
151 float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1,
152 linVelARO + dLinVelA, angVelARO + dAngVelA, linVelBRO + dLinVelB, angVelBRO + dAngVelB) +
154 rambdaDt *= cs.m_jacCoeffInv[ic];
157 float prevSum = cs.m_appliedRambdaDt[ic];
158 float updated = prevSum;
160 updated = b3Max(updated, minRambdaDt[ic]);
161 updated = b3Min(updated, maxRambdaDt[ic]);
162 rambdaDt = updated - prevSum;
163 cs.m_appliedRambdaDt[ic] = updated;
166 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
167 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
168 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
169 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
171 b3Assert(_finite(linImp0.getX()));
172 b3Assert(_finite(linImp1.getX()));
189 void solveContact3(b3GpuConstraint4* cs,
190 b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
191 b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
192 b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB)
194 float minRambdaDt = 0;
195 float maxRambdaDt = FLT_MAX;
197 for (int ic = 0; ic < 4; ic++)
199 if (cs->m_jacCoeffInv[ic] == 0.f) continue;
201 b3Vector3 angular0, angular1, linear;
202 b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr;
203 b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr;
204 setLinearAndAngular(cs->m_linear, r0, r1, linear, angular0, angular1);
206 float rambdaDt = calcRelVel(cs->m_linear, -cs->m_linear, angular0, angular1,
207 *linVelA + *dLinVelA, *angVelA + *dAngVelA, *linVelB + *dLinVelB, *angVelB + *dAngVelB) +
209 rambdaDt *= cs->m_jacCoeffInv[ic];
212 float prevSum = cs->m_appliedRambdaDt[ic];
213 float updated = prevSum;
215 updated = b3Max(updated, minRambdaDt);
216 updated = b3Min(updated, maxRambdaDt);
217 rambdaDt = updated - prevSum;
218 cs->m_appliedRambdaDt[ic] = updated;
221 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
222 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
223 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
224 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
228 *dLinVelA += linImp0;
229 *dAngVelA += angImp0;
233 *dLinVelB += linImp1;
234 *dAngVelB += angImp1;
239 static inline void solveFriction(b3GpuConstraint4& cs,
240 const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
241 const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
242 float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
244 b3Vector3 linVelA = linVelARO + dLinVelA;
245 b3Vector3 linVelB = linVelBRO + dLinVelB;
246 b3Vector3 angVelA = angVelARO + dAngVelA;
247 b3Vector3 angVelB = angVelBRO + dAngVelB;
249 if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return;
250 const b3Vector3& center = (const b3Vector3&)cs.m_center;
252 b3Vector3 n = -(const b3Vector3&)cs.m_linear;
254 b3Vector3 tangent[2];
256 b3PlaneSpace1(n, tangent[0], tangent[1]);
258 b3Vector3 r = cs.m_worldPos[0] - center;
259 tangent[0] = cross3(n, r);
260 tangent[1] = cross3(tangent[0], n);
261 tangent[0] = normalize3(tangent[0]);
262 tangent[1] = normalize3(tangent[1]);
265 b3Vector3 angular0, angular1, linear;
266 b3Vector3 r0 = center - posA;
267 b3Vector3 r1 = center - posB;
268 for (int i = 0; i < 2; i++)
270 setLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1);
271 float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
272 linVelA, angVelA, linVelB, angVelB);
273 rambdaDt *= cs.m_fJacCoeffInv[i];
276 float prevSum = cs.m_fAppliedRambdaDt[i];
277 float updated = prevSum;
279 updated = b3Max(updated, minRambdaDt[i]);
280 updated = b3Min(updated, maxRambdaDt[i]);
281 rambdaDt = updated - prevSum;
282 cs.m_fAppliedRambdaDt[i] = updated;
285 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
286 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
287 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
288 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
290 b3Assert(_finite(linImp0.getX()));
291 b3Assert(_finite(linImp1.getX()));
305 { // angular damping for point constraint
306 b3Vector3 ab = (posB - posA).normalized();
307 b3Vector3 ac = (center - posA).normalized();
308 if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
310 float angNA = b3Dot(n, angVelA);
311 float angNB = b3Dot(n, angVelB);
314 dAngVelA -= (angNA * 0.1f) * n;
316 dAngVelB -= (angNB * 0.1f) * n;
321 float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1,
322 float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB)
324 // linear0,1 are normlized
325 float jmj0 = invMass0; //dot3F4(linear0, linear0)*invMass0;
327 float jmj1 = b3Dot(mtMul3(angular0, *invInertia0), angular0);
328 float jmj2 = invMass1; //dot3F4(linear1, linear1)*invMass1;
329 float jmj3 = b3Dot(mtMul3(angular1, *invInertia1), angular1);
330 return -1.f / ((jmj0 + jmj1) * countA + (jmj2 + jmj3) * countB);
331 // return -1.f/((jmj0+jmj1)+(jmj2+jmj3));
334 void setConstraint4(const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
335 const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
336 b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB,
337 b3GpuConstraint4* dstC)
339 dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
340 dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
342 float dtInv = 1.f / dt;
343 for (int ic = 0; ic < 4; ic++)
345 dstC->m_appliedRambdaDt[ic] = 0.f;
347 dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
349 dstC->m_linear = src->m_worldNormalOnB;
350 dstC->m_linear[3] = 0.7f; //src->getFrictionCoeff() );
351 for (int ic = 0; ic < 4; ic++)
353 b3Vector3 r0 = src->m_worldPosB[ic] - posA;
354 b3Vector3 r1 = src->m_worldPosB[ic] - posB;
356 if (ic >= src->m_worldNormalOnB[3]) //npoints
358 dstC->m_jacCoeffInv[ic] = 0.f;
364 b3Vector3 linear, angular0, angular1;
365 setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1);
367 dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
368 invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
370 relVelN = calcRelVel(linear, -linear, angular0, angular1,
371 linVelA, angVelA, linVelB, angVelB);
373 float e = 0.f; //src->getRestituitionCoeff();
374 if (relVelN * relVelN < 0.004f)
379 dstC->m_b[ic] = e * relVelN;
380 //float penetration = src->m_worldPos[ic].w;
381 dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift) * positionConstraintCoeff * dtInv;
382 dstC->m_appliedRambdaDt[ic] = 0.f;
386 if (src->m_worldNormalOnB[3] > 0) //npoints
387 { // prepare friction
388 b3Vector3 center = make_float4(0.f);
389 for (int i = 0; i < src->m_worldNormalOnB[3]; i++)
390 center += src->m_worldPosB[i];
391 center /= (float)src->m_worldNormalOnB[3];
393 b3Vector3 tangent[2];
394 b3PlaneSpace1(src->m_worldNormalOnB, tangent[0], tangent[1]);
397 r[0] = center - posA;
398 r[1] = center - posB;
400 for (int i = 0; i < 2; i++)
402 b3Vector3 linear, angular0, angular1;
403 setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
405 dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
406 invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
407 dstC->m_fAppliedRambdaDt[i] = 0.f;
409 dstC->m_center = center;
412 for (int i = 0; i < 4; i++)
414 if (i < src->m_worldNormalOnB[3])
416 dstC->m_worldPos[i] = src->m_worldPosB[i];
420 dstC->m_worldPos[i] = make_float4(0.f);
425 void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts,
428 float positionConstraintCoeff, int gIdx, b3AlignedObjectArray<unsigned int>& bodyCount)
430 //int gIdx = 0;//GET_GLOBAL_IDX;
432 if (gIdx < nContacts)
434 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
435 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
437 b3Vector3 posA = gBodies[aIdx].m_pos;
438 b3Vector3 linVelA = gBodies[aIdx].m_linVel;
439 b3Vector3 angVelA = gBodies[aIdx].m_angVel;
440 float invMassA = gBodies[aIdx].m_invMass;
441 b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld; //.m_invInertia;
443 b3Vector3 posB = gBodies[bIdx].m_pos;
444 b3Vector3 linVelB = gBodies[bIdx].m_linVel;
445 b3Vector3 angVelB = gBodies[bIdx].m_angVel;
446 float invMassB = gBodies[bIdx].m_invMass;
447 b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld; //m_invInertia;
450 float countA = invMassA ? (float)(bodyCount[aIdx]) : 1;
451 float countB = invMassB ? (float)(bodyCount[bIdx]) : 1;
452 setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
453 &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, countA, countB,
456 cs.m_batchIdx = gContact[gIdx].m_batchIdx;
458 gConstraintOut[gIdx] = cs;
462 void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo)
464 B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
466 b3AlignedObjectArray<unsigned int> bodyCount;
467 bodyCount.resize(numBodies);
468 for (int i = 0; i < numBodies; i++)
471 b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
472 contactConstraintOffsets.resize(numManifolds);
474 for (int i = 0; i < numManifolds; i++)
476 int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
477 int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
479 bool isFixedA = (pa < 0) || (pa == solverInfo.m_fixedBodyIndex);
480 bool isFixedB = (pb < 0) || (pb == solverInfo.m_fixedBodyIndex);
482 int bodyIndexA = manifoldPtr[i].getBodyA();
483 int bodyIndexB = manifoldPtr[i].getBodyB();
487 contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
488 bodyCount[bodyIndexA]++;
492 contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
493 bodyCount[bodyIndexB]++;
497 b3AlignedObjectArray<unsigned int> offsetSplitBodies;
498 offsetSplitBodies.resize(numBodies);
499 unsigned int totalNumSplitBodies;
500 m_data->m_scan->executeHost(bodyCount, offsetSplitBodies, numBodies, &totalNumSplitBodies);
501 int numlastBody = bodyCount[numBodies - 1];
502 totalNumSplitBodies += numlastBody;
503 printf("totalNumSplitBodies = %d\n", totalNumSplitBodies);
505 b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
506 contactConstraints.resize(numManifolds);
508 for (int i = 0; i < numManifolds; i++)
510 ContactToConstraintKernel(&manifoldPtr[0], bodies, inertias, &contactConstraints[0], numManifolds,
511 solverInfo.m_deltaTime,
512 solverInfo.m_positionDrift,
513 solverInfo.m_positionConstraintCoeff,
516 int maxIter = solverInfo.m_numIterations;
518 b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
519 b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
520 deltaLinearVelocities.resize(totalNumSplitBodies);
521 deltaAngularVelocities.resize(totalNumSplitBodies);
522 for (unsigned int i = 0; i < totalNumSplitBodies; i++)
524 deltaLinearVelocities[i].setZero();
525 deltaAngularVelocities[i].setZero();
528 for (int iter = 0; iter < maxIter; iter++)
531 for (i = 0; i < numManifolds; i++)
533 //float frictionCoeff = contactConstraints[i].getFrictionCoeff();
534 int aIdx = (int)contactConstraints[i].m_bodyA;
535 int bIdx = (int)contactConstraints[i].m_bodyB;
536 b3RigidBodyData& bodyA = bodies[aIdx];
537 b3RigidBodyData& bodyB = bodies[bIdx];
539 b3Vector3 zero = b3MakeVector3(0, 0, 0);
541 b3Vector3* dlvAPtr = &zero;
542 b3Vector3* davAPtr = &zero;
543 b3Vector3* dlvBPtr = &zero;
544 b3Vector3* davBPtr = &zero;
548 int bodyOffsetA = offsetSplitBodies[aIdx];
549 int constraintOffsetA = contactConstraintOffsets[i].x;
550 int splitIndexA = bodyOffsetA + constraintOffsetA;
551 dlvAPtr = &deltaLinearVelocities[splitIndexA];
552 davAPtr = &deltaAngularVelocities[splitIndexA];
557 int bodyOffsetB = offsetSplitBodies[bIdx];
558 int constraintOffsetB = contactConstraintOffsets[i].y;
559 int splitIndexB = bodyOffsetB + constraintOffsetB;
560 dlvBPtr = &deltaLinearVelocities[splitIndexB];
561 davBPtr = &deltaAngularVelocities[splitIndexB];
565 float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
566 float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
568 solveContact(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
569 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
570 maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
575 for (int i = 0; i < numBodies; i++)
577 if (bodies[i].m_invMass)
579 int bodyOffset = offsetSplitBodies[i];
580 int count = bodyCount[i];
581 float factor = 1.f / float(count);
582 b3Vector3 averageLinVel;
583 averageLinVel.setZero();
584 b3Vector3 averageAngVel;
585 averageAngVel.setZero();
586 for (int j = 0; j < count; j++)
588 averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
589 averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
591 for (int j = 0; j < count; j++)
593 deltaLinearVelocities[bodyOffset + j] = averageLinVel;
594 deltaAngularVelocities[bodyOffset + j] = averageAngVel;
599 for (int iter = 0; iter < maxIter; iter++)
605 for (int i = 0; i < numManifolds; i++)
607 float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
608 float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
611 for (int j = 0; j < 4; j++)
613 sum += contactConstraints[i].m_appliedRambdaDt[j];
615 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
616 int aIdx = (int)contactConstraints[i].m_bodyA;
617 int bIdx = (int)contactConstraints[i].m_bodyB;
618 b3RigidBodyData& bodyA = bodies[aIdx];
619 b3RigidBodyData& bodyB = bodies[bIdx];
621 b3Vector3 zero = b3MakeVector3(0, 0, 0);
623 b3Vector3* dlvAPtr = &zero;
624 b3Vector3* davAPtr = &zero;
625 b3Vector3* dlvBPtr = &zero;
626 b3Vector3* davBPtr = &zero;
630 int bodyOffsetA = offsetSplitBodies[aIdx];
631 int constraintOffsetA = contactConstraintOffsets[i].x;
632 int splitIndexA = bodyOffsetA + constraintOffsetA;
633 dlvAPtr = &deltaLinearVelocities[splitIndexA];
634 davAPtr = &deltaAngularVelocities[splitIndexA];
639 int bodyOffsetB = offsetSplitBodies[bIdx];
640 int constraintOffsetB = contactConstraintOffsets[i].y;
641 int splitIndexB = bodyOffsetB + constraintOffsetB;
642 dlvBPtr = &deltaLinearVelocities[splitIndexB];
643 davBPtr = &deltaAngularVelocities[splitIndexB];
646 for (int j = 0; j < 4; j++)
648 maxRambdaDt[j] = frictionCoeff * sum;
649 minRambdaDt[j] = -maxRambdaDt[j];
652 solveFriction(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
653 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
654 maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
658 for (int i = 0; i < numBodies; i++)
660 if (bodies[i].m_invMass)
662 int bodyOffset = offsetSplitBodies[i];
663 int count = bodyCount[i];
664 float factor = 1.f / float(count);
665 b3Vector3 averageLinVel;
666 averageLinVel.setZero();
667 b3Vector3 averageAngVel;
668 averageAngVel.setZero();
669 for (int j = 0; j < count; j++)
671 averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
672 averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
674 for (int j = 0; j < count; j++)
676 deltaLinearVelocities[bodyOffset + j] = averageLinVel;
677 deltaAngularVelocities[bodyOffset + j] = averageAngVel;
684 for (int i = 0; i < numBodies; i++)
686 if (bodies[i].m_invMass)
688 int bodyOffset = offsetSplitBodies[i];
689 int count = bodyCount[i];
692 bodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
693 bodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
699 void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index)
702 //void b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* bodies,b3OpenCLArray<b3InertiaData>* inertias,b3OpenCLArray<b3Contact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo)
704 b3JacobiSolverInfo solverInfo;
705 solverInfo.m_fixedBodyIndex = static0Index;
707 B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
709 //int numBodies = bodies->size();
710 int numManifolds = numContacts; //manifoldPtr->size();
713 B3_PROFILE("resize");
714 m_data->m_bodyCount->resize(numBodies);
717 unsigned int val = 0;
723 B3_PROFILE("m_filler");
724 m_data->m_contactConstraintOffsets->resize(numManifolds);
725 m_data->m_filler->execute(*m_data->m_bodyCount, val, numBodies);
727 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets, val2, numManifolds);
731 B3_PROFILE("m_countBodiesKernel");
732 b3LauncherCL launcher(this->m_queue, m_data->m_countBodiesKernel, "m_countBodiesKernel");
733 launcher.setBuffer(contactBuf); //manifoldPtr->getBufferCL());
734 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
735 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
736 launcher.setConst(numManifolds);
737 launcher.setConst(solverInfo.m_fixedBodyIndex);
738 launcher.launch1D(numManifolds);
740 unsigned int totalNumSplitBodies = 0;
742 B3_PROFILE("m_scan->execute");
744 m_data->m_offsetSplitBodies->resize(numBodies);
745 m_data->m_scan->execute(*m_data->m_bodyCount, *m_data->m_offsetSplitBodies, numBodies, &totalNumSplitBodies);
746 totalNumSplitBodies += m_data->m_bodyCount->at(numBodies - 1);
750 B3_PROFILE("m_data->m_contactConstraints->resize");
751 //int numContacts = manifoldPtr->size();
752 m_data->m_contactConstraints->resize(numContacts);
756 B3_PROFILE("contactToConstraintSplitKernel");
757 b3LauncherCL launcher(m_queue, m_data->m_contactToConstraintSplitKernel, "m_contactToConstraintSplitKernel");
758 launcher.setBuffer(contactBuf);
759 launcher.setBuffer(bodyBuf);
760 launcher.setBuffer(inertiaBuf);
761 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
762 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
763 launcher.setConst(numContacts);
764 launcher.setConst(solverInfo.m_deltaTime);
765 launcher.setConst(solverInfo.m_positionDrift);
766 launcher.setConst(solverInfo.m_positionConstraintCoeff);
767 launcher.launch1D(numContacts, 64);
771 B3_PROFILE("m_data->m_deltaLinearVelocities->resize");
772 m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
773 m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
777 B3_PROFILE("m_clearVelocitiesKernel");
778 b3LauncherCL launch(m_queue, m_data->m_clearVelocitiesKernel, "m_clearVelocitiesKernel");
779 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
780 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
781 launch.setConst(totalNumSplitBodies);
782 launch.launch1D(totalNumSplitBodies);
786 int maxIter = solverInfo.m_numIterations;
788 for (int iter = 0; iter < maxIter; iter++)
791 B3_PROFILE("m_solveContactKernel");
792 b3LauncherCL launcher(m_queue, m_data->m_solveContactKernel, "m_solveContactKernel");
793 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
794 launcher.setBuffer(bodyBuf);
795 launcher.setBuffer(inertiaBuf);
796 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
797 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
798 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
799 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
800 launcher.setConst(solverInfo.m_deltaTime);
801 launcher.setConst(solverInfo.m_positionDrift);
802 launcher.setConst(solverInfo.m_positionConstraintCoeff);
803 launcher.setConst(solverInfo.m_fixedBodyIndex);
804 launcher.setConst(numManifolds);
806 launcher.launch1D(numManifolds);
811 B3_PROFILE("average velocities");
812 b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
813 launcher.setBuffer(bodyBuf);
814 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
815 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
816 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
817 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
818 launcher.setConst(numBodies);
819 launcher.launch1D(numBodies);
824 B3_PROFILE("m_solveFrictionKernel");
825 b3LauncherCL launcher(m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel");
826 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
827 launcher.setBuffer(bodyBuf);
828 launcher.setBuffer(inertiaBuf);
829 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
830 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
831 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
832 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
833 launcher.setConst(solverInfo.m_deltaTime);
834 launcher.setConst(solverInfo.m_positionDrift);
835 launcher.setConst(solverInfo.m_positionConstraintCoeff);
836 launcher.setConst(solverInfo.m_fixedBodyIndex);
837 launcher.setConst(numManifolds);
839 launcher.launch1D(numManifolds);
844 B3_PROFILE("average velocities");
845 b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
846 launcher.setBuffer(bodyBuf);
847 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
848 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
849 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
850 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
851 launcher.setConst(numBodies);
852 launcher.launch1D(numBodies);
858 B3_PROFILE("update body velocities");
859 b3LauncherCL launcher(m_queue, m_data->m_updateBodyVelocitiesKernel, "m_updateBodyVelocitiesKernel");
860 launcher.setBuffer(bodyBuf);
861 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
862 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
863 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
864 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
865 launcher.setConst(numBodies);
866 launcher.launch1D(numBodies);
873 void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray<b3RigidBodyData>* bodiesGPU,b3OpenCLArray<b3InertiaData>* inertiasGPU,b3OpenCLArray<b3Contact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo)
876 b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
877 bodiesGPU->copyToHost(bodiesCPU);
878 b3AlignedObjectArray<b3InertiaData> inertiasCPU;
879 inertiasGPU->copyToHost(inertiasCPU);
880 b3AlignedObjectArray<b3Contact4> manifoldPtrCPU;
881 manifoldPtrGPU->copyToHost(manifoldPtrCPU);
883 int numBodiesCPU = bodiesGPU->size();
884 int numManifoldsCPU = manifoldPtrGPU->size();
885 B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed");
887 b3AlignedObjectArray<unsigned int> bodyCount;
888 bodyCount.resize(numBodiesCPU);
889 for (int i=0;i<numBodiesCPU;i++)
892 b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
893 contactConstraintOffsets.resize(numManifoldsCPU);
896 for (int i=0;i<numManifoldsCPU;i++)
898 int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit;
899 int pb = manifoldPtrCPU[i].m_bodyBPtrAndSignBit;
901 bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
902 bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
904 int bodyIndexA = manifoldPtrCPU[i].getBodyA();
905 int bodyIndexB = manifoldPtrCPU[i].getBodyB();
909 contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
910 bodyCount[bodyIndexA]++;
914 contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
915 bodyCount[bodyIndexB]++;
919 b3AlignedObjectArray<unsigned int> offsetSplitBodies;
920 offsetSplitBodies.resize(numBodiesCPU);
921 unsigned int totalNumSplitBodiesCPU;
922 m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU);
923 int numlastBody = bodyCount[numBodiesCPU-1];
924 totalNumSplitBodiesCPU += numlastBody;
926 int numBodies = bodiesGPU->size();
927 int numManifolds = manifoldPtrGPU->size();
929 m_data->m_bodyCount->resize(numBodies);
937 B3_PROFILE("m_filler");
938 m_data->m_contactConstraintOffsets->resize(numManifolds);
939 m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
942 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
946 B3_PROFILE("m_countBodiesKernel");
947 b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
948 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
949 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
950 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
951 launcher.setConst(numManifolds);
952 launcher.setConst(solverInfo.m_fixedBodyIndex);
953 launcher.launch1D(numManifolds);
956 unsigned int totalNumSplitBodies=0;
957 m_data->m_offsetSplitBodies->resize(numBodies);
958 m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
959 totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
961 if (totalNumSplitBodies != totalNumSplitBodiesCPU)
963 printf("error in totalNumSplitBodies!\n");
966 int numContacts = manifoldPtrGPU->size();
967 m_data->m_contactConstraints->resize(numContacts);
971 B3_PROFILE("contactToConstraintSplitKernel");
972 b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
973 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
974 launcher.setBuffer(bodiesGPU->getBufferCL());
975 launcher.setBuffer(inertiasGPU->getBufferCL());
976 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
977 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
978 launcher.setConst(numContacts);
979 launcher.setConst(solverInfo.m_deltaTime);
980 launcher.setConst(solverInfo.m_positionDrift);
981 launcher.setConst(solverInfo.m_positionConstraintCoeff);
982 launcher.launch1D( numContacts, 64 );
988 b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
989 contactConstraints.resize(numManifoldsCPU);
991 for (int i=0;i<numManifoldsCPU;i++)
993 ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU,
994 solverInfo.m_deltaTime,
995 solverInfo.m_positionDrift,
996 solverInfo.m_positionConstraintCoeff,
999 int maxIter = solverInfo.m_numIterations;
1002 b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
1003 b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
1004 deltaLinearVelocities.resize(totalNumSplitBodiesCPU);
1005 deltaAngularVelocities.resize(totalNumSplitBodiesCPU);
1006 for (int i=0;i<totalNumSplitBodiesCPU;i++)
1008 deltaLinearVelocities[i].setZero();
1009 deltaAngularVelocities[i].setZero();
1012 m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
1013 m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
1018 B3_PROFILE("m_clearVelocitiesKernel");
1019 b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
1020 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1021 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1022 launch.setConst(totalNumSplitBodies);
1023 launch.launch1D(totalNumSplitBodies);
1027 ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1030 m_data->m_contactConstraints->copyToHost(contactConstraints);
1031 m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies);
1032 m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets);
1033 m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1034 m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1036 for (int iter = 0;iter<maxIter;iter++)
1040 B3_PROFILE("m_solveContactKernel");
1041 b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel );
1042 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1043 launcher.setBuffer(bodiesGPU->getBufferCL());
1044 launcher.setBuffer(inertiasGPU->getBufferCL());
1045 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1046 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1047 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1048 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1049 launcher.setConst(solverInfo.m_deltaTime);
1050 launcher.setConst(solverInfo.m_positionDrift);
1051 launcher.setConst(solverInfo.m_positionConstraintCoeff);
1052 launcher.setConst(solverInfo.m_fixedBodyIndex);
1053 launcher.setConst(numManifolds);
1055 launcher.launch1D(numManifolds);
1061 for( i=0; i<numManifoldsCPU; i++)
1064 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1065 int aIdx = (int)contactConstraints[i].m_bodyA;
1066 int bIdx = (int)contactConstraints[i].m_bodyB;
1067 b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1068 b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1070 b3Vector3 zero(0,0,0);
1072 b3Vector3* dlvAPtr=&zero;
1073 b3Vector3* davAPtr=&zero;
1074 b3Vector3* dlvBPtr=&zero;
1075 b3Vector3* davBPtr=&zero;
1077 if (bodyA.m_invMass)
1079 int bodyOffsetA = offsetSplitBodies[aIdx];
1080 int constraintOffsetA = contactConstraintOffsets[i].x;
1081 int splitIndexA = bodyOffsetA+constraintOffsetA;
1082 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1083 davAPtr = &deltaAngularVelocities[splitIndexA];
1086 if (bodyB.m_invMass)
1088 int bodyOffsetB = offsetSplitBodies[bIdx];
1089 int constraintOffsetB = contactConstraintOffsets[i].y;
1090 int splitIndexB= bodyOffsetB+constraintOffsetB;
1091 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1092 davBPtr = &deltaAngularVelocities[splitIndexB];
1098 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1099 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1101 solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld,
1102 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1103 maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr );
1111 B3_PROFILE("average velocities");
1112 b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1113 launcher.setBuffer(bodiesGPU->getBufferCL());
1114 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1115 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1116 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1117 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1118 launcher.setConst(numBodies);
1119 launcher.launch1D(numBodies);
1124 for (int i=0;i<numBodiesCPU;i++)
1126 if (bodiesCPU[i].m_invMass)
1128 int bodyOffset = offsetSplitBodies[i];
1129 int count = bodyCount[i];
1130 float factor = 1.f/float(count);
1131 b3Vector3 averageLinVel;
1132 averageLinVel.setZero();
1133 b3Vector3 averageAngVel;
1134 averageAngVel.setZero();
1135 for (int j=0;j<count;j++)
1137 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1138 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1140 for (int j=0;j<count;j++)
1142 deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1143 deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1147 // m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
1148 //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
1149 m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1150 m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1155 B3_PROFILE("m_solveFrictionKernel");
1156 b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
1157 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1158 launcher.setBuffer(bodiesGPU->getBufferCL());
1159 launcher.setBuffer(inertiasGPU->getBufferCL());
1160 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1161 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1162 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1163 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1164 launcher.setConst(solverInfo.m_deltaTime);
1165 launcher.setConst(solverInfo.m_positionDrift);
1166 launcher.setConst(solverInfo.m_positionConstraintCoeff);
1167 launcher.setConst(solverInfo.m_fixedBodyIndex);
1168 launcher.setConst(numManifolds);
1170 launcher.launch1D(numManifolds);
1176 for(int i=0; i<numManifoldsCPU; i++)
1178 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1179 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1182 for(int j=0; j<4; j++)
1184 sum +=contactConstraints[i].m_appliedRambdaDt[j];
1186 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1187 int aIdx = (int)contactConstraints[i].m_bodyA;
1188 int bIdx = (int)contactConstraints[i].m_bodyB;
1189 b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1190 b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1192 b3Vector3 zero(0,0,0);
1194 b3Vector3* dlvAPtr=&zero;
1195 b3Vector3* davAPtr=&zero;
1196 b3Vector3* dlvBPtr=&zero;
1197 b3Vector3* davBPtr=&zero;
1199 if (bodyA.m_invMass)
1201 int bodyOffsetA = offsetSplitBodies[aIdx];
1202 int constraintOffsetA = contactConstraintOffsets[i].x;
1203 int splitIndexA = bodyOffsetA+constraintOffsetA;
1204 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1205 davAPtr = &deltaAngularVelocities[splitIndexA];
1208 if (bodyB.m_invMass)
1210 int bodyOffsetB = offsetSplitBodies[bIdx];
1211 int constraintOffsetB = contactConstraintOffsets[i].y;
1212 int splitIndexB= bodyOffsetB+constraintOffsetB;
1213 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1214 davBPtr = &deltaAngularVelocities[splitIndexB];
1217 for(int j=0; j<4; j++)
1219 maxRambdaDt[j] = frictionCoeff*sum;
1220 minRambdaDt[j] = -maxRambdaDt[j];
1223 solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld,
1224 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1225 maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
1230 B3_PROFILE("average velocities");
1231 b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1232 launcher.setBuffer(bodiesGPU->getBufferCL());
1233 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1234 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1235 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1236 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1237 launcher.setConst(numBodies);
1238 launcher.launch1D(numBodies);
1243 for (int i=0;i<numBodiesCPU;i++)
1245 if (bodiesCPU[i].m_invMass)
1247 int bodyOffset = offsetSplitBodies[i];
1248 int count = bodyCount[i];
1249 float factor = 1.f/float(count);
1250 b3Vector3 averageLinVel;
1251 averageLinVel.setZero();
1252 b3Vector3 averageAngVel;
1253 averageAngVel.setZero();
1254 for (int j=0;j<count;j++)
1256 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1257 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1259 for (int j=0;j<count;j++)
1261 deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1262 deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1272 B3_PROFILE("update body velocities");
1273 b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
1274 launcher.setBuffer(bodiesGPU->getBufferCL());
1275 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1276 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1277 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1278 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1279 launcher.setConst(numBodies);
1280 launcher.launch1D(numBodies);
1286 for (int i=0;i<numBodiesCPU;i++)
1288 if (bodiesCPU[i].m_invMass)
1290 int bodyOffset = offsetSplitBodies[i];
1291 int count = bodyCount[i];
1294 bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset];
1295 bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset];
1301 // bodiesGPU->copyFromHost(bodiesCPU);