[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / RigidBody / b3GpuJacobiContactSolver.cpp
1
2 #include "b3GpuJacobiContactSolver.h"
3 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
4 #include "Bullet3Common/b3AlignedObjectArray.h"
5 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"  //b3Int2
6 class b3Vector3;
7 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
8 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
9 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
10 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
11 #include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h"
12 #include "Bullet3Common/b3Logging.h"
13 #include "b3GpuConstraint4.h"
14 #include "Bullet3Common/shared/b3Int2.h"
15 #include "Bullet3Common/shared/b3Int4.h"
16 #define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl"
17
18 struct b3GpuJacobiSolverInternalData
19 {
20         //btRadixSort32CL*      m_sort32;
21         //btBoundSearchCL*      m_search;
22         b3PrefixScanCL* m_scan;
23
24         b3OpenCLArray<unsigned int>* m_bodyCount;
25         b3OpenCLArray<b3Int2>* m_contactConstraintOffsets;
26         b3OpenCLArray<unsigned int>* m_offsetSplitBodies;
27
28         b3OpenCLArray<b3Vector3>* m_deltaLinearVelocities;
29         b3OpenCLArray<b3Vector3>* m_deltaAngularVelocities;
30
31         b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocitiesCPU;
32         b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocitiesCPU;
33
34         b3OpenCLArray<b3GpuConstraint4>* m_contactConstraints;
35
36         b3FillCL* m_filler;
37
38         cl_kernel m_countBodiesKernel;
39         cl_kernel m_contactToConstraintSplitKernel;
40         cl_kernel m_clearVelocitiesKernel;
41         cl_kernel m_averageVelocitiesKernel;
42         cl_kernel m_updateBodyVelocitiesKernel;
43         cl_kernel m_solveContactKernel;
44         cl_kernel m_solveFrictionKernel;
45 };
46
47 b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
48         : m_context(ctx),
49           m_device(device),
50           m_queue(queue)
51 {
52         m_data = new b3GpuJacobiSolverInternalData;
53         m_data->m_scan = new b3PrefixScanCL(m_context, m_device, m_queue);
54         m_data->m_bodyCount = new b3OpenCLArray<unsigned int>(m_context, m_queue);
55         m_data->m_filler = new b3FillCL(m_context, m_device, m_queue);
56         m_data->m_contactConstraintOffsets = new b3OpenCLArray<b3Int2>(m_context, m_queue);
57         m_data->m_offsetSplitBodies = new b3OpenCLArray<unsigned int>(m_context, m_queue);
58         m_data->m_contactConstraints = new b3OpenCLArray<b3GpuConstraint4>(m_context, m_queue);
59         m_data->m_deltaLinearVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
60         m_data->m_deltaAngularVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
61
62         cl_int pErrNum;
63         const char* additionalMacros = "";
64         const char* solverUtilsSource = solverUtilsCL;
65         {
66                 cl_program solverUtilsProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverUtilsSource, &pErrNum, additionalMacros, SOLVER_UTILS_KERNEL_PATH);
67                 b3Assert(solverUtilsProg);
68                 m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
69                 b3Assert(m_data->m_countBodiesKernel);
70
71                 m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg, additionalMacros);
72                 b3Assert(m_data->m_contactToConstraintSplitKernel);
73                 m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
74                 b3Assert(m_data->m_clearVelocitiesKernel);
75
76                 m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
77                 b3Assert(m_data->m_averageVelocitiesKernel);
78
79                 m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
80                 b3Assert(m_data->m_updateBodyVelocitiesKernel);
81
82                 m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
83                 b3Assert(m_data->m_solveContactKernel);
84
85                 m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
86                 b3Assert(m_data->m_solveFrictionKernel);
87         }
88 }
89
90 b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver()
91 {
92         clReleaseKernel(m_data->m_solveContactKernel);
93         clReleaseKernel(m_data->m_solveFrictionKernel);
94         clReleaseKernel(m_data->m_countBodiesKernel);
95         clReleaseKernel(m_data->m_contactToConstraintSplitKernel);
96         clReleaseKernel(m_data->m_averageVelocitiesKernel);
97         clReleaseKernel(m_data->m_updateBodyVelocitiesKernel);
98         clReleaseKernel(m_data->m_clearVelocitiesKernel);
99
100         delete m_data->m_deltaLinearVelocities;
101         delete m_data->m_deltaAngularVelocities;
102         delete m_data->m_contactConstraints;
103         delete m_data->m_offsetSplitBodies;
104         delete m_data->m_contactConstraintOffsets;
105         delete m_data->m_bodyCount;
106         delete m_data->m_filler;
107         delete m_data->m_scan;
108         delete m_data;
109 }
110
111 b3Vector3 make_float4(float v)
112 {
113         return b3MakeVector3(v, v, v);
114 }
115
116 b3Vector4 make_float4(float x, float y, float z, float w)
117 {
118         return b3MakeVector4(x, y, z, w);
119 }
120
121 static inline float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
122                                                            const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
123 {
124         return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
125 }
126
127 static inline void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
128                                                                            b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
129 {
130         linear = n;
131         angular0 = b3Cross(r0, n);
132         angular1 = -b3Cross(r1, n);
133 }
134
135 static __inline void solveContact(b3GpuConstraint4& cs,
136                                                                   const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
137                                                                   const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
138                                                                   float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
139 {
140         for (int ic = 0; ic < 4; ic++)
141         {
142                 //      dont necessary because this makes change to 0
143                 if (cs.m_jacCoeffInv[ic] == 0.f) continue;
144
145                 {
146                         b3Vector3 angular0, angular1, linear;
147                         b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
148                         b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
149                         setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1);
150
151                         float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1,
152                                                                                 linVelARO + dLinVelA, angVelARO + dAngVelA, linVelBRO + dLinVelB, angVelBRO + dAngVelB) +
153                                                          cs.m_b[ic];
154                         rambdaDt *= cs.m_jacCoeffInv[ic];
155
156                         {
157                                 float prevSum = cs.m_appliedRambdaDt[ic];
158                                 float updated = prevSum;
159                                 updated += rambdaDt;
160                                 updated = b3Max(updated, minRambdaDt[ic]);
161                                 updated = b3Min(updated, maxRambdaDt[ic]);
162                                 rambdaDt = updated - prevSum;
163                                 cs.m_appliedRambdaDt[ic] = updated;
164                         }
165
166                         b3Vector3 linImp0 = invMassA * linear * rambdaDt;
167                         b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
168                         b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
169                         b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
170 #ifdef _WIN32
171                         b3Assert(_finite(linImp0.getX()));
172                         b3Assert(_finite(linImp1.getX()));
173 #endif
174
175                         if (invMassA)
176                         {
177                                 dLinVelA += linImp0;
178                                 dAngVelA += angImp0;
179                         }
180                         if (invMassB)
181                         {
182                                 dLinVelB += linImp1;
183                                 dAngVelB += angImp1;
184                         }
185                 }
186         }
187 }
188
189 void solveContact3(b3GpuConstraint4* cs,
190                                    b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
191                                    b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
192                                    b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB)
193 {
194         float minRambdaDt = 0;
195         float maxRambdaDt = FLT_MAX;
196
197         for (int ic = 0; ic < 4; ic++)
198         {
199                 if (cs->m_jacCoeffInv[ic] == 0.f) continue;
200
201                 b3Vector3 angular0, angular1, linear;
202                 b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr;
203                 b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr;
204                 setLinearAndAngular(cs->m_linear, r0, r1, linear, angular0, angular1);
205
206                 float rambdaDt = calcRelVel(cs->m_linear, -cs->m_linear, angular0, angular1,
207                                                                         *linVelA + *dLinVelA, *angVelA + *dAngVelA, *linVelB + *dLinVelB, *angVelB + *dAngVelB) +
208                                                  cs->m_b[ic];
209                 rambdaDt *= cs->m_jacCoeffInv[ic];
210
211                 {
212                         float prevSum = cs->m_appliedRambdaDt[ic];
213                         float updated = prevSum;
214                         updated += rambdaDt;
215                         updated = b3Max(updated, minRambdaDt);
216                         updated = b3Min(updated, maxRambdaDt);
217                         rambdaDt = updated - prevSum;
218                         cs->m_appliedRambdaDt[ic] = updated;
219                 }
220
221                 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
222                 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
223                 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
224                 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
225
226                 if (invMassA)
227                 {
228                         *dLinVelA += linImp0;
229                         *dAngVelA += angImp0;
230                 }
231                 if (invMassB)
232                 {
233                         *dLinVelB += linImp1;
234                         *dAngVelB += angImp1;
235                 }
236         }
237 }
238
239 static inline void solveFriction(b3GpuConstraint4& cs,
240                                                                  const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
241                                                                  const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
242                                                                  float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
243 {
244         b3Vector3 linVelA = linVelARO + dLinVelA;
245         b3Vector3 linVelB = linVelBRO + dLinVelB;
246         b3Vector3 angVelA = angVelARO + dAngVelA;
247         b3Vector3 angVelB = angVelBRO + dAngVelB;
248
249         if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return;
250         const b3Vector3& center = (const b3Vector3&)cs.m_center;
251
252         b3Vector3 n = -(const b3Vector3&)cs.m_linear;
253
254         b3Vector3 tangent[2];
255 #if 1
256         b3PlaneSpace1(n, tangent[0], tangent[1]);
257 #else
258         b3Vector3 r = cs.m_worldPos[0] - center;
259         tangent[0] = cross3(n, r);
260         tangent[1] = cross3(tangent[0], n);
261         tangent[0] = normalize3(tangent[0]);
262         tangent[1] = normalize3(tangent[1]);
263 #endif
264
265         b3Vector3 angular0, angular1, linear;
266         b3Vector3 r0 = center - posA;
267         b3Vector3 r1 = center - posB;
268         for (int i = 0; i < 2; i++)
269         {
270                 setLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1);
271                 float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
272                                                                         linVelA, angVelA, linVelB, angVelB);
273                 rambdaDt *= cs.m_fJacCoeffInv[i];
274
275                 {
276                         float prevSum = cs.m_fAppliedRambdaDt[i];
277                         float updated = prevSum;
278                         updated += rambdaDt;
279                         updated = b3Max(updated, minRambdaDt[i]);
280                         updated = b3Min(updated, maxRambdaDt[i]);
281                         rambdaDt = updated - prevSum;
282                         cs.m_fAppliedRambdaDt[i] = updated;
283                 }
284
285                 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
286                 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
287                 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
288                 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
289 #ifdef _WIN32
290                 b3Assert(_finite(linImp0.getX()));
291                 b3Assert(_finite(linImp1.getX()));
292 #endif
293                 if (invMassA)
294                 {
295                         dLinVelA += linImp0;
296                         dAngVelA += angImp0;
297                 }
298                 if (invMassB)
299                 {
300                         dLinVelB += linImp1;
301                         dAngVelB += angImp1;
302                 }
303         }
304
305         {  //   angular damping for point constraint
306                 b3Vector3 ab = (posB - posA).normalized();
307                 b3Vector3 ac = (center - posA).normalized();
308                 if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
309                 {
310                         float angNA = b3Dot(n, angVelA);
311                         float angNB = b3Dot(n, angVelB);
312
313                         if (invMassA)
314                                 dAngVelA -= (angNA * 0.1f) * n;
315                         if (invMassB)
316                                 dAngVelB -= (angNB * 0.1f) * n;
317                 }
318         }
319 }
320
321 float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1,
322                                    float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB)
323 {
324         //      linear0,1 are normlized
325         float jmj0 = invMass0;  //dot3F4(linear0, linear0)*invMass0;
326
327         float jmj1 = b3Dot(mtMul3(angular0, *invInertia0), angular0);
328         float jmj2 = invMass1;  //dot3F4(linear1, linear1)*invMass1;
329         float jmj3 = b3Dot(mtMul3(angular1, *invInertia1), angular1);
330         return -1.f / ((jmj0 + jmj1) * countA + (jmj2 + jmj3) * countB);
331         //      return -1.f/((jmj0+jmj1)+(jmj2+jmj3));
332 }
333
334 void setConstraint4(const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
335                                         const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
336                                         b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB,
337                                         b3GpuConstraint4* dstC)
338 {
339         dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
340         dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
341
342         float dtInv = 1.f / dt;
343         for (int ic = 0; ic < 4; ic++)
344         {
345                 dstC->m_appliedRambdaDt[ic] = 0.f;
346         }
347         dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
348
349         dstC->m_linear = src->m_worldNormalOnB;
350         dstC->m_linear[3] = 0.7f;  //src->getFrictionCoeff() );
351         for (int ic = 0; ic < 4; ic++)
352         {
353                 b3Vector3 r0 = src->m_worldPosB[ic] - posA;
354                 b3Vector3 r1 = src->m_worldPosB[ic] - posB;
355
356                 if (ic >= src->m_worldNormalOnB[3])  //npoints
357                 {
358                         dstC->m_jacCoeffInv[ic] = 0.f;
359                         continue;
360                 }
361
362                 float relVelN;
363                 {
364                         b3Vector3 linear, angular0, angular1;
365                         setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1);
366
367                         dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
368                                                                                                    invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
369
370                         relVelN = calcRelVel(linear, -linear, angular0, angular1,
371                                                                  linVelA, angVelA, linVelB, angVelB);
372
373                         float e = 0.f;  //src->getRestituitionCoeff();
374                         if (relVelN * relVelN < 0.004f)
375                         {
376                                 e = 0.f;
377                         }
378
379                         dstC->m_b[ic] = e * relVelN;
380                         //float penetration = src->m_worldPos[ic].w;
381                         dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift) * positionConstraintCoeff * dtInv;
382                         dstC->m_appliedRambdaDt[ic] = 0.f;
383                 }
384         }
385
386         if (src->m_worldNormalOnB[3] > 0)  //npoints
387         {                                  //   prepare friction
388                 b3Vector3 center = make_float4(0.f);
389                 for (int i = 0; i < src->m_worldNormalOnB[3]; i++)
390                         center += src->m_worldPosB[i];
391                 center /= (float)src->m_worldNormalOnB[3];
392
393                 b3Vector3 tangent[2];
394                 b3PlaneSpace1(src->m_worldNormalOnB, tangent[0], tangent[1]);
395
396                 b3Vector3 r[2];
397                 r[0] = center - posA;
398                 r[1] = center - posB;
399
400                 for (int i = 0; i < 2; i++)
401                 {
402                         b3Vector3 linear, angular0, angular1;
403                         setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
404
405                         dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
406                                                                                                    invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
407                         dstC->m_fAppliedRambdaDt[i] = 0.f;
408                 }
409                 dstC->m_center = center;
410         }
411
412         for (int i = 0; i < 4; i++)
413         {
414                 if (i < src->m_worldNormalOnB[3])
415                 {
416                         dstC->m_worldPos[i] = src->m_worldPosB[i];
417                 }
418                 else
419                 {
420                         dstC->m_worldPos[i] = make_float4(0.f);
421                 }
422         }
423 }
424
425 void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts,
426                                                            float dt,
427                                                            float positionDrift,
428                                                            float positionConstraintCoeff, int gIdx, b3AlignedObjectArray<unsigned int>& bodyCount)
429 {
430         //int gIdx = 0;//GET_GLOBAL_IDX;
431
432         if (gIdx < nContacts)
433         {
434                 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
435                 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
436
437                 b3Vector3 posA = gBodies[aIdx].m_pos;
438                 b3Vector3 linVelA = gBodies[aIdx].m_linVel;
439                 b3Vector3 angVelA = gBodies[aIdx].m_angVel;
440                 float invMassA = gBodies[aIdx].m_invMass;
441                 b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld;  //.m_invInertia;
442
443                 b3Vector3 posB = gBodies[bIdx].m_pos;
444                 b3Vector3 linVelB = gBodies[bIdx].m_linVel;
445                 b3Vector3 angVelB = gBodies[bIdx].m_angVel;
446                 float invMassB = gBodies[bIdx].m_invMass;
447                 b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;  //m_invInertia;
448
449                 b3GpuConstraint4 cs;
450                 float countA = invMassA ? (float)(bodyCount[aIdx]) : 1;
451                 float countB = invMassB ? (float)(bodyCount[bIdx]) : 1;
452                 setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
453                                            &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, countA, countB,
454                                            &cs);
455
456                 cs.m_batchIdx = gContact[gIdx].m_batchIdx;
457
458                 gConstraintOut[gIdx] = cs;
459         }
460 }
461
462 void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo)
463 {
464         B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
465
466         b3AlignedObjectArray<unsigned int> bodyCount;
467         bodyCount.resize(numBodies);
468         for (int i = 0; i < numBodies; i++)
469                 bodyCount[i] = 0;
470
471         b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
472         contactConstraintOffsets.resize(numManifolds);
473
474         for (int i = 0; i < numManifolds; i++)
475         {
476                 int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
477                 int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
478
479                 bool isFixedA = (pa < 0) || (pa == solverInfo.m_fixedBodyIndex);
480                 bool isFixedB = (pb < 0) || (pb == solverInfo.m_fixedBodyIndex);
481
482                 int bodyIndexA = manifoldPtr[i].getBodyA();
483                 int bodyIndexB = manifoldPtr[i].getBodyB();
484
485                 if (!isFixedA)
486                 {
487                         contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
488                         bodyCount[bodyIndexA]++;
489                 }
490                 if (!isFixedB)
491                 {
492                         contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
493                         bodyCount[bodyIndexB]++;
494                 }
495         }
496
497         b3AlignedObjectArray<unsigned int> offsetSplitBodies;
498         offsetSplitBodies.resize(numBodies);
499         unsigned int totalNumSplitBodies;
500         m_data->m_scan->executeHost(bodyCount, offsetSplitBodies, numBodies, &totalNumSplitBodies);
501         int numlastBody = bodyCount[numBodies - 1];
502         totalNumSplitBodies += numlastBody;
503         printf("totalNumSplitBodies = %d\n", totalNumSplitBodies);
504
505         b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
506         contactConstraints.resize(numManifolds);
507
508         for (int i = 0; i < numManifolds; i++)
509         {
510                 ContactToConstraintKernel(&manifoldPtr[0], bodies, inertias, &contactConstraints[0], numManifolds,
511                                                                   solverInfo.m_deltaTime,
512                                                                   solverInfo.m_positionDrift,
513                                                                   solverInfo.m_positionConstraintCoeff,
514                                                                   i, bodyCount);
515         }
516         int maxIter = solverInfo.m_numIterations;
517
518         b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
519         b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
520         deltaLinearVelocities.resize(totalNumSplitBodies);
521         deltaAngularVelocities.resize(totalNumSplitBodies);
522         for (unsigned int i = 0; i < totalNumSplitBodies; i++)
523         {
524                 deltaLinearVelocities[i].setZero();
525                 deltaAngularVelocities[i].setZero();
526         }
527
528         for (int iter = 0; iter < maxIter; iter++)
529         {
530                 int i = 0;
531                 for (i = 0; i < numManifolds; i++)
532                 {
533                         //float frictionCoeff = contactConstraints[i].getFrictionCoeff();
534                         int aIdx = (int)contactConstraints[i].m_bodyA;
535                         int bIdx = (int)contactConstraints[i].m_bodyB;
536                         b3RigidBodyData& bodyA = bodies[aIdx];
537                         b3RigidBodyData& bodyB = bodies[bIdx];
538
539                         b3Vector3 zero = b3MakeVector3(0, 0, 0);
540
541                         b3Vector3* dlvAPtr = &zero;
542                         b3Vector3* davAPtr = &zero;
543                         b3Vector3* dlvBPtr = &zero;
544                         b3Vector3* davBPtr = &zero;
545
546                         if (bodyA.m_invMass)
547                         {
548                                 int bodyOffsetA = offsetSplitBodies[aIdx];
549                                 int constraintOffsetA = contactConstraintOffsets[i].x;
550                                 int splitIndexA = bodyOffsetA + constraintOffsetA;
551                                 dlvAPtr = &deltaLinearVelocities[splitIndexA];
552                                 davAPtr = &deltaAngularVelocities[splitIndexA];
553                         }
554
555                         if (bodyB.m_invMass)
556                         {
557                                 int bodyOffsetB = offsetSplitBodies[bIdx];
558                                 int constraintOffsetB = contactConstraintOffsets[i].y;
559                                 int splitIndexB = bodyOffsetB + constraintOffsetB;
560                                 dlvBPtr = &deltaLinearVelocities[splitIndexB];
561                                 davBPtr = &deltaAngularVelocities[splitIndexB];
562                         }
563
564                         {
565                                 float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
566                                 float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
567
568                                 solveContact(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
569                                                          (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
570                                                          maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
571                         }
572                 }
573
574                 //easy
575                 for (int i = 0; i < numBodies; i++)
576                 {
577                         if (bodies[i].m_invMass)
578                         {
579                                 int bodyOffset = offsetSplitBodies[i];
580                                 int count = bodyCount[i];
581                                 float factor = 1.f / float(count);
582                                 b3Vector3 averageLinVel;
583                                 averageLinVel.setZero();
584                                 b3Vector3 averageAngVel;
585                                 averageAngVel.setZero();
586                                 for (int j = 0; j < count; j++)
587                                 {
588                                         averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
589                                         averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
590                                 }
591                                 for (int j = 0; j < count; j++)
592                                 {
593                                         deltaLinearVelocities[bodyOffset + j] = averageLinVel;
594                                         deltaAngularVelocities[bodyOffset + j] = averageAngVel;
595                                 }
596                         }
597                 }
598         }
599         for (int iter = 0; iter < maxIter; iter++)
600         {
601                 //int i=0;
602
603                 //solve friction
604
605                 for (int i = 0; i < numManifolds; i++)
606                 {
607                         float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
608                         float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
609
610                         float sum = 0;
611                         for (int j = 0; j < 4; j++)
612                         {
613                                 sum += contactConstraints[i].m_appliedRambdaDt[j];
614                         }
615                         float frictionCoeff = contactConstraints[i].getFrictionCoeff();
616                         int aIdx = (int)contactConstraints[i].m_bodyA;
617                         int bIdx = (int)contactConstraints[i].m_bodyB;
618                         b3RigidBodyData& bodyA = bodies[aIdx];
619                         b3RigidBodyData& bodyB = bodies[bIdx];
620
621                         b3Vector3 zero = b3MakeVector3(0, 0, 0);
622
623                         b3Vector3* dlvAPtr = &zero;
624                         b3Vector3* davAPtr = &zero;
625                         b3Vector3* dlvBPtr = &zero;
626                         b3Vector3* davBPtr = &zero;
627
628                         if (bodyA.m_invMass)
629                         {
630                                 int bodyOffsetA = offsetSplitBodies[aIdx];
631                                 int constraintOffsetA = contactConstraintOffsets[i].x;
632                                 int splitIndexA = bodyOffsetA + constraintOffsetA;
633                                 dlvAPtr = &deltaLinearVelocities[splitIndexA];
634                                 davAPtr = &deltaAngularVelocities[splitIndexA];
635                         }
636
637                         if (bodyB.m_invMass)
638                         {
639                                 int bodyOffsetB = offsetSplitBodies[bIdx];
640                                 int constraintOffsetB = contactConstraintOffsets[i].y;
641                                 int splitIndexB = bodyOffsetB + constraintOffsetB;
642                                 dlvBPtr = &deltaLinearVelocities[splitIndexB];
643                                 davBPtr = &deltaAngularVelocities[splitIndexB];
644                         }
645
646                         for (int j = 0; j < 4; j++)
647                         {
648                                 maxRambdaDt[j] = frictionCoeff * sum;
649                                 minRambdaDt[j] = -maxRambdaDt[j];
650                         }
651
652                         solveFriction(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
653                                                   (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
654                                                   maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
655                 }
656
657                 //easy
658                 for (int i = 0; i < numBodies; i++)
659                 {
660                         if (bodies[i].m_invMass)
661                         {
662                                 int bodyOffset = offsetSplitBodies[i];
663                                 int count = bodyCount[i];
664                                 float factor = 1.f / float(count);
665                                 b3Vector3 averageLinVel;
666                                 averageLinVel.setZero();
667                                 b3Vector3 averageAngVel;
668                                 averageAngVel.setZero();
669                                 for (int j = 0; j < count; j++)
670                                 {
671                                         averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
672                                         averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
673                                 }
674                                 for (int j = 0; j < count; j++)
675                                 {
676                                         deltaLinearVelocities[bodyOffset + j] = averageLinVel;
677                                         deltaAngularVelocities[bodyOffset + j] = averageAngVel;
678                                 }
679                         }
680                 }
681         }
682
683         //easy
684         for (int i = 0; i < numBodies; i++)
685         {
686                 if (bodies[i].m_invMass)
687                 {
688                         int bodyOffset = offsetSplitBodies[i];
689                         int count = bodyCount[i];
690                         if (count)
691                         {
692                                 bodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
693                                 bodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
694                         }
695                 }
696         }
697 }
698
699 void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index)
700 //
701 //
702 //void  b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* bodies,b3OpenCLArray<b3InertiaData>* inertias,b3OpenCLArray<b3Contact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo)
703 {
704         b3JacobiSolverInfo solverInfo;
705         solverInfo.m_fixedBodyIndex = static0Index;
706
707         B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
708
709         //int numBodies = bodies->size();
710         int numManifolds = numContacts;  //manifoldPtr->size();
711
712         {
713                 B3_PROFILE("resize");
714                 m_data->m_bodyCount->resize(numBodies);
715         }
716
717         unsigned int val = 0;
718         b3Int2 val2;
719         val2.x = 0;
720         val2.y = 0;
721
722         {
723                 B3_PROFILE("m_filler");
724                 m_data->m_contactConstraintOffsets->resize(numManifolds);
725                 m_data->m_filler->execute(*m_data->m_bodyCount, val, numBodies);
726
727                 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets, val2, numManifolds);
728         }
729
730         {
731                 B3_PROFILE("m_countBodiesKernel");
732                 b3LauncherCL launcher(this->m_queue, m_data->m_countBodiesKernel, "m_countBodiesKernel");
733                 launcher.setBuffer(contactBuf);  //manifoldPtr->getBufferCL());
734                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
735                 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
736                 launcher.setConst(numManifolds);
737                 launcher.setConst(solverInfo.m_fixedBodyIndex);
738                 launcher.launch1D(numManifolds);
739         }
740         unsigned int totalNumSplitBodies = 0;
741         {
742                 B3_PROFILE("m_scan->execute");
743
744                 m_data->m_offsetSplitBodies->resize(numBodies);
745                 m_data->m_scan->execute(*m_data->m_bodyCount, *m_data->m_offsetSplitBodies, numBodies, &totalNumSplitBodies);
746                 totalNumSplitBodies += m_data->m_bodyCount->at(numBodies - 1);
747         }
748
749         {
750                 B3_PROFILE("m_data->m_contactConstraints->resize");
751                 //int numContacts = manifoldPtr->size();
752                 m_data->m_contactConstraints->resize(numContacts);
753         }
754
755         {
756                 B3_PROFILE("contactToConstraintSplitKernel");
757                 b3LauncherCL launcher(m_queue, m_data->m_contactToConstraintSplitKernel, "m_contactToConstraintSplitKernel");
758                 launcher.setBuffer(contactBuf);
759                 launcher.setBuffer(bodyBuf);
760                 launcher.setBuffer(inertiaBuf);
761                 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
762                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
763                 launcher.setConst(numContacts);
764                 launcher.setConst(solverInfo.m_deltaTime);
765                 launcher.setConst(solverInfo.m_positionDrift);
766                 launcher.setConst(solverInfo.m_positionConstraintCoeff);
767                 launcher.launch1D(numContacts, 64);
768         }
769
770         {
771                 B3_PROFILE("m_data->m_deltaLinearVelocities->resize");
772                 m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
773                 m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
774         }
775
776         {
777                 B3_PROFILE("m_clearVelocitiesKernel");
778                 b3LauncherCL launch(m_queue, m_data->m_clearVelocitiesKernel, "m_clearVelocitiesKernel");
779                 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
780                 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
781                 launch.setConst(totalNumSplitBodies);
782                 launch.launch1D(totalNumSplitBodies);
783                 clFinish(m_queue);
784         }
785
786         int maxIter = solverInfo.m_numIterations;
787
788         for (int iter = 0; iter < maxIter; iter++)
789         {
790                 {
791                         B3_PROFILE("m_solveContactKernel");
792                         b3LauncherCL launcher(m_queue, m_data->m_solveContactKernel, "m_solveContactKernel");
793                         launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
794                         launcher.setBuffer(bodyBuf);
795                         launcher.setBuffer(inertiaBuf);
796                         launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
797                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
798                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
799                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
800                         launcher.setConst(solverInfo.m_deltaTime);
801                         launcher.setConst(solverInfo.m_positionDrift);
802                         launcher.setConst(solverInfo.m_positionConstraintCoeff);
803                         launcher.setConst(solverInfo.m_fixedBodyIndex);
804                         launcher.setConst(numManifolds);
805
806                         launcher.launch1D(numManifolds);
807                         clFinish(m_queue);
808                 }
809
810                 {
811                         B3_PROFILE("average velocities");
812                         b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
813                         launcher.setBuffer(bodyBuf);
814                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
815                         launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
816                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
817                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
818                         launcher.setConst(numBodies);
819                         launcher.launch1D(numBodies);
820                         clFinish(m_queue);
821                 }
822
823                 {
824                         B3_PROFILE("m_solveFrictionKernel");
825                         b3LauncherCL launcher(m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel");
826                         launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
827                         launcher.setBuffer(bodyBuf);
828                         launcher.setBuffer(inertiaBuf);
829                         launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
830                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
831                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
832                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
833                         launcher.setConst(solverInfo.m_deltaTime);
834                         launcher.setConst(solverInfo.m_positionDrift);
835                         launcher.setConst(solverInfo.m_positionConstraintCoeff);
836                         launcher.setConst(solverInfo.m_fixedBodyIndex);
837                         launcher.setConst(numManifolds);
838
839                         launcher.launch1D(numManifolds);
840                         clFinish(m_queue);
841                 }
842
843                 {
844                         B3_PROFILE("average velocities");
845                         b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
846                         launcher.setBuffer(bodyBuf);
847                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
848                         launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
849                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
850                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
851                         launcher.setConst(numBodies);
852                         launcher.launch1D(numBodies);
853                         clFinish(m_queue);
854                 }
855         }
856
857         {
858                 B3_PROFILE("update body velocities");
859                 b3LauncherCL launcher(m_queue, m_data->m_updateBodyVelocitiesKernel, "m_updateBodyVelocitiesKernel");
860                 launcher.setBuffer(bodyBuf);
861                 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
862                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
863                 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
864                 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
865                 launcher.setConst(numBodies);
866                 launcher.launch1D(numBodies);
867                 clFinish(m_queue);
868         }
869 }
870
871 #if 0
872
873 void  b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray<b3RigidBodyData>* bodiesGPU,b3OpenCLArray<b3InertiaData>* inertiasGPU,b3OpenCLArray<b3Contact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo)
874 {
875
876         b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
877         bodiesGPU->copyToHost(bodiesCPU);
878         b3AlignedObjectArray<b3InertiaData> inertiasCPU;
879         inertiasGPU->copyToHost(inertiasCPU);
880         b3AlignedObjectArray<b3Contact4> manifoldPtrCPU;
881         manifoldPtrGPU->copyToHost(manifoldPtrCPU);
882         
883         int numBodiesCPU = bodiesGPU->size();
884         int numManifoldsCPU = manifoldPtrGPU->size();
885         B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed");
886
887         b3AlignedObjectArray<unsigned int> bodyCount;
888         bodyCount.resize(numBodiesCPU);
889         for (int i=0;i<numBodiesCPU;i++)
890                 bodyCount[i] = 0;
891
892         b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
893         contactConstraintOffsets.resize(numManifoldsCPU);
894
895
896         for (int i=0;i<numManifoldsCPU;i++)
897         {
898                 int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit;
899                 int pb = manifoldPtrCPU[i].m_bodyBPtrAndSignBit;
900
901                 bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
902                 bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
903
904                 int bodyIndexA = manifoldPtrCPU[i].getBodyA();
905                 int bodyIndexB = manifoldPtrCPU[i].getBodyB();
906
907                 if (!isFixedA)
908                 {
909                         contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
910                         bodyCount[bodyIndexA]++;
911                 }
912                 if (!isFixedB)
913                 {
914                         contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
915                         bodyCount[bodyIndexB]++;
916                 } 
917         }
918
919         b3AlignedObjectArray<unsigned int> offsetSplitBodies;
920         offsetSplitBodies.resize(numBodiesCPU);
921         unsigned int totalNumSplitBodiesCPU;
922         m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU);
923         int numlastBody = bodyCount[numBodiesCPU-1];
924         totalNumSplitBodiesCPU += numlastBody;
925
926                 int numBodies = bodiesGPU->size();
927         int numManifolds = manifoldPtrGPU->size();
928
929         m_data->m_bodyCount->resize(numBodies);
930         
931         unsigned int val=0;
932         b3Int2 val2;
933         val2.x=0;
934         val2.y=0;
935
936          {
937                 B3_PROFILE("m_filler");
938                 m_data->m_contactConstraintOffsets->resize(numManifolds);
939                 m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
940                 
941         
942                 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
943         }
944
945         {
946                 B3_PROFILE("m_countBodiesKernel");
947                 b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
948                 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
949                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
950                 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
951                 launcher.setConst(numManifolds);
952                 launcher.setConst(solverInfo.m_fixedBodyIndex);
953                 launcher.launch1D(numManifolds);
954         }
955
956         unsigned int totalNumSplitBodies=0;
957         m_data->m_offsetSplitBodies->resize(numBodies);
958         m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
959         totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
960
961         if (totalNumSplitBodies != totalNumSplitBodiesCPU)
962         {
963                 printf("error in totalNumSplitBodies!\n");
964         }
965
966         int numContacts = manifoldPtrGPU->size();
967         m_data->m_contactConstraints->resize(numContacts);
968
969         
970         {
971                 B3_PROFILE("contactToConstraintSplitKernel");
972                 b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
973                 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
974                 launcher.setBuffer(bodiesGPU->getBufferCL());
975                 launcher.setBuffer(inertiasGPU->getBufferCL());
976                 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
977                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
978         launcher.setConst(numContacts);
979                 launcher.setConst(solverInfo.m_deltaTime);
980                 launcher.setConst(solverInfo.m_positionDrift);
981                 launcher.setConst(solverInfo.m_positionConstraintCoeff);
982                 launcher.launch1D( numContacts, 64 );
983                 clFinish(m_queue);
984         }
985
986
987
988         b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
989         contactConstraints.resize(numManifoldsCPU);
990
991         for (int i=0;i<numManifoldsCPU;i++)
992         {
993                 ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU,
994                         solverInfo.m_deltaTime,
995                         solverInfo.m_positionDrift,
996                         solverInfo.m_positionConstraintCoeff,
997                         i, bodyCount);
998         }
999         int maxIter = solverInfo.m_numIterations;
1000
1001
1002         b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
1003         b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
1004         deltaLinearVelocities.resize(totalNumSplitBodiesCPU);
1005         deltaAngularVelocities.resize(totalNumSplitBodiesCPU);
1006         for (int i=0;i<totalNumSplitBodiesCPU;i++)
1007         {
1008                 deltaLinearVelocities[i].setZero();
1009                 deltaAngularVelocities[i].setZero();
1010         }
1011
1012         m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
1013         m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
1014
1015
1016         
1017         {
1018                 B3_PROFILE("m_clearVelocitiesKernel");
1019                 b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
1020                 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1021                 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1022                 launch.setConst(totalNumSplitBodies);
1023                 launch.launch1D(totalNumSplitBodies);
1024         }
1025         
1026
1027                 ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1028
1029
1030         m_data->m_contactConstraints->copyToHost(contactConstraints);
1031         m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies);
1032         m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets);
1033         m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1034         m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1035
1036         for (int iter = 0;iter<maxIter;iter++)
1037         {
1038
1039                                 {
1040                         B3_PROFILE("m_solveContactKernel");
1041                         b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel );
1042                         launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1043                         launcher.setBuffer(bodiesGPU->getBufferCL());
1044                         launcher.setBuffer(inertiasGPU->getBufferCL());
1045                         launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1046                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1047                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1048                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1049                         launcher.setConst(solverInfo.m_deltaTime);
1050                         launcher.setConst(solverInfo.m_positionDrift);
1051                         launcher.setConst(solverInfo.m_positionConstraintCoeff);
1052                         launcher.setConst(solverInfo.m_fixedBodyIndex);
1053                         launcher.setConst(numManifolds);
1054
1055                         launcher.launch1D(numManifolds);
1056                         clFinish(m_queue);
1057                 }
1058
1059
1060                 int i=0;
1061                 for( i=0; i<numManifoldsCPU; i++)
1062                 {
1063
1064                         float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1065                         int aIdx = (int)contactConstraints[i].m_bodyA;
1066                         int bIdx = (int)contactConstraints[i].m_bodyB;
1067                         b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1068                         b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1069
1070                         b3Vector3 zero(0,0,0);
1071                         
1072                         b3Vector3* dlvAPtr=&zero;
1073                         b3Vector3* davAPtr=&zero;
1074                         b3Vector3* dlvBPtr=&zero;
1075                         b3Vector3* davBPtr=&zero;
1076                         
1077                         if (bodyA.m_invMass)
1078                         {
1079                                 int bodyOffsetA = offsetSplitBodies[aIdx];
1080                                 int constraintOffsetA = contactConstraintOffsets[i].x;
1081                                 int splitIndexA = bodyOffsetA+constraintOffsetA;
1082                                 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1083                                 davAPtr = &deltaAngularVelocities[splitIndexA];
1084                         }
1085
1086                         if (bodyB.m_invMass)
1087                         {
1088                                 int bodyOffsetB = offsetSplitBodies[bIdx];
1089                                 int constraintOffsetB = contactConstraintOffsets[i].y;
1090                                 int splitIndexB= bodyOffsetB+constraintOffsetB;
1091                                 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1092                                 davBPtr = &deltaAngularVelocities[splitIndexB];
1093                         }
1094
1095
1096
1097                         {
1098                                 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1099                                 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1100
1101                                 solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld, 
1102                                         (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1103                                         maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr          );
1104
1105
1106                         }
1107                 }
1108
1109                 
1110                 {
1111                         B3_PROFILE("average velocities");
1112                         b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1113                         launcher.setBuffer(bodiesGPU->getBufferCL());
1114                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1115                         launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1116                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1117                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1118                         launcher.setConst(numBodies);
1119                         launcher.launch1D(numBodies);
1120                         clFinish(m_queue);
1121                 }
1122
1123                 //easy
1124                 for (int i=0;i<numBodiesCPU;i++)
1125                 {
1126                         if (bodiesCPU[i].m_invMass)
1127                         {
1128                                 int bodyOffset = offsetSplitBodies[i];
1129                                 int count = bodyCount[i];
1130                                 float factor = 1.f/float(count);
1131                                 b3Vector3 averageLinVel;
1132                                 averageLinVel.setZero();
1133                                 b3Vector3 averageAngVel;
1134                                 averageAngVel.setZero();
1135                                 for (int j=0;j<count;j++)
1136                                 {
1137                                         averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1138                                         averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1139                                 }
1140                                 for (int j=0;j<count;j++)
1141                                 {
1142                                         deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1143                                         deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1144                                 }
1145                         }
1146                 }
1147 //      m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
1148         //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
1149         m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1150         m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1151
1152 #if 0
1153
1154                 {
1155                         B3_PROFILE("m_solveFrictionKernel");
1156                         b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
1157                         launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1158                         launcher.setBuffer(bodiesGPU->getBufferCL());
1159                         launcher.setBuffer(inertiasGPU->getBufferCL());
1160                         launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1161                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1162                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1163                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1164                         launcher.setConst(solverInfo.m_deltaTime);
1165                         launcher.setConst(solverInfo.m_positionDrift);
1166                         launcher.setConst(solverInfo.m_positionConstraintCoeff);
1167                         launcher.setConst(solverInfo.m_fixedBodyIndex);
1168                         launcher.setConst(numManifolds);
1169
1170                         launcher.launch1D(numManifolds);
1171                         clFinish(m_queue);
1172                 }
1173
1174                 //solve friction
1175
1176                 for(int i=0; i<numManifoldsCPU; i++)
1177                 {
1178                         float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1179                         float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1180
1181                         float sum = 0;
1182                         for(int j=0; j<4; j++)
1183                         {
1184                                 sum +=contactConstraints[i].m_appliedRambdaDt[j];
1185                         }
1186                         float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1187                         int aIdx = (int)contactConstraints[i].m_bodyA;
1188                         int bIdx = (int)contactConstraints[i].m_bodyB;
1189                         b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1190                         b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1191
1192                         b3Vector3 zero(0,0,0);
1193                         
1194                         b3Vector3* dlvAPtr=&zero;
1195                         b3Vector3* davAPtr=&zero;
1196                         b3Vector3* dlvBPtr=&zero;
1197                         b3Vector3* davBPtr=&zero;
1198                         
1199                         if (bodyA.m_invMass)
1200                         {
1201                                 int bodyOffsetA = offsetSplitBodies[aIdx];
1202                                 int constraintOffsetA = contactConstraintOffsets[i].x;
1203                                 int splitIndexA = bodyOffsetA+constraintOffsetA;
1204                                 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1205                                 davAPtr = &deltaAngularVelocities[splitIndexA];
1206                         }
1207
1208                         if (bodyB.m_invMass)
1209                         {
1210                                 int bodyOffsetB = offsetSplitBodies[bIdx];
1211                                 int constraintOffsetB = contactConstraintOffsets[i].y;
1212                                 int splitIndexB= bodyOffsetB+constraintOffsetB;
1213                                 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1214                                 davBPtr = &deltaAngularVelocities[splitIndexB];
1215                         }
1216
1217                         for(int j=0; j<4; j++)
1218                         {
1219                                 maxRambdaDt[j] = frictionCoeff*sum;
1220                                 minRambdaDt[j] = -maxRambdaDt[j];
1221                         }
1222
1223                         solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld, 
1224                                 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1225                                 maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
1226
1227                 }
1228
1229                 {
1230                         B3_PROFILE("average velocities");
1231                         b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1232                         launcher.setBuffer(bodiesGPU->getBufferCL());
1233                         launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1234                         launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1235                         launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1236                         launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1237                         launcher.setConst(numBodies);
1238                         launcher.launch1D(numBodies);
1239                         clFinish(m_queue);
1240                 }
1241
1242                 //easy
1243                 for (int i=0;i<numBodiesCPU;i++)
1244                 {
1245                         if (bodiesCPU[i].m_invMass)
1246                         {
1247                                 int bodyOffset = offsetSplitBodies[i];
1248                                 int count = bodyCount[i];
1249                                 float factor = 1.f/float(count);
1250                                 b3Vector3 averageLinVel;
1251                                 averageLinVel.setZero();
1252                                 b3Vector3 averageAngVel;
1253                                 averageAngVel.setZero();
1254                                 for (int j=0;j<count;j++)
1255                                 {
1256                                         averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1257                                         averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1258                                 }
1259                                 for (int j=0;j<count;j++)
1260                                 {
1261                                         deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1262                                         deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1263                                 }
1264                         }
1265                 }
1266
1267 #endif
1268
1269         }
1270
1271         {
1272                 B3_PROFILE("update body velocities");
1273                 b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
1274                 launcher.setBuffer(bodiesGPU->getBufferCL());
1275                 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1276                 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1277                 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1278                 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1279                 launcher.setConst(numBodies);
1280                 launcher.launch1D(numBodies);
1281                 clFinish(m_queue);
1282         }
1283
1284
1285         //easy
1286         for (int i=0;i<numBodiesCPU;i++)
1287         {
1288                 if (bodiesCPU[i].m_invMass)
1289                 {
1290                         int bodyOffset = offsetSplitBodies[i];
1291                         int count = bodyCount[i];
1292                         if (count)
1293                         {
1294                                 bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset];
1295                                 bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset];
1296                         }
1297                 }
1298         }
1299
1300
1301 //      bodiesGPU->copyFromHost(bodiesCPU);
1302
1303
1304 }
1305 #endif