[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / RigidBody / kernels / jointSolver.cl
1 /*
2 Copyright (c) 2013 Advanced Micro Devices, Inc.  
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose, 
7 including commercial applications, and to alter it and redistribute it freely, 
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Erwin Coumans
15
16 #define B3_CONSTRAINT_FLAG_ENABLED 1
17
18 #define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3
19 #define B3_GPU_FIXED_CONSTRAINT_TYPE 4
20
21 #define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails
22 #define B3_INFINITY 1e30f
23
24 #define mymake_float4 (float4)
25
26
27 __inline float dot3F4(float4 a, float4 b)
28 {
29         float4 a1 = mymake_float4(a.xyz,0.f);
30         float4 b1 = mymake_float4(b.xyz,0.f);
31         return dot(a1, b1);
32 }
33
34
35 typedef float4 Quaternion;
36
37
38 typedef struct
39 {
40         float4 m_row[3];
41 }Matrix3x3;
42
43 __inline
44 float4 mtMul1(Matrix3x3 a, float4 b);
45
46 __inline
47 float4 mtMul3(float4 a, Matrix3x3 b);
48
49
50
51
52
53 __inline
54 float4 mtMul1(Matrix3x3 a, float4 b)
55 {
56         float4 ans;
57         ans.x = dot3F4( a.m_row[0], b );
58         ans.y = dot3F4( a.m_row[1], b );
59         ans.z = dot3F4( a.m_row[2], b );
60         ans.w = 0.f;
61         return ans;
62 }
63
64 __inline
65 float4 mtMul3(float4 a, Matrix3x3 b)
66 {
67         float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
68         float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
69         float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
70
71         float4 ans;
72         ans.x = dot3F4( a, colx );
73         ans.y = dot3F4( a, coly );
74         ans.z = dot3F4( a, colz );
75         return ans;
76 }
77
78
79
80 typedef struct
81 {
82         Matrix3x3 m_invInertiaWorld;
83         Matrix3x3 m_initInvInertia;
84 } BodyInertia;
85
86
87 typedef struct
88 {
89         Matrix3x3 m_basis;//orientation
90         float4  m_origin;//transform
91 }b3Transform;
92
93 typedef struct
94 {
95 //      b3Transform             m_worldTransformUnused;
96         float4          m_deltaLinearVelocity;
97         float4          m_deltaAngularVelocity;
98         float4          m_angularFactor;
99         float4          m_linearFactor;
100         float4          m_invMass;
101         float4          m_pushVelocity;
102         float4          m_turnVelocity;
103         float4          m_linearVelocity;
104         float4          m_angularVelocity;
105
106         union 
107         {
108                 void*   m_originalBody;
109                 int             m_originalBodyIndex;
110         };
111         int padding[3];
112
113 } b3GpuSolverBody;
114
115 typedef struct
116 {
117         float4 m_pos;
118         Quaternion m_quat;
119         float4 m_linVel;
120         float4 m_angVel;
121
122         unsigned int m_shapeIdx;
123         float m_invMass;
124         float m_restituitionCoeff;
125         float m_frictionCoeff;
126 } b3RigidBodyCL;
127
128 typedef struct
129 {
130
131         float4          m_relpos1CrossNormal;
132         float4          m_contactNormal;
133
134         float4          m_relpos2CrossNormal;
135         //float4                m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
136
137         float4          m_angularComponentA;
138         float4          m_angularComponentB;
139         
140         float   m_appliedPushImpulse;
141         float   m_appliedImpulse;
142         int     m_padding1;
143         int     m_padding2;
144         float   m_friction;
145         float   m_jacDiagABInv;
146         float           m_rhs;
147         float           m_cfm;
148         
149     float               m_lowerLimit;
150         float           m_upperLimit;
151         float           m_rhsPenetration;
152         int                     m_originalConstraint;
153
154
155         int     m_overrideNumSolverIterations;
156     int                 m_frictionIndex;
157         int m_solverBodyIdA;
158         int m_solverBodyIdB;
159
160 } b3SolverConstraint;
161
162 typedef struct 
163 {
164         int m_bodyAPtrAndSignBit;
165         int m_bodyBPtrAndSignBit;
166         int m_originalConstraintIndex;
167         int m_batchId;
168 } b3BatchConstraint;
169
170
171
172
173
174
175 typedef struct 
176 {
177         int                             m_constraintType;
178         int                             m_rbA;
179         int                             m_rbB;
180         float                   m_breakingImpulseThreshold;
181
182         float4 m_pivotInA;
183         float4 m_pivotInB;
184         Quaternion m_relTargetAB;
185
186         int     m_flags;
187         int m_padding[3];
188 } b3GpuGenericConstraint;
189
190
191 /*b3Transform   getWorldTransform(b3RigidBodyCL* rb)
192 {
193         b3Transform newTrans;
194         newTrans.setOrigin(rb->m_pos);
195         newTrans.setRotation(rb->m_quat);
196         return newTrans;
197 }*/
198
199
200
201
202 __inline
203 float4 cross3(float4 a, float4 b)
204 {
205         return cross(a,b);
206 }
207
208 __inline
209 float4 fastNormalize4(float4 v)
210 {
211         v = mymake_float4(v.xyz,0.f);
212         return fast_normalize(v);
213 }
214
215
216 __inline
217 Quaternion qtMul(Quaternion a, Quaternion b);
218
219 __inline
220 Quaternion qtNormalize(Quaternion in);
221
222 __inline
223 float4 qtRotate(Quaternion q, float4 vec);
224
225 __inline
226 Quaternion qtInvert(Quaternion q);
227
228
229
230
231 __inline
232 Quaternion qtMul(Quaternion a, Quaternion b)
233 {
234         Quaternion ans;
235         ans = cross3( a, b );
236         ans += a.w*b+b.w*a;
237 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
238         ans.w = a.w*b.w - dot3F4(a, b);
239         return ans;
240 }
241
242 __inline
243 Quaternion qtNormalize(Quaternion in)
244 {
245         return fastNormalize4(in);
246 //      in /= length( in );
247 //      return in;
248 }
249 __inline
250 float4 qtRotate(Quaternion q, float4 vec)
251 {
252         Quaternion qInv = qtInvert( q );
253         float4 vcpy = vec;
254         vcpy.w = 0.f;
255         float4 out = qtMul(qtMul(q,vcpy),qInv);
256         return out;
257 }
258
259 __inline
260 Quaternion qtInvert(Quaternion q)
261 {
262         return (Quaternion)(-q.xyz, q.w);
263 }
264
265
266 __inline void internalApplyImpulse(__global b3GpuSolverBody* body,  float4 linearComponent, float4 angularComponent,float impulseMagnitude)
267 {
268         body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;
269         body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);
270 }
271
272
273 void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)
274 {
275         float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;
276         float deltaVel1Dotn     =       dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity)         + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);
277         float deltaVel2Dotn     =       -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);
278
279         deltaImpulse    -=      deltaVel1Dotn*c->m_jacDiagABInv;
280         deltaImpulse    -=      deltaVel2Dotn*c->m_jacDiagABInv;
281
282         float sum = c->m_appliedImpulse + deltaImpulse;
283         if (sum < c->m_lowerLimit)
284         {
285                 deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;
286                 c->m_appliedImpulse = c->m_lowerLimit;
287         }
288         else if (sum > c->m_upperLimit) 
289         {
290                 deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;
291                 c->m_appliedImpulse = c->m_upperLimit;
292         }
293         else
294         {
295                 c->m_appliedImpulse = sum;
296         }
297
298         internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);
299         internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);
300
301 }
302
303 __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,
304                                           __global b3BatchConstraint* batchConstraints,
305                                                 __global b3SolverConstraint* rows,
306                                                 __global unsigned int* numConstraintRowsInfo1, 
307                                                 __global unsigned int* rowOffsets,
308                                                 __global b3GpuGenericConstraint* constraints,
309                                                 int batchOffset,
310                                                 int numConstraintsInBatch
311                       )
312 {
313         int b = get_global_id(0);
314         if (b>=numConstraintsInBatch)
315                 return;
316
317         __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];
318         int originalConstraintIndex = c->m_originalConstraintIndex;
319         if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)
320         {
321                 int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];
322                 int rowOffset = rowOffsets[originalConstraintIndex];
323                 for (int jj=0;jj<numConstraintRows;jj++)
324                 {
325                         __global b3SolverConstraint* constraint = &rows[rowOffset+jj];
326                         resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);
327                 }
328         }
329 };
330
331 __kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)
332 {
333         int i = get_global_id(0);
334         if (i>=numBodies)
335                 return;
336
337         __global b3GpuSolverBody* solverBody = &solverBodies[i];
338         __global b3RigidBodyCL* bodyCL = &bodiesCL[i];
339
340         solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);
341         solverBody->m_deltaAngularVelocity  = (float4)(0.f,0.f,0.f,0.f);
342         solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);
343         solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);
344         solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);
345         solverBody->m_originalBodyIndex = i;
346         solverBody->m_angularFactor = (float4)(1,1,1,0);
347         solverBody->m_linearFactor = (float4) (1,1,1,0);
348         solverBody->m_linearVelocity = bodyCL->m_linVel;
349         solverBody->m_angularVelocity = bodyCL->m_angVel;
350 }
351
352 __kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)
353 {
354         int cid = get_global_id(0);
355         if (cid>=numConstraints)
356                 return;
357         int numRows = numConstraintRows[cid];
358         if (numRows)
359         {
360                 for (int i=0;i<numRows;i++)
361                 {
362                         int rowIndex = rowOffsets[cid]+i;
363                         float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;
364                         if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)
365                         {
366                                 constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;
367                         }
368                 }
369         }
370 }
371
372
373
374 __kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)
375 {
376         int i = get_global_id(0);
377         if (i>=numConstraints)
378                 return;
379
380         __global b3GpuGenericConstraint* constraint = &constraints[i];
381
382         switch (constraint->m_constraintType)
383         {
384                 case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:
385                 {
386                         infos[i] = 3;
387                         break;
388                 }
389                 case B3_GPU_FIXED_CONSTRAINT_TYPE:
390                 {
391                         infos[i] = 6;
392                         break;
393                 }
394                 default:
395                 {
396                 }
397         }
398 }
399
400 __kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, 
401                                                                                 __global b3BatchConstraint* batchConstraints, 
402                                                                                 __global b3GpuGenericConstraint* constraints,
403                                                                                 __global b3RigidBodyCL* bodies,
404                                                                                 int numConstraints)
405 {
406         int i = get_global_id(0);
407         if (i>=numConstraints)
408                 return;
409
410         int rbA = constraints[i].m_rbA;
411         int rbB = constraints[i].m_rbB;
412
413         batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;
414         batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB;
415         batchConstraints[i].m_batchId = -1;
416         batchConstraints[i].m_originalConstraintIndex = i;
417
418 }
419
420
421
422
423 typedef struct
424 {
425         // integrator parameters: frames per second (1/stepsize), default error
426         // reduction parameter (0..1).
427         float fps,erp;
428
429         // for the first and second body, pointers to two (linear and angular)
430         // n*3 jacobian sub matrices, stored by rows. these matrices will have
431         // been initialized to 0 on entry. if the second body is zero then the
432         // J2xx pointers may be 0.
433         union 
434         {
435                 __global float4* m_J1linearAxisFloat4;
436                 __global float* m_J1linearAxis;
437         };
438         union
439         {
440                 __global float4* m_J1angularAxisFloat4;
441                 __global float* m_J1angularAxis;
442
443         };
444         union
445         {
446         __global float4* m_J2linearAxisFloat4;
447         __global float* m_J2linearAxis;
448         };
449         union
450         {
451                 __global float4* m_J2angularAxisFloat4;
452                 __global float* m_J2angularAxis;
453         };
454         // elements to jump from one row to the next in J's
455         int rowskip;
456
457         // right hand sides of the equation J*v = c + cfm * lambda. cfm is the
458         // "constraint force mixing" vector. c is set to zero on entry, cfm is
459         // set to a constant value (typically very small or zero) value on entry.
460         __global float* m_constraintError;
461         __global float* cfm;
462
463         // lo and hi limits for variables (set to -/+ infinity on entry).
464         __global float* m_lowerLimit;
465         __global float* m_upperLimit;
466
467         // findex vector for variables. see the LCP solver interface for a
468         // description of what this does. this is set to -1 on entry.
469         // note that the returned indexes are relative to the first index of
470         // the constraint.
471         __global int *findex;
472         // number of solver iterations
473         int m_numIterations;
474
475         //damping of the velocity
476         float   m_damping;
477 } b3GpuConstraintInfo2;
478
479
480 void    getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)
481 {
482         *v0 = (float4)(0.               ,-vecIn.z               ,vecIn.y,0.f);
483         *v1 = (float4)(vecIn.z  ,0.                     ,-vecIn.x,0.f);
484         *v2 = (float4)(-vecIn.y ,vecIn.x        ,0.f,0.f);
485 }
486
487
488 void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)
489 {
490         float4 posA = bodies[constraint->m_rbA].m_pos;
491         Quaternion rotA = bodies[constraint->m_rbA].m_quat;
492
493         float4 posB = bodies[constraint->m_rbB].m_pos;
494         Quaternion rotB = bodies[constraint->m_rbB].m_quat;
495
496
497
498                 // anchor points in global coordinates with respect to body PORs.
499    
500     // set jacobian
501     info->m_J1linearAxis[0] = 1;
502         info->m_J1linearAxis[info->rowskip+1] = 1;
503         info->m_J1linearAxis[2*info->rowskip+2] = 1;
504
505         float4 a1 = qtRotate(rotA,constraint->m_pivotInA);
506
507         {
508                 __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);
509                 __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);
510                 __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);
511                 float4 a1neg = -a1;
512                 getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);
513         }
514         if (info->m_J2linearAxis)
515         {
516                 info->m_J2linearAxis[0] = -1;
517                 info->m_J2linearAxis[info->rowskip+1] = -1;
518                 info->m_J2linearAxis[2*info->rowskip+2] = -1;
519         }
520         
521         float4 a2 = qtRotate(rotB,constraint->m_pivotInB);
522    
523         {
524         //      float4 a2n = -a2;
525                 __global float4* angular0 = (__global float4*)(info->m_J2angularAxis);
526                 __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);
527                 __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);
528                 getSkewSymmetricMatrix(a2,angular0,angular1,angular2);
529         }
530     
531     // set right hand side
532 //      float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;
533         float currERP = info->erp;
534
535         float k = info->fps * currERP;
536     int j;
537         float4 result = a2 + posB - a1 - posA;
538         float* resultPtr = &result;
539
540         for (j=0; j<3; j++)
541     {
542         info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);
543     }
544 }
545
546 Quaternion nearest( Quaternion first, Quaternion qd)
547 {
548         Quaternion diff,sum;
549         diff = first- qd;
550         sum = first + qd;
551         
552         if( dot(diff,diff) < dot(sum,sum) )
553                 return qd;
554         return (-qd);
555 }
556
557 float b3Acos(float x) 
558
559         if (x<-1)       
560                 x=-1; 
561         if (x>1)        
562                 x=1;
563         return acos(x); 
564 }
565
566 float getAngle(Quaternion orn)
567 {
568         if (orn.w>=1.f)
569                 orn.w=1.f;
570         float s = 2.f * b3Acos(orn.w);
571         return s;
572 }
573
574 void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)
575 {
576         Quaternion orn1 = nearest(orn0,orn1a);
577         
578         Quaternion dorn = qtMul(orn1,qtInvert(orn0));
579         *angle = getAngle(dorn);
580         *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);
581         
582         //check for axis length
583         float len = dot3F4(*axis,*axis);
584         if (len < FLT_EPSILON*FLT_EPSILON)
585                 *axis = (float4)(1,0,0,0);
586         else
587                 *axis /= sqrt(len);
588 }
589
590
591
592 void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)
593 {
594         Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;
595         Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;
596
597         int s = info->rowskip;
598         int start_index = start_row * s;
599
600         // 3 rows to make body rotations equal
601         info->m_J1angularAxis[start_index] = 1;
602         info->m_J1angularAxis[start_index + s + 1] = 1;
603         info->m_J1angularAxis[start_index + s*2+2] = 1;
604         if ( info->m_J2angularAxis)
605         {
606                 info->m_J2angularAxis[start_index] = -1;
607                 info->m_J2angularAxis[start_index + s+1] = -1;
608                 info->m_J2angularAxis[start_index + s*2+2] = -1;
609         }
610         
611         float currERP = info->erp;
612         float k = info->fps * currERP;
613         float4 diff;
614         float angle;
615         float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));
616         
617         calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);
618         diff*=-angle;
619                 
620         float* resultPtr = &diff;
621         
622         for (int j=0; j<3; j++)
623     {
624         info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];
625     }
626         
627
628 }
629
630
631 __kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)
632 {
633         int i = get_global_id(0);
634         if (i>=numBodies)
635                 return;
636
637         if (bodies[i].m_invMass)
638         {
639 //              if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)
640                 {
641                         bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;
642                 }
643 //              if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP)
644                 {
645                         bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;
646                 } 
647         }
648 }
649
650
651 __kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, 
652                                                         __global unsigned int* infos, 
653                                                         __global unsigned int* constraintRowOffsets, 
654                                                         __global b3GpuGenericConstraint* constraints, 
655                                                         __global b3BatchConstraint* batchConstraints, 
656                                                         __global b3RigidBodyCL* bodies,
657                                                         __global BodyInertia* inertias,
658                                                         __global b3GpuSolverBody* solverBodies,
659                                                         float timeStep,
660                                                         float globalErp,
661                                                         float globalCfm,
662                                                         float globalDamping,
663                                                         int globalNumIterations,
664                                                         int numConstraints)
665 {
666
667         int i = get_global_id(0);
668         if (i>=numConstraints)
669                 return;
670                 
671         //for now, always initialize the batch info
672         int info1 = infos[i];
673                         
674         __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];
675         __global b3GpuGenericConstraint* constraint = &constraints[i];
676
677         __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];
678         __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];
679
680         int solverBodyIdA = constraint->m_rbA;
681         int solverBodyIdB = constraint->m_rbB;
682
683         __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];
684         __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];
685
686
687         if (rbA->m_invMass)
688         {
689                 batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;
690         } else
691         {
692 //                      if (!solverBodyIdA)
693 //                              m_staticIdx = 0;
694                 batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;
695         }
696
697         if (rbB->m_invMass)
698         {
699                 batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;
700         } else
701         {
702 //                      if (!solverBodyIdB)
703 //                              m_staticIdx = 0;
704                 batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;
705         }
706
707         if (info1)
708         {
709                 int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
710 //              if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
711         //              m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
712
713
714                 int j;
715                 for ( j=0;j<info1;j++)
716                 {
717 //                      memset(&currentConstraintRow[j],0,sizeof(b3SolverConstraint));
718                         currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0);
719                         currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0);
720                         currentConstraintRow[j].m_appliedImpulse = 0.f;
721                         currentConstraintRow[j].m_appliedPushImpulse = 0.f;
722                         currentConstraintRow[j].m_cfm = 0.f;
723                         currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0);
724                         currentConstraintRow[j].m_friction = 0.f;
725                         currentConstraintRow[j].m_frictionIndex = 0;
726                         currentConstraintRow[j].m_jacDiagABInv = 0.f;
727                         currentConstraintRow[j].m_lowerLimit = 0.f;
728                         currentConstraintRow[j].m_upperLimit = 0.f;
729
730                         currentConstraintRow[j].m_originalConstraint = i;
731                         currentConstraintRow[j].m_overrideNumSolverIterations = 0;
732                         currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);
733                         currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);
734                         currentConstraintRow[j].m_rhs = 0.f;
735                         currentConstraintRow[j].m_rhsPenetration = 0.f;
736                         currentConstraintRow[j].m_solverBodyIdA = 0;
737                         currentConstraintRow[j].m_solverBodyIdB = 0;
738                                                         
739                         currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
740                         currentConstraintRow[j].m_upperLimit = B3_INFINITY;
741                         currentConstraintRow[j].m_appliedImpulse = 0.f;
742                         currentConstraintRow[j].m_appliedPushImpulse = 0.f;
743                         currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
744                         currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
745                         currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;            
746                 }
747
748                 bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);
749                 bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);
750                 bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);
751                 bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);
752                 bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);
753                 bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);
754                 bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);
755                 bodyBPtr->m_turnVelocity  = (float4)(0,0,0,0);
756
757                 int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this
758
759                 
760
761
762                 b3GpuConstraintInfo2 info2;
763                 info2.fps = 1.f/timeStep;
764                 info2.erp = globalErp;
765                 info2.m_J1linearAxisFloat4 = &currentConstraintRow->m_contactNormal;
766                 info2.m_J1angularAxisFloat4 = &currentConstraintRow->m_relpos1CrossNormal;
767                 info2.m_J2linearAxisFloat4 = 0;
768                 info2.m_J2angularAxisFloat4 = &currentConstraintRow->m_relpos2CrossNormal;
769                 info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this
770
771                 ///the size of b3SolverConstraint needs be a multiple of float
772 //              b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));
773                 info2.m_constraintError = &currentConstraintRow->m_rhs;
774                 currentConstraintRow->m_cfm = globalCfm;
775                 info2.m_damping = globalDamping;
776                 info2.cfm = &currentConstraintRow->m_cfm;
777                 info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
778                 info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
779                 info2.m_numIterations = globalNumIterations;
780
781                 switch (constraint->m_constraintType)
782                 {
783                         case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:
784                         {
785                                 getInfo2Point2Point(constraint,&info2,bodies);
786                                 break;
787                         }
788                         case B3_GPU_FIXED_CONSTRAINT_TYPE:
789                         {
790                                 getInfo2Point2Point(constraint,&info2,bodies);
791
792                                 getInfo2FixedOrientation(constraint,&info2,bodies,3);
793
794                                 break;
795                         }
796
797                         default:
798                         {
799                         }
800                 }
801
802                 ///finalize the constraint setup
803                 for ( j=0;j<info1;j++)
804                 {
805                         __global b3SolverConstraint* solverConstraint = &currentConstraintRow[j];
806
807                         if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)
808                         {
809                                 solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;
810                         }
811
812                         if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)
813                         {
814                                 solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;
815                         }
816
817 //                                              solverConstraint->m_originalContactPoint = constraint;
818                                                         
819                         Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;
820                         {
821
822                                 //float4 angularFactorA(1,1,1);
823                                 float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;
824                                 solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;
825                         }
826                                                 
827                         Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;
828                         {
829
830                                 float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;
831                                 solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();
832                         }
833
834                         {
835                                 //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal
836                                 //because it gets multiplied iMJlB
837                                 float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;
838                                 float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);
839                                 float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?
840                                 float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);
841
842                                 float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);
843                                 sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);
844                                 sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);
845                                 sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);
846                                 float fsum = fabs(sum);
847                                 if (fsum>FLT_EPSILON)
848                                 {
849                                         solverConstraint->m_jacDiagABInv = 1.f/sum;
850                                 } else
851                                 {
852                                         solverConstraint->m_jacDiagABInv = 0.f;
853                                 }
854                         }
855
856
857                         ///fix rhs
858                         ///todo: add force/torque accelerators
859                         {
860                                 float rel_vel;
861                                 float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);
862                                 float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);
863
864                                 rel_vel = vel1Dotn+vel2Dotn;
865
866                                 float restitution = 0.f;
867                                 float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2
868                                 float   velocityError = restitution - rel_vel * info2.m_damping;
869                                 float   penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;
870                                 float   velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;
871                                 solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;
872                                 solverConstraint->m_appliedImpulse = 0.f;
873
874                         }
875                 }
876         }
877 }