[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / RigidBody / kernels / solverUtils.cl
1 /*
2 Copyright (c) 2013 Advanced Micro Devices, Inc.  
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose, 
7 including commercial applications, and to alter it and redistribute it freely, 
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Erwin Coumans
15
16 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
17
18 #pragma OPENCL EXTENSION cl_amd_printf : enable
19 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
20 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
21 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
22 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
23
24
25 #ifdef cl_ext_atomic_counters_32
26 #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
27 #else
28 #define counter32_t volatile global int*
29 #endif
30
31 typedef unsigned int u32;
32 typedef unsigned short u16;
33 typedef unsigned char u8;
34
35 #define GET_GROUP_IDX get_group_id(0)
36 #define GET_LOCAL_IDX get_local_id(0)
37 #define GET_GLOBAL_IDX get_global_id(0)
38 #define GET_GROUP_SIZE get_local_size(0)
39 #define GET_NUM_GROUPS get_num_groups(0)
40 #define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
41 #define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
42 #define AtomInc(x) atom_inc(&(x))
43 #define AtomInc1(x, out) out = atom_inc(&(x))
44 #define AppendInc(x, out) out = atomic_inc(x)
45 #define AtomAdd(x, value) atom_add(&(x), value)
46 #define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
47 #define AtomXhg(x, value) atom_xchg ( &(x), value )
48
49
50 #define SELECT_UINT4( b, a, condition ) select( b,a,condition )
51
52 #define make_float4 (float4)
53 #define make_float2 (float2)
54 #define make_uint4 (uint4)
55 #define make_int4 (int4)
56 #define make_uint2 (uint2)
57 #define make_int2 (int2)
58
59
60 #define max2 max
61 #define min2 min
62
63
64 ///////////////////////////////////////
65 //      Vector
66 ///////////////////////////////////////
67 __inline
68 float fastDiv(float numerator, float denominator)
69 {
70         return native_divide(numerator, denominator);   
71 //      return numerator/denominator;   
72 }
73
74 __inline
75 float4 fastDiv4(float4 numerator, float4 denominator)
76 {
77         return native_divide(numerator, denominator);   
78 }
79
80 __inline
81 float fastSqrtf(float f2)
82 {
83         return native_sqrt(f2);
84 //      return sqrt(f2);
85 }
86
87 __inline
88 float fastRSqrt(float f2)
89 {
90         return native_rsqrt(f2);
91 }
92
93 __inline
94 float fastLength4(float4 v)
95 {
96         return fast_length(v);
97 }
98
99 __inline
100 float4 fastNormalize4(float4 v)
101 {
102         return fast_normalize(v);
103 }
104
105
106 __inline
107 float sqrtf(float a)
108 {
109 //      return sqrt(a);
110         return native_sqrt(a);
111 }
112
113 __inline
114 float4 cross3(float4 a1, float4 b1)
115 {
116
117         float4  a=make_float4(a1.xyz,0.f);
118         float4  b=make_float4(b1.xyz,0.f);
119         //float4        a=a1;
120         //float4        b=b1;
121         return cross(a,b);
122 }
123
124 __inline
125 float dot3F4(float4 a, float4 b)
126 {
127         float4 a1 = make_float4(a.xyz,0.f);
128         float4 b1 = make_float4(b.xyz,0.f);
129         return dot(a1, b1);
130 }
131
132 __inline
133 float length3(const float4 a)
134 {
135         return sqrtf(dot3F4(a,a));
136 }
137
138 __inline
139 float dot4(const float4 a, const float4 b)
140 {
141         return dot( a, b );
142 }
143
144 //      for height
145 __inline
146 float dot3w1(const float4 point, const float4 eqn)
147 {
148         return dot3F4(point,eqn) + eqn.w;
149 }
150
151 __inline
152 float4 normalize3(const float4 a)
153 {
154         float4 n = make_float4(a.x, a.y, a.z, 0.f);
155         return fastNormalize4( n );
156 //      float length = sqrtf(dot3F4(a, a));
157 //      return 1.f/length * a;
158 }
159
160 __inline
161 float4 normalize4(const float4 a)
162 {
163         float length = sqrtf(dot4(a, a));
164         return 1.f/length * a;
165 }
166
167 __inline
168 float4 createEquation(const float4 a, const float4 b, const float4 c)
169 {
170         float4 eqn;
171         float4 ab = b-a;
172         float4 ac = c-a;
173         eqn = normalize3( cross3(ab, ac) );
174         eqn.w = -dot3F4(eqn,a);
175         return eqn;
176 }
177
178 ///////////////////////////////////////
179 //      Matrix3x3
180 ///////////////////////////////////////
181
182 typedef struct
183 {
184         float4 m_row[3];
185 }Matrix3x3;
186
187 __inline
188 Matrix3x3 mtZero();
189
190 __inline
191 Matrix3x3 mtIdentity();
192
193 __inline
194 Matrix3x3 mtTranspose(Matrix3x3 m);
195
196 __inline
197 Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);
198
199 __inline
200 float4 mtMul1(Matrix3x3 a, float4 b);
201
202 __inline
203 float4 mtMul3(float4 a, Matrix3x3 b);
204
205 __inline
206 Matrix3x3 mtZero()
207 {
208         Matrix3x3 m;
209         m.m_row[0] = (float4)(0.f);
210         m.m_row[1] = (float4)(0.f);
211         m.m_row[2] = (float4)(0.f);
212         return m;
213 }
214
215 __inline
216 Matrix3x3 mtIdentity()
217 {
218         Matrix3x3 m;
219         m.m_row[0] = (float4)(1,0,0,0);
220         m.m_row[1] = (float4)(0,1,0,0);
221         m.m_row[2] = (float4)(0,0,1,0);
222         return m;
223 }
224
225 __inline
226 Matrix3x3 mtTranspose(Matrix3x3 m)
227 {
228         Matrix3x3 out;
229         out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
230         out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
231         out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
232         return out;
233 }
234
235 __inline
236 Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
237 {
238         Matrix3x3 transB;
239         transB = mtTranspose( b );
240         Matrix3x3 ans;
241         //      why this doesn't run when 0ing in the for{}
242         a.m_row[0].w = 0.f;
243         a.m_row[1].w = 0.f;
244         a.m_row[2].w = 0.f;
245         for(int i=0; i<3; i++)
246         {
247 //      a.m_row[i].w = 0.f;
248                 ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
249                 ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
250                 ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
251                 ans.m_row[i].w = 0.f;
252         }
253         return ans;
254 }
255
256 __inline
257 float4 mtMul1(Matrix3x3 a, float4 b)
258 {
259         float4 ans;
260         ans.x = dot3F4( a.m_row[0], b );
261         ans.y = dot3F4( a.m_row[1], b );
262         ans.z = dot3F4( a.m_row[2], b );
263         ans.w = 0.f;
264         return ans;
265 }
266
267 __inline
268 float4 mtMul3(float4 a, Matrix3x3 b)
269 {
270         float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
271         float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
272         float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
273
274         float4 ans;
275         ans.x = dot3F4( a, colx );
276         ans.y = dot3F4( a, coly );
277         ans.z = dot3F4( a, colz );
278         return ans;
279 }
280
281 ///////////////////////////////////////
282 //      Quaternion
283 ///////////////////////////////////////
284
285 typedef float4 Quaternion;
286
287 __inline
288 Quaternion qtMul(Quaternion a, Quaternion b);
289
290 __inline
291 Quaternion qtNormalize(Quaternion in);
292
293 __inline
294 float4 qtRotate(Quaternion q, float4 vec);
295
296 __inline
297 Quaternion qtInvert(Quaternion q);
298
299
300
301
302
303 __inline
304 Quaternion qtMul(Quaternion a, Quaternion b)
305 {
306         Quaternion ans;
307         ans = cross3( a, b );
308         ans += a.w*b+b.w*a;
309 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
310         ans.w = a.w*b.w - dot3F4(a, b);
311         return ans;
312 }
313
314 __inline
315 Quaternion qtNormalize(Quaternion in)
316 {
317         return fastNormalize4(in);
318 //      in /= length( in );
319 //      return in;
320 }
321 __inline
322 float4 qtRotate(Quaternion q, float4 vec)
323 {
324         Quaternion qInv = qtInvert( q );
325         float4 vcpy = vec;
326         vcpy.w = 0.f;
327         float4 out = qtMul(qtMul(q,vcpy),qInv);
328         return out;
329 }
330
331 __inline
332 Quaternion qtInvert(Quaternion q)
333 {
334         return (Quaternion)(-q.xyz, q.w);
335 }
336
337 __inline
338 float4 qtInvRotate(const Quaternion q, float4 vec)
339 {
340         return qtRotate( qtInvert( q ), vec );
341 }
342
343
344
345
346 #define WG_SIZE 64
347
348 typedef struct
349 {
350         float4 m_pos;
351         Quaternion m_quat;
352         float4 m_linVel;
353         float4 m_angVel;
354
355         u32 m_shapeIdx;
356         float m_invMass;
357         float m_restituitionCoeff;
358         float m_frictionCoeff;
359 } Body;
360
361
362
363 typedef struct
364 {
365         Matrix3x3 m_invInertia;
366         Matrix3x3 m_initInvInertia;
367 } Shape;
368
369 typedef struct
370 {
371         float4 m_linear;
372         float4 m_worldPos[4];
373         float4 m_center;        
374         float m_jacCoeffInv[4];
375         float m_b[4];
376         float m_appliedRambdaDt[4];
377
378         float m_fJacCoeffInv[2];        
379         float m_fAppliedRambdaDt[2];    
380
381         u32 m_bodyA;
382         u32 m_bodyB;
383         int m_batchIdx;
384         u32 m_paddings;
385 } Constraint4;
386
387
388
389
390
391
392 __kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)
393 {
394         int i = GET_GLOBAL_IDX;
395         
396         if( i < numContactManifolds)
397         {
398                 int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
399                 bool isFixedA = (pa <0) || (pa == fixedBodyIndex);
400                 int bodyIndexA = abs(pa);
401                 if (!isFixedA)
402                 {
403                          AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);
404                 }
405                 barrier(CLK_GLOBAL_MEM_FENCE);
406                 int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
407                 bool isFixedB = (pb <0) || (pb == fixedBodyIndex);
408                 int bodyIndexB = abs(pb);
409                 if (!isFixedB)
410                 {
411                         AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);
412                 } 
413         }
414 }
415
416 __kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)
417 {
418         int i = GET_GLOBAL_IDX;
419         
420         if( i < numSplitBodies)
421         {
422                 linearVelocities[i] = make_float4(0);
423                 angularVelocities[i] = make_float4(0);
424         }
425 }
426
427
428 __kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
429 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
430 {
431         int i = GET_GLOBAL_IDX;
432         if (i<numBodies)
433         {
434                 if (gBodies[i].m_invMass)
435                 {
436                         int bodyOffset = offsetSplitBodies[i];
437                         int count = bodyCount[i];
438                         float factor = 1.f/((float)count);
439                         float4 averageLinVel = make_float4(0.f);
440                         float4 averageAngVel = make_float4(0.f);
441                         
442                         for (int j=0;j<count;j++)
443                         {
444                                 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
445                                 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
446                         }
447                         
448                         for (int j=0;j<count;j++)
449                         {
450                                 deltaLinearVelocities[bodyOffset+j] = averageLinVel;
451                                 deltaAngularVelocities[bodyOffset+j] = averageAngVel;
452                         }
453                         
454                 }//bodies[i].m_invMass
455         }//i<numBodies
456 }
457
458
459
460 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
461 {
462         *linear = make_float4(n.xyz,0.f);
463         *angular0 = cross3(r0, n);
464         *angular1 = -cross3(r1, n);
465 }
466
467
468 float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )
469 {
470         return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
471 }
472
473
474 float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
475                                         float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)
476 {
477         //      linear0,1 are normlized
478         float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
479         float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
480         float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
481         float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
482         return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);
483 }
484
485
486 void btPlaneSpace1 (float4 n, float4* p, float4* q);
487  void btPlaneSpace1 (float4 n, float4* p, float4* q)
488 {
489   if (fabs(n.z) > 0.70710678f) {
490     // choose p in y-z plane
491     float a = n.y*n.y + n.z*n.z;
492     float k = 1.f/sqrt(a);
493     p[0].x = 0;
494         p[0].y = -n.z*k;
495         p[0].z = n.y*k;
496     // set q = n x p
497     q[0].x = a*k;
498         q[0].y = -n.x*p[0].z;
499         q[0].z = n.x*p[0].y;
500   }
501   else {
502     // choose p in x-y plane
503     float a = n.x*n.x + n.y*n.y;
504     float k = 1.f/sqrt(a);
505     p[0].x = -n.y*k;
506         p[0].y = n.x*k;
507         p[0].z = 0;
508     // set q = n x p
509     q[0].x = -n.z*p[0].y;
510         q[0].y = n.z*p[0].x;
511         q[0].z = a*k;
512   }
513 }
514
515
516
517
518
519 void solveContact(__global Constraint4* cs,
520                         float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
521                         float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,
522                         float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)
523 {
524         float minRambdaDt = 0;
525         float maxRambdaDt = FLT_MAX;
526
527         for(int ic=0; ic<4; ic++)
528         {
529                 if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
530
531                 float4 angular0, angular1, linear;
532                 float4 r0 = cs->m_worldPos[ic] - posA;
533                 float4 r1 = cs->m_worldPos[ic] - posB;
534                 setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );
535         
536
537
538                 float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, 
539                         *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
540                 rambdaDt *= cs->m_jacCoeffInv[ic];
541
542                 
543                 {
544                         float prevSum = cs->m_appliedRambdaDt[ic];
545                         float updated = prevSum;
546                         updated += rambdaDt;
547                         updated = max2( updated, minRambdaDt );
548                         updated = min2( updated, maxRambdaDt );
549                         rambdaDt = updated - prevSum;
550                         cs->m_appliedRambdaDt[ic] = updated;
551                 }
552
553                         
554                 float4 linImp0 = invMassA*linear*rambdaDt;
555                 float4 linImp1 = invMassB*(-linear)*rambdaDt;
556                 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
557                 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
558
559                 
560                 if (invMassA)
561                 {
562                         *dLinVelA += linImp0;
563                         *dAngVelA += angImp0;
564                 }
565                 if (invMassB)
566                 {
567                         *dLinVelB += linImp1;
568                         *dAngVelB += angImp1;
569                 }
570         }
571 }
572
573
574 //      solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
575
576
577 void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, 
578 __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
579 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
580 {
581
582         //float frictionCoeff = ldsCs[0].m_linear.w;
583         int aIdx = ldsCs[0].m_bodyA;
584         int bIdx = ldsCs[0].m_bodyB;
585
586         float4 posA = gBodies[aIdx].m_pos;
587         float4 linVelA = gBodies[aIdx].m_linVel;
588         float4 angVelA = gBodies[aIdx].m_angVel;
589         float invMassA = gBodies[aIdx].m_invMass;
590         Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
591
592         float4 posB = gBodies[bIdx].m_pos;
593         float4 linVelB = gBodies[bIdx].m_linVel;
594         float4 angVelB = gBodies[bIdx].m_angVel;
595         float invMassB = gBodies[bIdx].m_invMass;
596         Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
597
598                         
599         float4 dLinVelA = make_float4(0,0,0,0);
600         float4 dAngVelA = make_float4(0,0,0,0);
601         float4 dLinVelB = make_float4(0,0,0,0);
602         float4 dAngVelB = make_float4(0,0,0,0);
603                         
604         int bodyOffsetA = offsetSplitBodies[aIdx];
605         int constraintOffsetA = contactConstraintOffsets[0].x;
606         int splitIndexA = bodyOffsetA+constraintOffsetA;
607         
608         if (invMassA)
609         {
610                 dLinVelA = deltaLinearVelocities[splitIndexA];
611                 dAngVelA = deltaAngularVelocities[splitIndexA];
612         }
613
614         int bodyOffsetB = offsetSplitBodies[bIdx];
615         int constraintOffsetB = contactConstraintOffsets[0].y;
616         int splitIndexB= bodyOffsetB+constraintOffsetB;
617
618         if (invMassB)
619         {
620                 dLinVelB = deltaLinearVelocities[splitIndexB];
621                 dAngVelB = deltaAngularVelocities[splitIndexB];
622         }
623
624         solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
625                         posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);
626
627         if (invMassA)
628         {
629                 deltaLinearVelocities[splitIndexA] = dLinVelA;
630                 deltaAngularVelocities[splitIndexA] = dAngVelA;
631         } 
632         if (invMassB)
633         {
634                 deltaLinearVelocities[splitIndexB] = dLinVelB;
635                 deltaAngularVelocities[splitIndexB] = dAngVelB;
636         }
637
638 }
639
640
641 __kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
642 __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
643 float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
644 )
645 {
646         int i = GET_GLOBAL_IDX;
647         if (i<numManifolds)
648         {
649                 solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
650         }
651 }
652
653
654
655
656 void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,
657                                                         __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
658                                                         __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
659 {
660         float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;
661         int aIdx = ldsCs[0].m_bodyA;
662         int bIdx = ldsCs[0].m_bodyB;
663
664
665         float4 posA = gBodies[aIdx].m_pos;
666         float4 linVelA = gBodies[aIdx].m_linVel;
667         float4 angVelA = gBodies[aIdx].m_angVel;
668         float invMassA = gBodies[aIdx].m_invMass;
669         Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
670
671         float4 posB = gBodies[bIdx].m_pos;
672         float4 linVelB = gBodies[bIdx].m_linVel;
673         float4 angVelB = gBodies[bIdx].m_angVel;
674         float invMassB = gBodies[bIdx].m_invMass;
675         Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
676         
677
678         float4 dLinVelA = make_float4(0,0,0,0);
679         float4 dAngVelA = make_float4(0,0,0,0);
680         float4 dLinVelB = make_float4(0,0,0,0);
681         float4 dAngVelB = make_float4(0,0,0,0);
682                         
683         int bodyOffsetA = offsetSplitBodies[aIdx];
684         int constraintOffsetA = contactConstraintOffsets[0].x;
685         int splitIndexA = bodyOffsetA+constraintOffsetA;
686         
687         if (invMassA)
688         {
689                 dLinVelA = deltaLinearVelocities[splitIndexA];
690                 dAngVelA = deltaAngularVelocities[splitIndexA];
691         }
692
693         int bodyOffsetB = offsetSplitBodies[bIdx];
694         int constraintOffsetB = contactConstraintOffsets[0].y;
695         int splitIndexB= bodyOffsetB+constraintOffsetB;
696
697         if (invMassB)
698         {
699                 dLinVelB = deltaLinearVelocities[splitIndexB];
700                 dAngVelB = deltaAngularVelocities[splitIndexB];
701         }
702
703
704
705
706         {
707                 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
708                 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
709
710                 float sum = 0;
711                 for(int j=0; j<4; j++)
712                 {
713                         sum +=ldsCs[0].m_appliedRambdaDt[j];
714                 }
715                 frictionCoeff = 0.7f;
716                 for(int j=0; j<4; j++)
717                 {
718                         maxRambdaDt[j] = frictionCoeff*sum;
719                         minRambdaDt[j] = -maxRambdaDt[j];
720                 }
721
722                 
723 //              solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
724 //                      posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );
725                 
726                 
727                 {
728                         
729                         __global Constraint4* cs = ldsCs;
730                         
731                         if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;
732                         const float4 center = cs->m_center;
733                         
734                         float4 n = -cs->m_linear;
735                         
736                         float4 tangent[2];
737                         btPlaneSpace1(n,&tangent[0],&tangent[1]);
738                         float4 angular0, angular1, linear;
739                         float4 r0 = center - posA;
740                         float4 r1 = center - posB;
741                         for(int i=0; i<2; i++)
742                         {
743                                 setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );
744                                 float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
745                                                                                         linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );
746                                 rambdaDt *= cs->m_fJacCoeffInv[i];
747                                 
748                                 {
749                                         float prevSum = cs->m_fAppliedRambdaDt[i];
750                                         float updated = prevSum;
751                                         updated += rambdaDt;
752                                         updated = max2( updated, minRambdaDt[i] );
753                                         updated = min2( updated, maxRambdaDt[i] );
754                                         rambdaDt = updated - prevSum;
755                                         cs->m_fAppliedRambdaDt[i] = updated;
756                                 }
757                                 
758                                 float4 linImp0 = invMassA*linear*rambdaDt;
759                                 float4 linImp1 = invMassB*(-linear)*rambdaDt;
760                                 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
761                                 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
762                                 
763                                 dLinVelA += linImp0;
764                                 dAngVelA += angImp0;
765                                 dLinVelB += linImp1;
766                                 dAngVelB += angImp1;
767                         }
768                         {       //      angular damping for point constraint
769                                 float4 ab = normalize3( posB - posA );
770                                 float4 ac = normalize3( center - posA );
771                                 if( dot3F4( ab, ac ) > 0.95f  || (invMassA == 0.f || invMassB == 0.f))
772                                 {
773                                         float angNA = dot3F4( n, angVelA );
774                                         float angNB = dot3F4( n, angVelB );
775                                         
776                                         dAngVelA -= (angNA*0.1f)*n;
777                                         dAngVelB -= (angNB*0.1f)*n;
778                                 }
779                         }
780                 }
781
782                 
783                 
784         }
785
786         if (invMassA)
787         {
788                 deltaLinearVelocities[splitIndexA] = dLinVelA;
789                 deltaAngularVelocities[splitIndexA] = dAngVelA;
790         } 
791         if (invMassB)
792         {
793                 deltaLinearVelocities[splitIndexB] = dLinVelB;
794                 deltaAngularVelocities[splitIndexB] = dAngVelB;
795         }
796  
797
798 }
799
800
801 __kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
802                                                                                 __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
803                                                                                 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
804                                                                                 float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
805 )
806 {
807         int i = GET_GLOBAL_IDX;
808         if (i<numManifolds)
809         {
810                 solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
811         }
812 }
813
814
815 __kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
816                                                                         __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
817 {
818         int i = GET_GLOBAL_IDX;
819         if (i<numBodies)
820         {
821                 if (gBodies[i].m_invMass)
822                 {
823                         int bodyOffset = offsetSplitBodies[i];
824                         int count = bodyCount[i];
825                         if (count)
826                         {
827                                 gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
828                                 gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
829                         }
830                 }
831         }
832 }
833
834
835
836 void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
837         const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, 
838         __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,
839         Constraint4* dstC )
840 {
841         dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
842         dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
843
844         float dtInv = 1.f/dt;
845         for(int ic=0; ic<4; ic++)
846         {
847                 dstC->m_appliedRambdaDt[ic] = 0.f;
848         }
849         dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
850
851
852         dstC->m_linear = src->m_worldNormalOnB;
853         dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );
854         for(int ic=0; ic<4; ic++)
855         {
856                 float4 r0 = src->m_worldPosB[ic] - posA;
857                 float4 r1 = src->m_worldPosB[ic] - posB;
858
859                 if( ic >= src->m_worldNormalOnB.w )//npoints
860                 {
861                         dstC->m_jacCoeffInv[ic] = 0.f;
862                         continue;
863                 }
864
865                 float relVelN;
866                 {
867                         float4 linear, angular0, angular1;
868                         setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
869
870                         dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
871                                 invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);
872
873                         relVelN = calcRelVel(linear, -linear, angular0, angular1,
874                                 linVelA, angVelA, linVelB, angVelB);
875
876                         float e = 0.f;//src->getRestituitionCoeff();
877                         if( relVelN*relVelN < 0.004f ) e = 0.f;
878
879                         dstC->m_b[ic] = e*relVelN;
880                         //float penetration = src->m_worldPosB[ic].w;
881                         dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;
882                         dstC->m_appliedRambdaDt[ic] = 0.f;
883                 }
884         }
885
886         if( src->m_worldNormalOnB.w > 0 )//npoints
887         {       //      prepare friction
888                 float4 center = make_float4(0.f);
889                 for(int i=0; i<src->m_worldNormalOnB.w; i++) 
890                         center += src->m_worldPosB[i];
891                 center /= (float)src->m_worldNormalOnB.w;
892
893                 float4 tangent[2];
894                 btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);
895                 
896                 float4 r[2];
897                 r[0] = center - posA;
898                 r[1] = center - posB;
899
900                 for(int i=0; i<2; i++)
901                 {
902                         float4 linear, angular0, angular1;
903                         setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
904
905                         dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
906                                 invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);
907                         dstC->m_fAppliedRambdaDt[i] = 0.f;
908                 }
909                 dstC->m_center = center;
910         }
911
912         for(int i=0; i<4; i++)
913         {
914                 if( i<src->m_worldNormalOnB.w )
915                 {
916                         dstC->m_worldPos[i] = src->m_worldPosB[i];
917                 }
918                 else
919                 {
920                         dstC->m_worldPos[i] = make_float4(0.f);
921                 }
922         }
923 }
924
925
926 __kernel
927 __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
928 void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, 
929 __global const unsigned int* bodyCount,
930 int nContacts,
931 float dt,
932 float positionDrift,
933 float positionConstraintCoeff
934 )
935 {
936         int gIdx = GET_GLOBAL_IDX;
937         
938         if( gIdx < nContacts )
939         {
940                 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
941                 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
942
943                 float4 posA = gBodies[aIdx].m_pos;
944                 float4 linVelA = gBodies[aIdx].m_linVel;
945                 float4 angVelA = gBodies[aIdx].m_angVel;
946                 float invMassA = gBodies[aIdx].m_invMass;
947                 Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
948
949                 float4 posB = gBodies[bIdx].m_pos;
950                 float4 linVelB = gBodies[bIdx].m_linVel;
951                 float4 angVelB = gBodies[bIdx].m_angVel;
952                 float invMassB = gBodies[bIdx].m_invMass;
953                 Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
954
955                 Constraint4 cs;
956
957                 float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;
958                 float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;
959
960         setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
961                         &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,
962                         &cs  );
963                 
964                 cs.m_batchIdx = gContact[gIdx].m_batchIdx;
965
966                 gConstraintOut[gIdx] = cs;
967         }
968 }