1 static const char* solverKernelsCL= \
\r
2 "#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
\r
3 "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
\r
4 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
\r
5 "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
\r
6 "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
\r
9 "#ifdef cl_ext_atomic_counters_32\n"
\r
10 "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
\r
12 "#define counter32_t volatile global int*\n"
\r
15 "typedef unsigned int u32;\n"
\r
16 "typedef unsigned short u16;\n"
\r
17 "typedef unsigned char u8;\n"
\r
19 "#define GET_GROUP_IDX get_group_id(0)\n"
\r
20 "#define GET_LOCAL_IDX get_local_id(0)\n"
\r
21 "#define GET_GLOBAL_IDX get_global_id(0)\n"
\r
22 "#define GET_GROUP_SIZE get_local_size(0)\n"
\r
23 "#define GET_NUM_GROUPS get_num_groups(0)\n"
\r
24 "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
\r
25 "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
\r
26 "#define AtomInc(x) atom_inc(&(x))\n"
\r
27 "#define AtomInc1(x, out) out = atom_inc(&(x))\n"
\r
28 "#define AppendInc(x, out) out = atomic_inc(x)\n"
\r
29 "#define AtomAdd(x, value) atom_add(&(x), value)\n"
\r
30 "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
\r
31 "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
\r
34 "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
\r
36 "#define make_float4 (float4)\n"
\r
37 "#define make_float2 (float2)\n"
\r
38 "#define make_uint4 (uint4)\n"
\r
39 "#define make_int4 (int4)\n"
\r
40 "#define make_uint2 (uint2)\n"
\r
41 "#define make_int2 (int2)\n"
\r
44 "#define max2 max\n"
\r
45 "#define min2 min\n"
\r
48 "///////////////////////////////////////\n"
\r
50 "///////////////////////////////////////\n"
\r
52 "float fastDiv(float numerator, float denominator)\n"
\r
54 " return native_divide(numerator, denominator); \n"
\r
55 "// return numerator/denominator; \n"
\r
59 "float4 fastDiv4(float4 numerator, float4 denominator)\n"
\r
61 " return native_divide(numerator, denominator); \n"
\r
65 "float fastSqrtf(float f2)\n"
\r
67 " return native_sqrt(f2);\n"
\r
68 "// return sqrt(f2);\n"
\r
72 "float fastRSqrt(float f2)\n"
\r
74 " return native_rsqrt(f2);\n"
\r
78 "float fastLength4(float4 v)\n"
\r
80 " return fast_length(v);\n"
\r
84 "float4 fastNormalize4(float4 v)\n"
\r
86 " return fast_normalize(v);\n"
\r
91 "float sqrtf(float a)\n"
\r
93 "// return sqrt(a);\n"
\r
94 " return native_sqrt(a);\n"
\r
98 "float4 cross3(float4 a, float4 b)\n"
\r
100 " return cross(a,b);\n"
\r
104 "float dot3F4(float4 a, float4 b)\n"
\r
106 " float4 a1 = make_float4(a.xyz,0.f);\n"
\r
107 " float4 b1 = make_float4(b.xyz,0.f);\n"
\r
108 " return dot(a1, b1);\n"
\r
112 "float length3(const float4 a)\n"
\r
114 " return sqrtf(dot3F4(a,a));\n"
\r
118 "float dot4(const float4 a, const float4 b)\n"
\r
120 " return dot( a, b );\n"
\r
125 "float dot3w1(const float4 point, const float4 eqn)\n"
\r
127 " return dot3F4(point,eqn) + eqn.w;\n"
\r
131 "float4 normalize3(const float4 a)\n"
\r
133 " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
\r
134 " return fastNormalize4( n );\n"
\r
135 "// float length = sqrtf(dot3F4(a, a));\n"
\r
136 "// return 1.f/length * a;\n"
\r
140 "float4 normalize4(const float4 a)\n"
\r
142 " float length = sqrtf(dot4(a, a));\n"
\r
143 " return 1.f/length * a;\n"
\r
147 "float4 createEquation(const float4 a, const float4 b, const float4 c)\n"
\r
150 " float4 ab = b-a;\n"
\r
151 " float4 ac = c-a;\n"
\r
152 " eqn = normalize3( cross3(ab, ac) );\n"
\r
153 " eqn.w = -dot3F4(eqn,a);\n"
\r
157 "///////////////////////////////////////\n"
\r
159 "///////////////////////////////////////\n"
\r
163 " float4 m_row[3];\n"
\r
167 "Matrix3x3 mtZero();\n"
\r
170 "Matrix3x3 mtIdentity();\n"
\r
173 "Matrix3x3 mtTranspose(Matrix3x3 m);\n"
\r
176 "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"
\r
179 "float4 mtMul1(Matrix3x3 a, float4 b);\n"
\r
182 "float4 mtMul3(float4 a, Matrix3x3 b);\n"
\r
185 "Matrix3x3 mtZero()\n"
\r
188 " m.m_row[0] = (float4)(0.f);\n"
\r
189 " m.m_row[1] = (float4)(0.f);\n"
\r
190 " m.m_row[2] = (float4)(0.f);\n"
\r
195 "Matrix3x3 mtIdentity()\n"
\r
198 " m.m_row[0] = (float4)(1,0,0,0);\n"
\r
199 " m.m_row[1] = (float4)(0,1,0,0);\n"
\r
200 " m.m_row[2] = (float4)(0,0,1,0);\n"
\r
205 "Matrix3x3 mtTranspose(Matrix3x3 m)\n"
\r
207 " Matrix3x3 out;\n"
\r
208 " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
\r
209 " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
\r
210 " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
\r
215 "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
\r
217 " Matrix3x3 transB;\n"
\r
218 " transB = mtTranspose( b );\n"
\r
219 " Matrix3x3 ans;\n"
\r
220 " // why this doesn't run when 0ing in the for{}\n"
\r
221 " a.m_row[0].w = 0.f;\n"
\r
222 " a.m_row[1].w = 0.f;\n"
\r
223 " a.m_row[2].w = 0.f;\n"
\r
224 " for(int i=0; i<3; i++)\n"
\r
226 "// a.m_row[i].w = 0.f;\n"
\r
227 " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n"
\r
228 " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n"
\r
229 " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n"
\r
230 " ans.m_row[i].w = 0.f;\n"
\r
236 "float4 mtMul1(Matrix3x3 a, float4 b)\n"
\r
239 " ans.x = dot3F4( a.m_row[0], b );\n"
\r
240 " ans.y = dot3F4( a.m_row[1], b );\n"
\r
241 " ans.z = dot3F4( a.m_row[2], b );\n"
\r
247 "float4 mtMul3(float4 a, Matrix3x3 b)\n"
\r
249 " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
\r
250 " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
\r
251 " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
\r
254 " ans.x = dot3F4( a, colx );\n"
\r
255 " ans.y = dot3F4( a, coly );\n"
\r
256 " ans.z = dot3F4( a, colz );\n"
\r
260 "///////////////////////////////////////\n"
\r
262 "///////////////////////////////////////\n"
\r
264 "typedef float4 Quaternion;\n"
\r
267 "Quaternion qtMul(Quaternion a, Quaternion b);\n"
\r
270 "Quaternion qtNormalize(Quaternion in);\n"
\r
273 "float4 qtRotate(Quaternion q, float4 vec);\n"
\r
276 "Quaternion qtInvert(Quaternion q);\n"
\r
279 "Matrix3x3 qtGetRotationMatrix(Quaternion q);\n"
\r
284 "Quaternion qtMul(Quaternion a, Quaternion b)\n"
\r
286 " Quaternion ans;\n"
\r
287 " ans = cross3( a, b );\n"
\r
288 " ans += a.w*b+b.w*a;\n"
\r
289 "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
\r
290 " ans.w = a.w*b.w - dot3F4(a, b);\n"
\r
295 "Quaternion qtNormalize(Quaternion in)\n"
\r
297 " return fastNormalize4(in);\n"
\r
298 "// in /= length( in );\n"
\r
302 "float4 qtRotate(Quaternion q, float4 vec)\n"
\r
304 " Quaternion qInv = qtInvert( q );\n"
\r
305 " float4 vcpy = vec;\n"
\r
307 " float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
\r
312 "Quaternion qtInvert(Quaternion q)\n"
\r
314 " return (Quaternion)(-q.xyz, q.w);\n"
\r
318 "float4 qtInvRotate(const Quaternion q, float4 vec)\n"
\r
320 " return qtRotate( qtInvert( q ), vec );\n"
\r
324 "Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n"
\r
326 " float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
\r
327 " Matrix3x3 out;\n"
\r
329 " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
\r
330 " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
\r
331 " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
\r
332 " out.m_row[0].w = 0.f;\n"
\r
334 " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
\r
335 " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
\r
336 " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
\r
337 " out.m_row[1].w = 0.f;\n"
\r
339 " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
\r
340 " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
\r
341 " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
\r
342 " out.m_row[2].w = 0.f;\n"
\r
350 "#define WG_SIZE 64\n"
\r
355 " Quaternion m_quat;\n"
\r
356 " float4 m_linVel;\n"
\r
357 " float4 m_angVel;\n"
\r
359 " u32 m_shapeIdx;\n"
\r
360 " u32 m_shapeType;\n"
\r
361 " float m_invMass;\n"
\r
362 " float m_restituitionCoeff;\n"
\r
363 " float m_frictionCoeff;\n"
\r
368 " Matrix3x3 m_invInertia;\n"
\r
369 " Matrix3x3 m_initInvInertia;\n"
\r
374 " float4 m_linear;\n"
\r
375 " float4 m_worldPos[4];\n"
\r
376 " float4 m_center; \n"
\r
377 " float m_jacCoeffInv[4];\n"
\r
379 " float m_appliedRambdaDt[4];\n"
\r
381 " float m_fJacCoeffInv[2]; \n"
\r
382 " float m_fAppliedRambdaDt[2]; \n"
\r
387 " int m_batchIdx;\n"
\r
388 " u32 m_paddings[1];\n"
\r
393 " float4 m_worldPos[4];\n"
\r
394 " float4 m_worldNormal;\n"
\r
396 " int m_batchIdx;\n"
\r
398 " u32 m_bodyAPtr;\n"
\r
399 " u32 m_bodyBPtr;\n"
\r
404 " int m_nConstraints;\n"
\r
406 " int m_batchIdx;\n"
\r
408 "// int m_paddings[1];\n"
\r
413 " int m_solveFriction;\n"
\r
414 " int m_maxBatch; // long batch really kills the performance\n"
\r
415 " int m_batchIdx;\n"
\r
417 "// int m_paddings[1];\n"
\r
418 "} ConstBufferBatchSolve;\n"
\r
421 "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"
\r
424 " *angular0 = -cross3(r0, n);\n"
\r
425 " *angular1 = cross3(r1, n);\n"
\r
429 "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"
\r
431 " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"
\r
435 "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"
\r
436 " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"
\r
438 " // linear0,1 are normlized\n"
\r
439 " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n"
\r
440 " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n"
\r
441 " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n"
\r
442 " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
\r
443 " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
\r
448 "void solveContact(__global Constraint4* cs,\n"
\r
449 " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"
\r
450 " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n"
\r
452 " float minRambdaDt = 0;\n"
\r
453 " float maxRambdaDt = FLT_MAX;\n"
\r
455 " for(int ic=0; ic<4; ic++)\n"
\r
457 " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n"
\r
459 " float4 angular0, angular1, linear;\n"
\r
460 " float4 r0 = cs->m_worldPos[ic] - posA;\n"
\r
461 " float4 r1 = cs->m_worldPos[ic] - posB;\n"
\r
462 " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n"
\r
464 " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n"
\r
465 " *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n"
\r
466 " rambdaDt *= cs->m_jacCoeffInv[ic];\n"
\r
469 " float prevSum = cs->m_appliedRambdaDt[ic];\n"
\r
470 " float updated = prevSum;\n"
\r
471 " updated += rambdaDt;\n"
\r
472 " updated = max2( updated, minRambdaDt );\n"
\r
473 " updated = min2( updated, maxRambdaDt );\n"
\r
474 " rambdaDt = updated - prevSum;\n"
\r
475 " cs->m_appliedRambdaDt[ic] = updated;\n"
\r
478 " float4 linImp0 = invMassA*linear*rambdaDt;\n"
\r
479 " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"
\r
480 " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"
\r
481 " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"
\r
483 " *linVelA += linImp0;\n"
\r
484 " *angVelA += angImp0;\n"
\r
485 " *linVelB += linImp1;\n"
\r
486 " *angVelB += angImp1;\n"
\r
491 "void solveFriction(__global Constraint4* cs,\n"
\r
492 " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"
\r
493 " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n"
\r
494 " float maxRambdaDt[4], float minRambdaDt[4])\n"
\r
496 " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n"
\r
497 " const float4 center = cs->m_center;\n"
\r
499 " float4 n = -cs->m_linear;\n"
\r
501 " float4 tangent[2];\n"
\r
502 " tangent[0] = cross3( n, cs->m_worldPos[0]-center );\n"
\r
503 " tangent[1] = cross3( tangent[0], n );\n"
\r
504 " tangent[0] = normalize3( tangent[0] );\n"
\r
505 " tangent[1] = normalize3( tangent[1] );\n"
\r
507 " float4 angular0, angular1, linear;\n"
\r
508 " float4 r0 = center - posA;\n"
\r
509 " float4 r1 = center - posB;\n"
\r
510 " for(int i=0; i<2; i++)\n"
\r
512 " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n"
\r
513 " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n"
\r
514 " *linVelA, *angVelA, *linVelB, *angVelB );\n"
\r
515 " rambdaDt *= cs->m_fJacCoeffInv[i];\n"
\r
518 " float prevSum = cs->m_fAppliedRambdaDt[i];\n"
\r
519 " float updated = prevSum;\n"
\r
520 " updated += rambdaDt;\n"
\r
521 " updated = max2( updated, minRambdaDt[i] );\n"
\r
522 " updated = min2( updated, maxRambdaDt[i] );\n"
\r
523 " rambdaDt = updated - prevSum;\n"
\r
524 " cs->m_fAppliedRambdaDt[i] = updated;\n"
\r
527 " float4 linImp0 = invMassA*linear*rambdaDt;\n"
\r
528 " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"
\r
529 " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"
\r
530 " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"
\r
532 " *linVelA += linImp0;\n"
\r
533 " *angVelA += angImp0;\n"
\r
534 " *linVelB += linImp1;\n"
\r
535 " *angVelB += angImp1;\n"
\r
537 " { // angular damping for point constraint\n"
\r
538 " float4 ab = normalize3( posB - posA );\n"
\r
539 " float4 ac = normalize3( center - posA );\n"
\r
540 " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n"
\r
542 " float angNA = dot3F4( n, *angVelA );\n"
\r
543 " float angNB = dot3F4( n, *angVelB );\n"
\r
545 " *angVelA -= (angNA*0.1f)*n;\n"
\r
546 " *angVelB -= (angNB*0.1f)*n;\n"
\r
551 "void solveAConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"
\r
553 " float frictionCoeff = ldsCs[0].m_linear.w;\n"
\r
554 " int aIdx = ldsCs[0].m_bodyA;\n"
\r
555 " int bIdx = ldsCs[0].m_bodyB;\n"
\r
557 " float4 posA = gBodies[aIdx].m_pos;\n"
\r
558 " float4 linVelA = gBodies[aIdx].m_linVel;\n"
\r
559 " float4 angVelA = gBodies[aIdx].m_angVel;\n"
\r
560 " float invMassA = gBodies[aIdx].m_invMass;\n"
\r
561 " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
\r
563 " float4 posB = gBodies[bIdx].m_pos;\n"
\r
564 " float4 linVelB = gBodies[bIdx].m_linVel;\n"
\r
565 " float4 angVelB = gBodies[bIdx].m_angVel;\n"
\r
566 " float invMassB = gBodies[bIdx].m_invMass;\n"
\r
567 " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
\r
571 " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
\r
572 " posB, &linVelB, &angVelB, invMassB, invInertiaB );\n"
\r
576 " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"
\r
577 " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"
\r
579 " float sum = 0;\n"
\r
580 " for(int j=0; j<4; j++)\n"
\r
582 " sum +=ldsCs[0].m_appliedRambdaDt[j];\n"
\r
584 " frictionCoeff = 0.7f;\n"
\r
585 " for(int j=0; j<4; j++)\n"
\r
587 " maxRambdaDt[j] = frictionCoeff*sum;\n"
\r
588 " minRambdaDt[j] = -maxRambdaDt[j];\n"
\r
591 " solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
\r
592 " posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"
\r
595 " gBodies[aIdx].m_linVel = linVelA;\n"
\r
596 " gBodies[aIdx].m_angVel = angVelA;\n"
\r
597 " gBodies[bIdx].m_linVel = linVelB;\n"
\r
598 " gBodies[bIdx].m_angVel = angVelB;\n"
\r
601 "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"
\r
603 " float frictionCoeff = ldsCs[0].m_linear.w;\n"
\r
604 " int aIdx = ldsCs[0].m_bodyA;\n"
\r
605 " int bIdx = ldsCs[0].m_bodyB;\n"
\r
607 " float4 posA = gBodies[aIdx].m_pos;\n"
\r
608 " float4 linVelA = gBodies[aIdx].m_linVel;\n"
\r
609 " float4 angVelA = gBodies[aIdx].m_angVel;\n"
\r
610 " float invMassA = gBodies[aIdx].m_invMass;\n"
\r
611 " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
\r
613 " float4 posB = gBodies[bIdx].m_pos;\n"
\r
614 " float4 linVelB = gBodies[bIdx].m_linVel;\n"
\r
615 " float4 angVelB = gBodies[bIdx].m_angVel;\n"
\r
616 " float invMassB = gBodies[bIdx].m_invMass;\n"
\r
617 " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
\r
619 " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
\r
620 " posB, &linVelB, &angVelB, invMassB, invInertiaB );\n"
\r
622 " gBodies[aIdx].m_linVel = linVelA;\n"
\r
623 " gBodies[aIdx].m_angVel = angVelA;\n"
\r
624 " gBodies[bIdx].m_linVel = linVelB;\n"
\r
625 " gBodies[bIdx].m_angVel = angVelB;\n"
\r
628 "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"
\r
630 " float frictionCoeff = ldsCs[0].m_linear.w;\n"
\r
631 " int aIdx = ldsCs[0].m_bodyA;\n"
\r
632 " int bIdx = ldsCs[0].m_bodyB;\n"
\r
634 " float4 posA = gBodies[aIdx].m_pos;\n"
\r
635 " float4 linVelA = gBodies[aIdx].m_linVel;\n"
\r
636 " float4 angVelA = gBodies[aIdx].m_angVel;\n"
\r
637 " float invMassA = gBodies[aIdx].m_invMass;\n"
\r
638 " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
\r
640 " float4 posB = gBodies[bIdx].m_pos;\n"
\r
641 " float4 linVelB = gBodies[bIdx].m_linVel;\n"
\r
642 " float4 angVelB = gBodies[bIdx].m_angVel;\n"
\r
643 " float invMassB = gBodies[bIdx].m_invMass;\n"
\r
644 " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
\r
647 " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"
\r
648 " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"
\r
650 " float sum = 0;\n"
\r
651 " for(int j=0; j<4; j++)\n"
\r
653 " sum +=ldsCs[0].m_appliedRambdaDt[j];\n"
\r
655 " frictionCoeff = 0.7f;\n"
\r
656 " for(int j=0; j<4; j++)\n"
\r
658 " maxRambdaDt[j] = frictionCoeff*sum;\n"
\r
659 " minRambdaDt[j] = -maxRambdaDt[j];\n"
\r
662 " solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"
\r
663 " posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"
\r
666 " gBodies[aIdx].m_linVel = linVelA;\n"
\r
667 " gBodies[aIdx].m_angVel = angVelA;\n"
\r
668 " gBodies[bIdx].m_linVel = linVelB;\n"
\r
669 " gBodies[bIdx].m_angVel = angVelB;\n"
\r
672 "typedef struct \n"
\r
674 " int m_valInt0;\n"
\r
675 " int m_valInt1;\n"
\r
676 " int m_valInt2;\n"
\r
677 " int m_valInt3;\n"
\r
683 "} SolverDebugInfo;\n"
\r
687 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
\r
688 "//void BatchSolveKernel(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraints, __global int* gN, __global int* gOffsets, __global SolverDebugInfo* debugInfo, ConstBufferBatchSolve cb)\n"
\r
689 "void BatchSolveKernel(__global Body* gBodies, \n"
\r
690 "__global Shape* gShapes, \n"
\r
691 "__global Constraint4* gConstraints, \n"
\r
692 "__global int* gN, \n"
\r
693 "__global int* gOffsets, \n"
\r
694 "ConstBufferBatchSolve cb)\n"
\r
696 " __local int ldsBatchIdx[WG_SIZE+1];\n"
\r
698 " __local int ldsCurBatch;\n"
\r
699 " __local int ldsNextBatch;\n"
\r
700 " __local int ldsStart;\n"
\r
702 " int lIdx = GET_LOCAL_IDX;\n"
\r
703 " int wgIdx = GET_GROUP_IDX;\n"
\r
705 " int gIdx = GET_GLOBAL_IDX;\n"
\r
706 "// debugInfo[gIdx].m_valInt0 = gIdx;\n"
\r
707 " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
\r
709 " const int solveFriction = cb.m_solveFriction;\n"
\r
710 " const int maxBatch = cb.m_maxBatch;\n"
\r
711 " const int bIdx = cb.m_batchIdx;\n"
\r
712 " const int nSplit = cb.m_nSplit;\n"
\r
714 " int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"
\r
715 " int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"
\r
716 " int cellIdx = xIdx+yIdx*nSplit;\n"
\r
718 " if( gN[cellIdx] == 0 ) \n"
\r
721 " const int start = gOffsets[cellIdx];\n"
\r
722 " const int end = start + gN[cellIdx];\n"
\r
725 " if( lIdx == 0 )\n"
\r
727 " ldsCurBatch = 0;\n"
\r
728 " ldsNextBatch = 0;\n"
\r
729 " ldsStart = start;\n"
\r
733 " GROUP_LDS_BARRIER;\n"
\r
735 " int idx=ldsStart+lIdx;\n"
\r
736 " while (ldsCurBatch < maxBatch)\n"
\r
738 " for(; idx<end; )\n"
\r
740 " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"
\r
742 " if( solveFriction )\n"
\r
743 " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
\r
745 " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
\r
752 " GROUP_LDS_BARRIER;\n"
\r
753 " if( lIdx == 0 )\n"
\r
755 " ldsCurBatch++;\n"
\r
757 " GROUP_LDS_BARRIER;\n"
\r
763 " int counter0 = 0;\n"
\r
764 " int counter1 = 0;\n"
\r
769 " int curStart = ldsStart;\n"
\r
770 " int curBatch = ldsCurBatch;\n"
\r
772 " GROUP_LDS_BARRIER;\n"
\r
774 " while( curBatch == ldsNextBatch && curStart < end )\n"
\r
777 " int idx = curStart + lIdx;\n"
\r
778 " if( lIdx == 0 ) \n"
\r
779 " ldsBatchIdx[0] = curBatch;\n"
\r
780 " GROUP_LDS_BARRIER;\n"
\r
781 " ldsBatchIdx[(lIdx == 0)? lIdx:lIdx+1] = curBatch;\n"
\r
782 " GROUP_LDS_BARRIER;\n"
\r
783 "// debugInfo[gIdx].m_valInt2 = gConstraints[idx].m_batchIdx;\n"
\r
784 "// debugInfo[gIdx].m_valInt3 = idx;\n"
\r
787 " ldsBatchIdx[lIdx+1] = (idx<end)? gConstraints[idx].m_batchIdx : -1;\n"
\r
789 " GROUP_LDS_BARRIER;\n"
\r
792 " if( ldsBatchIdx[lIdx+1] == curBatch )\n"
\r
796 " if( solveFriction )\n"
\r
798 " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
\r
802 " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
\r
807 " if( ldsBatchIdx[lIdx] == curBatch )\n"
\r
809 " ldsStart = idx;\n"
\r
810 " ldsNextBatch = ldsBatchIdx[lIdx+1];\n"
\r
814 " GROUP_LDS_BARRIER;\n"
\r
816 " curStart += GET_GROUP_SIZE;\n"
\r
819 " if( lIdx == 0 )\n"
\r
821 " ldsCurBatch = ldsNextBatch;\n"
\r
824 " GROUP_LDS_BARRIER;\n"
\r
826 "// while( ldsCurBatch != -1 && iter <= 10 );\n"
\r
827 " while( ldsCurBatch != -1 && ldsCurBatch <= maxBatch );\n"
\r
835 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
\r
836 "void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )\n"
\r
838 " int nContacts = cb.x;\n"
\r
839 " int gIdx = GET_GLOBAL_IDX;\n"
\r
841 " if( gIdx < nContacts )\n"
\r
843 " int srcIdx = sortData[gIdx].y;\n"
\r
844 " out[gIdx] = in[srcIdx];\n"
\r
850 " int m_nContacts;\n"
\r
851 " int m_staticIdx;\n"
\r
852 " float m_scale;\n"
\r
854 "} ConstBufferSSD;\n"
\r
857 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
\r
858 "void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, ConstBufferSSD cb )\n"
\r
860 " int gIdx = GET_GLOBAL_IDX;\n"
\r
861 " int nContacts = cb.m_nContacts;\n"
\r
862 " int staticIdx = cb.m_staticIdx;\n"
\r
863 " float scale = cb.m_scale;\n"
\r
864 " int N_SPLIT = cb.m_nSplit;\n"
\r
866 " if( gIdx < nContacts )\n"
\r
868 " int aIdx = gContact[gIdx].m_bodyAPtr;\n"
\r
869 " int bIdx = gContact[gIdx].m_bodyBPtr;\n"
\r
871 " int idx = (aIdx == staticIdx)? bIdx: aIdx;\n"
\r
872 " float4 p = gBodies[idx].m_pos;\n"
\r
873 " int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
\r
874 " int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
\r
876 " gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);\n"
\r
877 " gSortDataOut[gIdx].y = gIdx;\n"
\r
881 " gSortDataOut[gIdx].x = 0xffffffff;\n"
\r
886 "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, \n"
\r
887 " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"
\r
888 " Contact4 src, float dt, float positionDrift, float positionConstraintCoeff,\n"
\r
889 " Constraint4* dstC )\n"
\r
891 " dstC->m_bodyA = src.m_bodyAPtr;\n"
\r
892 " dstC->m_bodyB = src.m_bodyBPtr;\n"
\r
894 " float dtInv = 1.f/dt;\n"
\r
895 " for(int ic=0; ic<4; ic++)\n"
\r
897 " dstC->m_appliedRambdaDt[ic] = 0.f;\n"
\r
899 " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"
\r
902 " dstC->m_linear = -src.m_worldNormal;\n"
\r
903 " dstC->m_linear.w = 0.7f ;//src.getFrictionCoeff() );\n"
\r
904 " for(int ic=0; ic<4; ic++)\n"
\r
906 " float4 r0 = src.m_worldPos[ic] - posA;\n"
\r
907 " float4 r1 = src.m_worldPos[ic] - posB;\n"
\r
909 " if( ic >= src.m_worldNormal.w )//npoints\n"
\r
911 " dstC->m_jacCoeffInv[ic] = 0.f;\n"
\r
915 " float relVelN;\n"
\r
917 " float4 linear, angular0, angular1;\n"
\r
918 " setLinearAndAngular(src.m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n"
\r
920 " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
\r
921 " invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
\r
923 " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"
\r
924 " linVelA, angVelA, linVelB, angVelB);\n"
\r
926 " float e = 0.f;//src.getRestituitionCoeff();\n"
\r
927 " if( relVelN*relVelN < 0.004f ) e = 0.f;\n"
\r
929 " dstC->m_b[ic] = e*relVelN;\n"
\r
930 " //float penetration = src.m_worldPos[ic].w;\n"
\r
931 " dstC->m_b[ic] += (src.m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"
\r
932 " dstC->m_appliedRambdaDt[ic] = 0.f;\n"
\r
936 " if( src.m_worldNormal.w > 1 )//npoints\n"
\r
937 " { // prepare friction\n"
\r
938 " float4 center = make_float4(0.f);\n"
\r
939 " for(int i=0; i<src.m_worldNormal.w; i++) center += src.m_worldPos[i];\n"
\r
940 " center /= (float)src.m_worldNormal.w;\n"
\r
942 " float4 tangent[2];\n"
\r
943 " tangent[0] = cross3( src.m_worldNormal, src.m_worldPos[0]-center );\n"
\r
944 " tangent[1] = cross3( tangent[0], src.m_worldNormal );\n"
\r
945 " tangent[0] = normalize3( tangent[0] );\n"
\r
946 " tangent[1] = normalize3( tangent[1] );\n"
\r
948 " r[0] = center - posA;\n"
\r
949 " r[1] = center - posB;\n"
\r
951 " for(int i=0; i<2; i++)\n"
\r
953 " float4 linear, angular0, angular1;\n"
\r
954 " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"
\r
956 " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"
\r
957 " invMassA, &invInertiaA, invMassB, &invInertiaB );\n"
\r
958 " dstC->m_fAppliedRambdaDt[i] = 0.f;\n"
\r
960 " dstC->m_center = center;\n"
\r
964 " // single point constraint\n"
\r
967 " for(int i=0; i<4; i++)\n"
\r
969 " if( i<src.m_worldNormal.w )\n"
\r
971 " dstC->m_worldPos[i] = src.m_worldPos[i];\n"
\r
975 " dstC->m_worldPos[i] = make_float4(0.f);\n"
\r
982 " int m_nContacts;\n"
\r
984 " float m_positionDrift;\n"
\r
985 " float m_positionConstraintCoeff;\n"
\r
986 "} ConstBufferCTC;\n"
\r
989 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
\r
990 "void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, ConstBufferCTC cb)\n"
\r
992 " int gIdx = GET_GLOBAL_IDX;\n"
\r
993 " int nContacts = cb.m_nContacts;\n"
\r
994 " float dt = cb.m_dt;\n"
\r
995 " float positionDrift = cb.m_positionDrift;\n"
\r
996 " float positionConstraintCoeff = cb.m_positionConstraintCoeff;\n"
\r
998 " if( gIdx < nContacts )\n"
\r
1000 " int aIdx = gContact[gIdx].m_bodyAPtr;\n"
\r
1001 " int bIdx = gContact[gIdx].m_bodyBPtr;\n"
\r
1003 " float4 posA = gBodies[aIdx].m_pos;\n"
\r
1004 " float4 linVelA = gBodies[aIdx].m_linVel;\n"
\r
1005 " float4 angVelA = gBodies[aIdx].m_angVel;\n"
\r
1006 " float invMassA = gBodies[aIdx].m_invMass;\n"
\r
1007 " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"
\r
1009 " float4 posB = gBodies[bIdx].m_pos;\n"
\r
1010 " float4 linVelB = gBodies[bIdx].m_linVel;\n"
\r
1011 " float4 angVelB = gBodies[bIdx].m_angVel;\n"
\r
1012 " float invMassB = gBodies[bIdx].m_invMass;\n"
\r
1013 " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"
\r
1015 " Constraint4 cs;\n"
\r
1017 " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"
\r
1018 " gContact[gIdx], dt, positionDrift, positionConstraintCoeff, \n"
\r
1021 " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n"
\r
1023 " gConstraintOut[gIdx] = cs;\n"
\r
1028 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
\r
1029 "void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )\n"
\r
1031 " int gIdx = GET_GLOBAL_IDX;\n"
\r
1032 " if( gIdx < cb.x )\n"
\r
1034 " gOut[gIdx] = gIn[gIdx];\n"
\r