Tizen 2.1 base
[platform/upstream/libbullet.git] / Extras / RigidBodyGpuPipeline / dynamics / basic_demo / Stubs / SolverKernels.h
1 static const char* solverKernelsCL= \\r
2 "#pragma OPENCL EXTENSION cl_amd_printf : enable\n"\r
3 "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"\r
4 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"\r
5 "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"\r
6 "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"\r
7 "\n"\r
8 "\n"\r
9 "#ifdef cl_ext_atomic_counters_32\n"\r
10 "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"\r
11 "#else\n"\r
12 "#define counter32_t volatile global int*\n"\r
13 "#endif\n"\r
14 "\n"\r
15 "typedef unsigned int u32;\n"\r
16 "typedef unsigned short u16;\n"\r
17 "typedef unsigned char u8;\n"\r
18 "\n"\r
19 "#define GET_GROUP_IDX get_group_id(0)\n"\r
20 "#define GET_LOCAL_IDX get_local_id(0)\n"\r
21 "#define GET_GLOBAL_IDX get_global_id(0)\n"\r
22 "#define GET_GROUP_SIZE get_local_size(0)\n"\r
23 "#define GET_NUM_GROUPS get_num_groups(0)\n"\r
24 "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"\r
25 "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"\r
26 "#define AtomInc(x) atom_inc(&(x))\n"\r
27 "#define AtomInc1(x, out) out = atom_inc(&(x))\n"\r
28 "#define AppendInc(x, out) out = atomic_inc(x)\n"\r
29 "#define AtomAdd(x, value) atom_add(&(x), value)\n"\r
30 "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"\r
31 "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"\r
32 "\n"\r
33 "\n"\r
34 "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"\r
35 "\n"\r
36 "#define make_float4 (float4)\n"\r
37 "#define make_float2 (float2)\n"\r
38 "#define make_uint4 (uint4)\n"\r
39 "#define make_int4 (int4)\n"\r
40 "#define make_uint2 (uint2)\n"\r
41 "#define make_int2 (int2)\n"\r
42 "\n"\r
43 "\n"\r
44 "#define max2 max\n"\r
45 "#define min2 min\n"\r
46 "\n"\r
47 "\n"\r
48 "///////////////////////////////////////\n"\r
49 "//     Vector\n"\r
50 "///////////////////////////////////////\n"\r
51 "__inline\n"\r
52 "float fastDiv(float numerator, float denominator)\n"\r
53 "{\n"\r
54 "       return native_divide(numerator, denominator);   \n"\r
55 "//     return numerator/denominator;   \n"\r
56 "}\n"\r
57 "\n"\r
58 "__inline\n"\r
59 "float4 fastDiv4(float4 numerator, float4 denominator)\n"\r
60 "{\n"\r
61 "       return native_divide(numerator, denominator);   \n"\r
62 "}\n"\r
63 "\n"\r
64 "__inline\n"\r
65 "float fastSqrtf(float f2)\n"\r
66 "{\n"\r
67 "       return native_sqrt(f2);\n"\r
68 "//     return sqrt(f2);\n"\r
69 "}\n"\r
70 "\n"\r
71 "__inline\n"\r
72 "float fastRSqrt(float f2)\n"\r
73 "{\n"\r
74 "       return native_rsqrt(f2);\n"\r
75 "}\n"\r
76 "\n"\r
77 "__inline\n"\r
78 "float fastLength4(float4 v)\n"\r
79 "{\n"\r
80 "       return fast_length(v);\n"\r
81 "}\n"\r
82 "\n"\r
83 "__inline\n"\r
84 "float4 fastNormalize4(float4 v)\n"\r
85 "{\n"\r
86 "       return fast_normalize(v);\n"\r
87 "}\n"\r
88 "\n"\r
89 "\n"\r
90 "__inline\n"\r
91 "float sqrtf(float a)\n"\r
92 "{\n"\r
93 "//     return sqrt(a);\n"\r
94 "       return native_sqrt(a);\n"\r
95 "}\n"\r
96 "\n"\r
97 "__inline\n"\r
98 "float4 cross3(float4 a, float4 b)\n"\r
99 "{\n"\r
100 "       return cross(a,b);\n"\r
101 "}\n"\r
102 "\n"\r
103 "__inline\n"\r
104 "float dot3F4(float4 a, float4 b)\n"\r
105 "{\n"\r
106 "       float4 a1 = make_float4(a.xyz,0.f);\n"\r
107 "       float4 b1 = make_float4(b.xyz,0.f);\n"\r
108 "       return dot(a1, b1);\n"\r
109 "}\n"\r
110 "\n"\r
111 "__inline\n"\r
112 "float length3(const float4 a)\n"\r
113 "{\n"\r
114 "       return sqrtf(dot3F4(a,a));\n"\r
115 "}\n"\r
116 "\n"\r
117 "__inline\n"\r
118 "float dot4(const float4 a, const float4 b)\n"\r
119 "{\n"\r
120 "       return dot( a, b );\n"\r
121 "}\n"\r
122 "\n"\r
123 "//     for height\n"\r
124 "__inline\n"\r
125 "float dot3w1(const float4 point, const float4 eqn)\n"\r
126 "{\n"\r
127 "       return dot3F4(point,eqn) + eqn.w;\n"\r
128 "}\n"\r
129 "\n"\r
130 "__inline\n"\r
131 "float4 normalize3(const float4 a)\n"\r
132 "{\n"\r
133 "       float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"\r
134 "       return fastNormalize4( n );\n"\r
135 "//     float length = sqrtf(dot3F4(a, a));\n"\r
136 "//     return 1.f/length * a;\n"\r
137 "}\n"\r
138 "\n"\r
139 "__inline\n"\r
140 "float4 normalize4(const float4 a)\n"\r
141 "{\n"\r
142 "       float length = sqrtf(dot4(a, a));\n"\r
143 "       return 1.f/length * a;\n"\r
144 "}\n"\r
145 "\n"\r
146 "__inline\n"\r
147 "float4 createEquation(const float4 a, const float4 b, const float4 c)\n"\r
148 "{\n"\r
149 "       float4 eqn;\n"\r
150 "       float4 ab = b-a;\n"\r
151 "       float4 ac = c-a;\n"\r
152 "       eqn = normalize3( cross3(ab, ac) );\n"\r
153 "       eqn.w = -dot3F4(eqn,a);\n"\r
154 "       return eqn;\n"\r
155 "}\n"\r
156 "\n"\r
157 "///////////////////////////////////////\n"\r
158 "//     Matrix3x3\n"\r
159 "///////////////////////////////////////\n"\r
160 "\n"\r
161 "typedef struct\n"\r
162 "{\n"\r
163 "       float4 m_row[3];\n"\r
164 "}Matrix3x3;\n"\r
165 "\n"\r
166 "__inline\n"\r
167 "Matrix3x3 mtZero();\n"\r
168 "\n"\r
169 "__inline\n"\r
170 "Matrix3x3 mtIdentity();\n"\r
171 "\n"\r
172 "__inline\n"\r
173 "Matrix3x3 mtTranspose(Matrix3x3 m);\n"\r
174 "\n"\r
175 "__inline\n"\r
176 "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n"\r
177 "\n"\r
178 "__inline\n"\r
179 "float4 mtMul1(Matrix3x3 a, float4 b);\n"\r
180 "\n"\r
181 "__inline\n"\r
182 "float4 mtMul3(float4 a, Matrix3x3 b);\n"\r
183 "\n"\r
184 "__inline\n"\r
185 "Matrix3x3 mtZero()\n"\r
186 "{\n"\r
187 "       Matrix3x3 m;\n"\r
188 "       m.m_row[0] = (float4)(0.f);\n"\r
189 "       m.m_row[1] = (float4)(0.f);\n"\r
190 "       m.m_row[2] = (float4)(0.f);\n"\r
191 "       return m;\n"\r
192 "}\n"\r
193 "\n"\r
194 "__inline\n"\r
195 "Matrix3x3 mtIdentity()\n"\r
196 "{\n"\r
197 "       Matrix3x3 m;\n"\r
198 "       m.m_row[0] = (float4)(1,0,0,0);\n"\r
199 "       m.m_row[1] = (float4)(0,1,0,0);\n"\r
200 "       m.m_row[2] = (float4)(0,0,1,0);\n"\r
201 "       return m;\n"\r
202 "}\n"\r
203 "\n"\r
204 "__inline\n"\r
205 "Matrix3x3 mtTranspose(Matrix3x3 m)\n"\r
206 "{\n"\r
207 "       Matrix3x3 out;\n"\r
208 "       out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"\r
209 "       out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"\r
210 "       out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"\r
211 "       return out;\n"\r
212 "}\n"\r
213 "\n"\r
214 "__inline\n"\r
215 "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"\r
216 "{\n"\r
217 "       Matrix3x3 transB;\n"\r
218 "       transB = mtTranspose( b );\n"\r
219 "       Matrix3x3 ans;\n"\r
220 "       //      why this doesn't run when 0ing in the for{}\n"\r
221 "       a.m_row[0].w = 0.f;\n"\r
222 "       a.m_row[1].w = 0.f;\n"\r
223 "       a.m_row[2].w = 0.f;\n"\r
224 "       for(int i=0; i<3; i++)\n"\r
225 "       {\n"\r
226 "//     a.m_row[i].w = 0.f;\n"\r
227 "               ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n"\r
228 "               ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n"\r
229 "               ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n"\r
230 "               ans.m_row[i].w = 0.f;\n"\r
231 "       }\n"\r
232 "       return ans;\n"\r
233 "}\n"\r
234 "\n"\r
235 "__inline\n"\r
236 "float4 mtMul1(Matrix3x3 a, float4 b)\n"\r
237 "{\n"\r
238 "       float4 ans;\n"\r
239 "       ans.x = dot3F4( a.m_row[0], b );\n"\r
240 "       ans.y = dot3F4( a.m_row[1], b );\n"\r
241 "       ans.z = dot3F4( a.m_row[2], b );\n"\r
242 "       ans.w = 0.f;\n"\r
243 "       return ans;\n"\r
244 "}\n"\r
245 "\n"\r
246 "__inline\n"\r
247 "float4 mtMul3(float4 a, Matrix3x3 b)\n"\r
248 "{\n"\r
249 "       float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"\r
250 "       float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"\r
251 "       float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"\r
252 "\n"\r
253 "       float4 ans;\n"\r
254 "       ans.x = dot3F4( a, colx );\n"\r
255 "       ans.y = dot3F4( a, coly );\n"\r
256 "       ans.z = dot3F4( a, colz );\n"\r
257 "       return ans;\n"\r
258 "}\n"\r
259 "\n"\r
260 "///////////////////////////////////////\n"\r
261 "//     Quaternion\n"\r
262 "///////////////////////////////////////\n"\r
263 "\n"\r
264 "typedef float4 Quaternion;\n"\r
265 "\n"\r
266 "__inline\n"\r
267 "Quaternion qtMul(Quaternion a, Quaternion b);\n"\r
268 "\n"\r
269 "__inline\n"\r
270 "Quaternion qtNormalize(Quaternion in);\n"\r
271 "\n"\r
272 "__inline\n"\r
273 "float4 qtRotate(Quaternion q, float4 vec);\n"\r
274 "\n"\r
275 "__inline\n"\r
276 "Quaternion qtInvert(Quaternion q);\n"\r
277 "\n"\r
278 "__inline\n"\r
279 "Matrix3x3 qtGetRotationMatrix(Quaternion q);\n"\r
280 "\n"\r
281 "\n"\r
282 "\n"\r
283 "__inline\n"\r
284 "Quaternion qtMul(Quaternion a, Quaternion b)\n"\r
285 "{\n"\r
286 "       Quaternion ans;\n"\r
287 "       ans = cross3( a, b );\n"\r
288 "       ans += a.w*b+b.w*a;\n"\r
289 "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"\r
290 "       ans.w = a.w*b.w - dot3F4(a, b);\n"\r
291 "       return ans;\n"\r
292 "}\n"\r
293 "\n"\r
294 "__inline\n"\r
295 "Quaternion qtNormalize(Quaternion in)\n"\r
296 "{\n"\r
297 "       return fastNormalize4(in);\n"\r
298 "//     in /= length( in );\n"\r
299 "//     return in;\n"\r
300 "}\n"\r
301 "__inline\n"\r
302 "float4 qtRotate(Quaternion q, float4 vec)\n"\r
303 "{\n"\r
304 "       Quaternion qInv = qtInvert( q );\n"\r
305 "       float4 vcpy = vec;\n"\r
306 "       vcpy.w = 0.f;\n"\r
307 "       float4 out = qtMul(qtMul(q,vcpy),qInv);\n"\r
308 "       return out;\n"\r
309 "}\n"\r
310 "\n"\r
311 "__inline\n"\r
312 "Quaternion qtInvert(Quaternion q)\n"\r
313 "{\n"\r
314 "       return (Quaternion)(-q.xyz, q.w);\n"\r
315 "}\n"\r
316 "\n"\r
317 "__inline\n"\r
318 "float4 qtInvRotate(const Quaternion q, float4 vec)\n"\r
319 "{\n"\r
320 "       return qtRotate( qtInvert( q ), vec );\n"\r
321 "}\n"\r
322 "\n"\r
323 "__inline\n"\r
324 "Matrix3x3 qtGetRotationMatrix(Quaternion quat)\n"\r
325 "{\n"\r
326 "       float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"\r
327 "       Matrix3x3 out;\n"\r
328 "\n"\r
329 "       out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"\r
330 "       out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"\r
331 "       out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"\r
332 "       out.m_row[0].w = 0.f;\n"\r
333 "\n"\r
334 "       out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"\r
335 "       out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"\r
336 "       out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"\r
337 "       out.m_row[1].w = 0.f;\n"\r
338 "\n"\r
339 "       out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"\r
340 "       out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"\r
341 "       out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"\r
342 "       out.m_row[2].w = 0.f;\n"\r
343 "\n"\r
344 "       return out;\n"\r
345 "}\n"\r
346 "\n"\r
347 "\n"\r
348 "\n"\r
349 "\n"\r
350 "#define WG_SIZE 64\n"\r
351 "\n"\r
352 "typedef struct\n"\r
353 "{\n"\r
354 "       float4 m_pos;\n"\r
355 "       Quaternion m_quat;\n"\r
356 "       float4 m_linVel;\n"\r
357 "       float4 m_angVel;\n"\r
358 "\n"\r
359 "       u32 m_shapeIdx;\n"\r
360 "       u32 m_shapeType;\n"\r
361 "       float m_invMass;\n"\r
362 "       float m_restituitionCoeff;\n"\r
363 "       float m_frictionCoeff;\n"\r
364 "} Body;\n"\r
365 "\n"\r
366 "typedef struct\n"\r
367 "{\n"\r
368 "       Matrix3x3 m_invInertia;\n"\r
369 "       Matrix3x3 m_initInvInertia;\n"\r
370 "} Shape;\n"\r
371 "\n"\r
372 "typedef struct\n"\r
373 "{\n"\r
374 "       float4 m_linear;\n"\r
375 "       float4 m_worldPos[4];\n"\r
376 "       float4 m_center;        \n"\r
377 "       float m_jacCoeffInv[4];\n"\r
378 "       float m_b[4];\n"\r
379 "       float m_appliedRambdaDt[4];\n"\r
380 "\n"\r
381 "       float m_fJacCoeffInv[2];        \n"\r
382 "       float m_fAppliedRambdaDt[2];    \n"\r
383 "\n"\r
384 "       u32 m_bodyA;\n"\r
385 "       u32 m_bodyB;\n"\r
386 "\n"\r
387 "       int m_batchIdx;\n"\r
388 "       u32 m_paddings[1];\n"\r
389 "} Constraint4;\n"\r
390 "\n"\r
391 "typedef struct\n"\r
392 "{\n"\r
393 "       float4 m_worldPos[4];\n"\r
394 "       float4 m_worldNormal;\n"\r
395 "       u32 m_coeffs;\n"\r
396 "       int m_batchIdx;\n"\r
397 "\n"\r
398 "       u32 m_bodyAPtr;\n"\r
399 "       u32 m_bodyBPtr;\n"\r
400 "} Contact4;\n"\r
401 "\n"\r
402 "typedef struct\n"\r
403 "{\n"\r
404 "       int m_nConstraints;\n"\r
405 "       int m_start;\n"\r
406 "       int m_batchIdx;\n"\r
407 "       int m_nSplit;\n"\r
408 "//     int m_paddings[1];\n"\r
409 "} ConstBuffer;\n"\r
410 "\n"\r
411 "typedef struct\n"\r
412 "{\n"\r
413 "       int m_solveFriction;\n"\r
414 "       int m_maxBatch; //      long batch really kills the performance\n"\r
415 "       int m_batchIdx;\n"\r
416 "       int m_nSplit;\n"\r
417 "//     int m_paddings[1];\n"\r
418 "} ConstBufferBatchSolve;\n"\r
419 "\n"\r
420 "\n"\r
421 "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n"\r
422 "{\n"\r
423 "       *linear = -n;\n"\r
424 "       *angular0 = -cross3(r0, n);\n"\r
425 "       *angular1 = cross3(r1, n);\n"\r
426 "}\n"\r
427 "\n"\r
428 "\n"\r
429 "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n"\r
430 "{\n"\r
431 "       return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n"\r
432 "}\n"\r
433 "\n"\r
434 "\n"\r
435 "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n"\r
436 "                                       float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n"\r
437 "{\n"\r
438 "       //      linear0,1 are normlized\n"\r
439 "       float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n"\r
440 "       float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n"\r
441 "       float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n"\r
442 "       float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"\r
443 "       return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"\r
444 "}\n"\r
445 "\n"\r
446 "\n"\r
447 "\n"\r
448 "void solveContact(__global Constraint4* cs,\n"\r
449 "                       float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"\r
450 "                       float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n"\r
451 "{\n"\r
452 "       float minRambdaDt = 0;\n"\r
453 "       float maxRambdaDt = FLT_MAX;\n"\r
454 "\n"\r
455 "       for(int ic=0; ic<4; ic++)\n"\r
456 "       {\n"\r
457 "               if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n"\r
458 "\n"\r
459 "               float4 angular0, angular1, linear;\n"\r
460 "               float4 r0 = cs->m_worldPos[ic] - posA;\n"\r
461 "               float4 r1 = cs->m_worldPos[ic] - posB;\n"\r
462 "               setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n"\r
463 "\n"\r
464 "               float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n"\r
465 "                       *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n"\r
466 "               rambdaDt *= cs->m_jacCoeffInv[ic];\n"\r
467 "\n"\r
468 "               {\n"\r
469 "                       float prevSum = cs->m_appliedRambdaDt[ic];\n"\r
470 "                       float updated = prevSum;\n"\r
471 "                       updated += rambdaDt;\n"\r
472 "                       updated = max2( updated, minRambdaDt );\n"\r
473 "                       updated = min2( updated, maxRambdaDt );\n"\r
474 "                       rambdaDt = updated - prevSum;\n"\r
475 "                       cs->m_appliedRambdaDt[ic] = updated;\n"\r
476 "               }\n"\r
477 "\n"\r
478 "               float4 linImp0 = invMassA*linear*rambdaDt;\n"\r
479 "               float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"\r
480 "               float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"\r
481 "               float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"\r
482 "\n"\r
483 "               *linVelA += linImp0;\n"\r
484 "               *angVelA += angImp0;\n"\r
485 "               *linVelB += linImp1;\n"\r
486 "               *angVelB += angImp1;\n"\r
487 "       }\n"\r
488 "}\n"\r
489 "\n"\r
490 "\n"\r
491 "void solveFriction(__global Constraint4* cs,\n"\r
492 "                       float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n"\r
493 "                       float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n"\r
494 "                       float maxRambdaDt[4], float minRambdaDt[4])\n"\r
495 "{\n"\r
496 "       if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n"\r
497 "       const float4 center = cs->m_center;\n"\r
498 "\n"\r
499 "       float4 n = -cs->m_linear;\n"\r
500 "\n"\r
501 "       float4 tangent[2];\n"\r
502 "       tangent[0] = cross3( n, cs->m_worldPos[0]-center );\n"\r
503 "       tangent[1] = cross3( tangent[0], n );\n"\r
504 "       tangent[0] = normalize3( tangent[0] );\n"\r
505 "       tangent[1] = normalize3( tangent[1] );\n"\r
506 "\n"\r
507 "       float4 angular0, angular1, linear;\n"\r
508 "       float4 r0 = center - posA;\n"\r
509 "       float4 r1 = center - posB;\n"\r
510 "       for(int i=0; i<2; i++)\n"\r
511 "       {\n"\r
512 "               setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n"\r
513 "               float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n"\r
514 "                       *linVelA, *angVelA, *linVelB, *angVelB );\n"\r
515 "               rambdaDt *= cs->m_fJacCoeffInv[i];\n"\r
516 "\n"\r
517 "               {\n"\r
518 "                       float prevSum = cs->m_fAppliedRambdaDt[i];\n"\r
519 "                       float updated = prevSum;\n"\r
520 "                       updated += rambdaDt;\n"\r
521 "                       updated = max2( updated, minRambdaDt[i] );\n"\r
522 "                       updated = min2( updated, maxRambdaDt[i] );\n"\r
523 "                       rambdaDt = updated - prevSum;\n"\r
524 "                       cs->m_fAppliedRambdaDt[i] = updated;\n"\r
525 "               }\n"\r
526 "\n"\r
527 "               float4 linImp0 = invMassA*linear*rambdaDt;\n"\r
528 "               float4 linImp1 = invMassB*(-linear)*rambdaDt;\n"\r
529 "               float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n"\r
530 "               float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n"\r
531 "\n"\r
532 "               *linVelA += linImp0;\n"\r
533 "               *angVelA += angImp0;\n"\r
534 "               *linVelB += linImp1;\n"\r
535 "               *angVelB += angImp1;\n"\r
536 "       }\n"\r
537 "       {       //      angular damping for point constraint\n"\r
538 "               float4 ab = normalize3( posB - posA );\n"\r
539 "               float4 ac = normalize3( center - posA );\n"\r
540 "               if( dot3F4( ab, ac ) > 0.95f  || (invMassA == 0.f || invMassB == 0.f))\n"\r
541 "               {\n"\r
542 "                       float angNA = dot3F4( n, *angVelA );\n"\r
543 "                       float angNB = dot3F4( n, *angVelB );\n"\r
544 "\n"\r
545 "                       *angVelA -= (angNA*0.1f)*n;\n"\r
546 "                       *angVelB -= (angNB*0.1f)*n;\n"\r
547 "               }\n"\r
548 "       }\n"\r
549 "}\n"\r
550 "\n"\r
551 "void solveAConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"\r
552 "{\n"\r
553 "       float frictionCoeff = ldsCs[0].m_linear.w;\n"\r
554 "       int aIdx = ldsCs[0].m_bodyA;\n"\r
555 "       int bIdx = ldsCs[0].m_bodyB;\n"\r
556 "\n"\r
557 "       float4 posA = gBodies[aIdx].m_pos;\n"\r
558 "       float4 linVelA = gBodies[aIdx].m_linVel;\n"\r
559 "       float4 angVelA = gBodies[aIdx].m_angVel;\n"\r
560 "       float invMassA = gBodies[aIdx].m_invMass;\n"\r
561 "       Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"\r
562 "\n"\r
563 "       float4 posB = gBodies[bIdx].m_pos;\n"\r
564 "       float4 linVelB = gBodies[bIdx].m_linVel;\n"\r
565 "       float4 angVelB = gBodies[bIdx].m_angVel;\n"\r
566 "       float invMassB = gBodies[bIdx].m_invMass;\n"\r
567 "       Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"\r
568 "               \n"\r
569 "               \n"\r
570 "       {\n"\r
571 "               solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"\r
572 "                       posB, &linVelB, &angVelB, invMassB, invInertiaB );\n"\r
573 "       }\n"\r
574 "\n"\r
575 "       {\n"\r
576 "               float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"\r
577 "               float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"\r
578 "\n"\r
579 "               float sum = 0;\n"\r
580 "               for(int j=0; j<4; j++)\n"\r
581 "               {\n"\r
582 "                       sum +=ldsCs[0].m_appliedRambdaDt[j];\n"\r
583 "               }\n"\r
584 "               frictionCoeff = 0.7f;\n"\r
585 "               for(int j=0; j<4; j++)\n"\r
586 "               {\n"\r
587 "                       maxRambdaDt[j] = frictionCoeff*sum;\n"\r
588 "                       minRambdaDt[j] = -maxRambdaDt[j];\n"\r
589 "               }\n"\r
590 "\n"\r
591 "               solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"\r
592 "                       posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"\r
593 "       }\n"\r
594 "\n"\r
595 "       gBodies[aIdx].m_linVel = linVelA;\n"\r
596 "       gBodies[aIdx].m_angVel = angVelA;\n"\r
597 "       gBodies[bIdx].m_linVel = linVelB;\n"\r
598 "       gBodies[bIdx].m_angVel = angVelB;\n"\r
599 "}\n"\r
600 "\n"\r
601 "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"\r
602 "{\n"\r
603 "       float frictionCoeff = ldsCs[0].m_linear.w;\n"\r
604 "       int aIdx = ldsCs[0].m_bodyA;\n"\r
605 "       int bIdx = ldsCs[0].m_bodyB;\n"\r
606 "\n"\r
607 "       float4 posA = gBodies[aIdx].m_pos;\n"\r
608 "       float4 linVelA = gBodies[aIdx].m_linVel;\n"\r
609 "       float4 angVelA = gBodies[aIdx].m_angVel;\n"\r
610 "       float invMassA = gBodies[aIdx].m_invMass;\n"\r
611 "       Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"\r
612 "\n"\r
613 "       float4 posB = gBodies[bIdx].m_pos;\n"\r
614 "       float4 linVelB = gBodies[bIdx].m_linVel;\n"\r
615 "       float4 angVelB = gBodies[bIdx].m_angVel;\n"\r
616 "       float invMassB = gBodies[bIdx].m_invMass;\n"\r
617 "       Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"\r
618 "\n"\r
619 "       solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"\r
620 "                       posB, &linVelB, &angVelB, invMassB, invInertiaB );\n"\r
621 "\n"\r
622 "       gBodies[aIdx].m_linVel = linVelA;\n"\r
623 "       gBodies[aIdx].m_angVel = angVelA;\n"\r
624 "       gBodies[bIdx].m_linVel = linVelB;\n"\r
625 "       gBodies[bIdx].m_angVel = angVelB;\n"\r
626 "}\n"\r
627 "\n"\r
628 "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n"\r
629 "{\n"\r
630 "       float frictionCoeff = ldsCs[0].m_linear.w;\n"\r
631 "       int aIdx = ldsCs[0].m_bodyA;\n"\r
632 "       int bIdx = ldsCs[0].m_bodyB;\n"\r
633 "\n"\r
634 "       float4 posA = gBodies[aIdx].m_pos;\n"\r
635 "       float4 linVelA = gBodies[aIdx].m_linVel;\n"\r
636 "       float4 angVelA = gBodies[aIdx].m_angVel;\n"\r
637 "       float invMassA = gBodies[aIdx].m_invMass;\n"\r
638 "       Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"\r
639 "\n"\r
640 "       float4 posB = gBodies[bIdx].m_pos;\n"\r
641 "       float4 linVelB = gBodies[bIdx].m_linVel;\n"\r
642 "       float4 angVelB = gBodies[bIdx].m_angVel;\n"\r
643 "       float invMassB = gBodies[bIdx].m_invMass;\n"\r
644 "       Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"\r
645 "\n"\r
646 "       {\n"\r
647 "               float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n"\r
648 "               float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n"\r
649 "\n"\r
650 "               float sum = 0;\n"\r
651 "               for(int j=0; j<4; j++)\n"\r
652 "               {\n"\r
653 "                       sum +=ldsCs[0].m_appliedRambdaDt[j];\n"\r
654 "               }\n"\r
655 "               frictionCoeff = 0.7f;\n"\r
656 "               for(int j=0; j<4; j++)\n"\r
657 "               {\n"\r
658 "                       maxRambdaDt[j] = frictionCoeff*sum;\n"\r
659 "                       minRambdaDt[j] = -maxRambdaDt[j];\n"\r
660 "               }\n"\r
661 "\n"\r
662 "               solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n"\r
663 "                       posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n"\r
664 "       }\n"\r
665 "\n"\r
666 "       gBodies[aIdx].m_linVel = linVelA;\n"\r
667 "       gBodies[aIdx].m_angVel = angVelA;\n"\r
668 "       gBodies[bIdx].m_linVel = linVelB;\n"\r
669 "       gBodies[bIdx].m_angVel = angVelB;\n"\r
670 "}\n"\r
671 "\n"\r
672 "typedef struct \n"\r
673 "{\n"\r
674 "       int m_valInt0;\n"\r
675 "       int m_valInt1;\n"\r
676 "       int m_valInt2;\n"\r
677 "       int m_valInt3;\n"\r
678 "\n"\r
679 "       float m_val0;\n"\r
680 "       float m_val1;\n"\r
681 "       float m_val2;\n"\r
682 "       float m_val3;\n"\r
683 "} SolverDebugInfo;\n"\r
684 "\n"\r
685 "\n"\r
686 "__kernel\n"\r
687 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"\r
688 "//void BatchSolveKernel(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraints,   __global int* gN, __global int* gOffsets, __global SolverDebugInfo* debugInfo, ConstBufferBatchSolve cb)\n"\r
689 "void BatchSolveKernel(__global Body* gBodies, \n"\r
690 "__global Shape* gShapes, \n"\r
691 "__global Constraint4* gConstraints,    \n"\r
692 "__global int* gN, \n"\r
693 "__global int* gOffsets, \n"\r
694 "ConstBufferBatchSolve cb)\n"\r
695 "{\n"\r
696 "       __local int ldsBatchIdx[WG_SIZE+1];\n"\r
697 "\n"\r
698 "       __local int ldsCurBatch;\n"\r
699 "       __local int ldsNextBatch;\n"\r
700 "       __local int ldsStart;\n"\r
701 "\n"\r
702 "       int lIdx = GET_LOCAL_IDX;\n"\r
703 "       int wgIdx = GET_GROUP_IDX;\n"\r
704 "\n"\r
705 "       int gIdx = GET_GLOBAL_IDX;\n"\r
706 "//     debugInfo[gIdx].m_valInt0 = gIdx;\n"\r
707 "       //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"\r
708 "\n"\r
709 "       const int solveFriction = cb.m_solveFriction;\n"\r
710 "       const int maxBatch = cb.m_maxBatch;\n"\r
711 "       const int bIdx = cb.m_batchIdx;\n"\r
712 "       const int nSplit = cb.m_nSplit;\n"\r
713 "\n"\r
714 "       int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"\r
715 "       int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"\r
716 "       int cellIdx = xIdx+yIdx*nSplit;\n"\r
717 "       \n"\r
718 "       if( gN[cellIdx] == 0 ) \n"\r
719 "               return;\n"\r
720 "\n"\r
721 "       const int start = gOffsets[cellIdx];\n"\r
722 "       const int end = start + gN[cellIdx];\n"\r
723 "\n"\r
724 "       \n"\r
725 "       if( lIdx == 0 )\n"\r
726 "       {\n"\r
727 "               ldsCurBatch = 0;\n"\r
728 "               ldsNextBatch = 0;\n"\r
729 "               ldsStart = start;\n"\r
730 "       }\n"\r
731 "\n"\r
732 "\n"\r
733 "       GROUP_LDS_BARRIER;\n"\r
734 "\n"\r
735 "       int idx=ldsStart+lIdx;\n"\r
736 "       while (ldsCurBatch < maxBatch)\n"\r
737 "       {\n"\r
738 "               for(; idx<end; )\n"\r
739 "               {\n"\r
740 "                       if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"\r
741 "                       {\n"\r
742 "                               if( solveFriction )\n"\r
743 "                                       solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"\r
744 "                               else\n"\r
745 "                                       solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"\r
746 "                               idx+=64;\n"\r
747 "                       } else\n"\r
748 "                       {\n"\r
749 "                               break;\n"\r
750 "                       }\n"\r
751 "               }\n"\r
752 "               GROUP_LDS_BARRIER;\n"\r
753 "               if( lIdx == 0 )\n"\r
754 "               {\n"\r
755 "                       ldsCurBatch++;\n"\r
756 "               }\n"\r
757 "               GROUP_LDS_BARRIER;\n"\r
758 "       }\n"\r
759 "       \n"\r
760 "       return;\n"\r
761 "\n"\r
762 "\n"\r
763 "       int counter0 = 0;\n"\r
764 "       int counter1 = 0;\n"\r
765 "\n"\r
766 "       do\n"\r
767 "       {\n"\r
768 "               counter0++;\n"\r
769 "               int curStart = ldsStart;\n"\r
770 "               int curBatch = ldsCurBatch;\n"\r
771 "\n"\r
772 "               GROUP_LDS_BARRIER;\n"\r
773 "\n"\r
774 "               while( curBatch == ldsNextBatch && curStart < end )\n"\r
775 "               {\n"\r
776 "                       counter1++;\n"\r
777 "                       int idx = curStart + lIdx;\n"\r
778 "                       if( lIdx == 0 ) \n"\r
779 "                               ldsBatchIdx[0] = curBatch;\n"\r
780 "                               GROUP_LDS_BARRIER;\n"\r
781 "                       ldsBatchIdx[(lIdx == 0)? lIdx:lIdx+1] = curBatch;\n"\r
782 "                       GROUP_LDS_BARRIER;\n"\r
783 "//                     debugInfo[gIdx].m_valInt2 = gConstraints[idx].m_batchIdx;\n"\r
784 "//                     debugInfo[gIdx].m_valInt3 = idx;\n"\r
785 "\n"\r
786 "\n"\r
787 "                       ldsBatchIdx[lIdx+1] = (idx<end)? gConstraints[idx].m_batchIdx : -1;\n"\r
788 "\n"\r
789 "                       GROUP_LDS_BARRIER;\n"\r
790 "\n"\r
791 "\n"\r
792 "                       if( ldsBatchIdx[lIdx+1] == curBatch )\n"\r
793 "                       {\n"\r
794 "                               //      solve\n"\r
795 "\n"\r
796 "                               if( solveFriction )\n"\r
797 "                               {\n"\r
798 "                                       solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n"\r
799 "                                       }\n"\r
800 "                               else\n"\r
801 "                               {\n"\r
802 "                                       solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"\r
803 "                                       }\n"\r
804 "                       }\n"\r
805 "                       else\n"\r
806 "                       {\n"\r
807 "                               if( ldsBatchIdx[lIdx] == curBatch )\n"\r
808 "                               {\n"\r
809 "                                       ldsStart = idx;\n"\r
810 "                                       ldsNextBatch = ldsBatchIdx[lIdx+1];\n"\r
811 "                               }\n"\r
812 "                       }\n"\r
813 "\n"\r
814 "                       GROUP_LDS_BARRIER;\n"\r
815 "\n"\r
816 "                       curStart += GET_GROUP_SIZE;\n"\r
817 "               }\n"\r
818 "\n"\r
819 "               if( lIdx == 0 )\n"\r
820 "               {\n"\r
821 "                       ldsCurBatch = ldsNextBatch;\n"\r
822 "               }\n"\r
823 "\n"\r
824 "               GROUP_LDS_BARRIER;\n"\r
825 "       }\n"\r
826 "//     while( ldsCurBatch != -1 && iter <= 10 );\n"\r
827 "       while( ldsCurBatch != -1 && ldsCurBatch <= maxBatch );\n"\r
828 "\n"\r
829 "}\n"\r
830 "\n"\r
831 "\n"\r
832 "\n"\r
833 "//     others\n"\r
834 "__kernel\n"\r
835 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"\r
836 "void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )\n"\r
837 "{\n"\r
838 "       int nContacts = cb.x;\n"\r
839 "       int gIdx = GET_GLOBAL_IDX;\n"\r
840 "\n"\r
841 "       if( gIdx < nContacts )\n"\r
842 "       {\n"\r
843 "               int srcIdx = sortData[gIdx].y;\n"\r
844 "               out[gIdx] = in[srcIdx];\n"\r
845 "       }\n"\r
846 "}\n"\r
847 "\n"\r
848 "typedef struct\n"\r
849 "{\n"\r
850 "       int m_nContacts;\n"\r
851 "       int m_staticIdx;\n"\r
852 "       float m_scale;\n"\r
853 "       int m_nSplit;\n"\r
854 "} ConstBufferSSD;\n"\r
855 "\n"\r
856 "__kernel\n"\r
857 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"\r
858 "void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, ConstBufferSSD cb )\n"\r
859 "{\n"\r
860 "       int gIdx = GET_GLOBAL_IDX;\n"\r
861 "       int nContacts = cb.m_nContacts;\n"\r
862 "       int staticIdx = cb.m_staticIdx;\n"\r
863 "       float scale = cb.m_scale;\n"\r
864 "       int N_SPLIT = cb.m_nSplit;\n"\r
865 "\n"\r
866 "       if( gIdx < nContacts )\n"\r
867 "       {\n"\r
868 "               int aIdx = gContact[gIdx].m_bodyAPtr;\n"\r
869 "               int bIdx = gContact[gIdx].m_bodyBPtr;\n"\r
870 "\n"\r
871 "               int idx = (aIdx == staticIdx)? bIdx: aIdx;\n"\r
872 "               float4 p = gBodies[idx].m_pos;\n"\r
873 "               int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"\r
874 "               int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"\r
875 "\n"\r
876 "               gSortDataOut[gIdx].x = (xIdx+zIdx*N_SPLIT);\n"\r
877 "               gSortDataOut[gIdx].y = gIdx;\n"\r
878 "       }\n"\r
879 "       else\n"\r
880 "       {\n"\r
881 "               gSortDataOut[gIdx].x = 0xffffffff;\n"\r
882 "       }\n"\r
883 "}\n"\r
884 "\n"\r
885 "\n"\r
886 "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, \n"\r
887 "       const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n"\r
888 "       Contact4 src, float dt, float positionDrift, float positionConstraintCoeff,\n"\r
889 "       Constraint4* dstC )\n"\r
890 "{\n"\r
891 "       dstC->m_bodyA = src.m_bodyAPtr;\n"\r
892 "       dstC->m_bodyB = src.m_bodyBPtr;\n"\r
893 "\n"\r
894 "       float dtInv = 1.f/dt;\n"\r
895 "       for(int ic=0; ic<4; ic++)\n"\r
896 "       {\n"\r
897 "               dstC->m_appliedRambdaDt[ic] = 0.f;\n"\r
898 "       }\n"\r
899 "       dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n"\r
900 "\n"\r
901 "\n"\r
902 "       dstC->m_linear = -src.m_worldNormal;\n"\r
903 "       dstC->m_linear.w = 0.7f ;//src.getFrictionCoeff() );\n"\r
904 "       for(int ic=0; ic<4; ic++)\n"\r
905 "       {\n"\r
906 "               float4 r0 = src.m_worldPos[ic] - posA;\n"\r
907 "               float4 r1 = src.m_worldPos[ic] - posB;\n"\r
908 "\n"\r
909 "               if( ic >= src.m_worldNormal.w )//npoints\n"\r
910 "               {\n"\r
911 "                       dstC->m_jacCoeffInv[ic] = 0.f;\n"\r
912 "                       continue;\n"\r
913 "               }\n"\r
914 "\n"\r
915 "               float relVelN;\n"\r
916 "               {\n"\r
917 "                       float4 linear, angular0, angular1;\n"\r
918 "                       setLinearAndAngular(src.m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n"\r
919 "\n"\r
920 "                       dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n"\r
921 "                               invMassA, &invInertiaA, invMassB, &invInertiaB );\n"\r
922 "\n"\r
923 "                       relVelN = calcRelVel(linear, -linear, angular0, angular1,\n"\r
924 "                               linVelA, angVelA, linVelB, angVelB);\n"\r
925 "\n"\r
926 "                       float e = 0.f;//src.getRestituitionCoeff();\n"\r
927 "                       if( relVelN*relVelN < 0.004f ) e = 0.f;\n"\r
928 "\n"\r
929 "                       dstC->m_b[ic] = e*relVelN;\n"\r
930 "                       //float penetration = src.m_worldPos[ic].w;\n"\r
931 "                       dstC->m_b[ic] += (src.m_worldPos[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n"\r
932 "                       dstC->m_appliedRambdaDt[ic] = 0.f;\n"\r
933 "               }\n"\r
934 "       }\n"\r
935 "\n"\r
936 "       if( src.m_worldNormal.w > 1 )//npoints\n"\r
937 "       {       //      prepare friction\n"\r
938 "               float4 center = make_float4(0.f);\n"\r
939 "               for(int i=0; i<src.m_worldNormal.w; i++) center += src.m_worldPos[i];\n"\r
940 "               center /= (float)src.m_worldNormal.w;\n"\r
941 "\n"\r
942 "               float4 tangent[2];\n"\r
943 "               tangent[0] = cross3( src.m_worldNormal, src.m_worldPos[0]-center );\n"\r
944 "               tangent[1] = cross3( tangent[0], src.m_worldNormal );\n"\r
945 "               tangent[0] = normalize3( tangent[0] );\n"\r
946 "               tangent[1] = normalize3( tangent[1] );\n"\r
947 "               float4 r[2];\n"\r
948 "               r[0] = center - posA;\n"\r
949 "               r[1] = center - posB;\n"\r
950 "\n"\r
951 "               for(int i=0; i<2; i++)\n"\r
952 "               {\n"\r
953 "                       float4 linear, angular0, angular1;\n"\r
954 "                       setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n"\r
955 "\n"\r
956 "                       dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n"\r
957 "                               invMassA, &invInertiaA, invMassB, &invInertiaB );\n"\r
958 "                       dstC->m_fAppliedRambdaDt[i] = 0.f;\n"\r
959 "               }\n"\r
960 "               dstC->m_center = center;\n"\r
961 "       }\n"\r
962 "       else\n"\r
963 "       {\n"\r
964 "               //      single point constraint\n"\r
965 "       }\n"\r
966 "\n"\r
967 "       for(int i=0; i<4; i++)\n"\r
968 "       {\n"\r
969 "               if( i<src.m_worldNormal.w )\n"\r
970 "               {\n"\r
971 "                       dstC->m_worldPos[i] = src.m_worldPos[i];\n"\r
972 "               }\n"\r
973 "               else\n"\r
974 "               {\n"\r
975 "                       dstC->m_worldPos[i] = make_float4(0.f);\n"\r
976 "               }\n"\r
977 "       }\n"\r
978 "}\n"\r
979 "\n"\r
980 "typedef struct\n"\r
981 "{\n"\r
982 "       int m_nContacts;\n"\r
983 "       float m_dt;\n"\r
984 "       float m_positionDrift;\n"\r
985 "       float m_positionConstraintCoeff;\n"\r
986 "} ConstBufferCTC;\n"\r
987 "\n"\r
988 "__kernel\n"\r
989 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"\r
990 "void ContactToConstraintKernel(__global Contact4* gContact, __global Body* gBodies, __global Shape* gShapes, __global Constraint4* gConstraintOut, ConstBufferCTC cb)\n"\r
991 "{\n"\r
992 "       int gIdx = GET_GLOBAL_IDX;\n"\r
993 "       int nContacts = cb.m_nContacts;\n"\r
994 "       float dt = cb.m_dt;\n"\r
995 "       float positionDrift = cb.m_positionDrift;\n"\r
996 "       float positionConstraintCoeff = cb.m_positionConstraintCoeff;\n"\r
997 "\n"\r
998 "       if( gIdx < nContacts )\n"\r
999 "       {\n"\r
1000 "               int aIdx = gContact[gIdx].m_bodyAPtr;\n"\r
1001 "               int bIdx = gContact[gIdx].m_bodyBPtr;\n"\r
1002 "\n"\r
1003 "               float4 posA = gBodies[aIdx].m_pos;\n"\r
1004 "               float4 linVelA = gBodies[aIdx].m_linVel;\n"\r
1005 "               float4 angVelA = gBodies[aIdx].m_angVel;\n"\r
1006 "               float invMassA = gBodies[aIdx].m_invMass;\n"\r
1007 "               Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n"\r
1008 "\n"\r
1009 "               float4 posB = gBodies[bIdx].m_pos;\n"\r
1010 "               float4 linVelB = gBodies[bIdx].m_linVel;\n"\r
1011 "               float4 angVelB = gBodies[bIdx].m_angVel;\n"\r
1012 "               float invMassB = gBodies[bIdx].m_invMass;\n"\r
1013 "               Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n"\r
1014 "\n"\r
1015 "               Constraint4 cs;\n"\r
1016 "\n"\r
1017 "               setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n"\r
1018 "                       gContact[gIdx], dt, positionDrift, positionConstraintCoeff, \n"\r
1019 "                       &cs );\n"\r
1020 "               \n"\r
1021 "               cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n"\r
1022 "\n"\r
1023 "               gConstraintOut[gIdx] = cs;\n"\r
1024 "       }\n"\r
1025 "}\n"\r
1026 "\n"\r
1027 "__kernel\n"\r
1028 "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"\r
1029 "void CopyConstraintKernel(__global Contact4* gIn, __global Contact4* gOut, int4 cb )\n"\r
1030 "{\n"\r
1031 "       int gIdx = GET_GLOBAL_IDX;\n"\r
1032 "       if( gIdx < cb.x )\n"\r
1033 "       {\n"\r
1034 "               gOut[gIdx] = gIn[gIdx];\n"\r
1035 "       }\n"\r
1036 "}\n"\r
1037 ;\r