[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / kernels / satClipHullContacts.h
1 //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
2 static const char* satClipKernelsCL =
3         "#define TRIANGLE_NUM_CONVEX_FACES 5\n"
4         "#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
5         "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
6         "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
7         "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
8         "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
9         "#ifdef cl_ext_atomic_counters_32\n"
10         "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
11         "#else\n"
12         "#define counter32_t volatile __global int*\n"
13         "#endif\n"
14         "#define GET_GROUP_IDX get_group_id(0)\n"
15         "#define GET_LOCAL_IDX get_local_id(0)\n"
16         "#define GET_GLOBAL_IDX get_global_id(0)\n"
17         "#define GET_GROUP_SIZE get_local_size(0)\n"
18         "#define GET_NUM_GROUPS get_num_groups(0)\n"
19         "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
20         "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
21         "#define AtomInc(x) atom_inc(&(x))\n"
22         "#define AtomInc1(x, out) out = atom_inc(&(x))\n"
23         "#define AppendInc(x, out) out = atomic_inc(x)\n"
24         "#define AtomAdd(x, value) atom_add(&(x), value)\n"
25         "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
26         "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
27         "#define max2 max\n"
28         "#define min2 min\n"
29         "typedef unsigned int u32;\n"
30         "#ifndef B3_CONTACT4DATA_H\n"
31         "#define B3_CONTACT4DATA_H\n"
32         "#ifndef B3_FLOAT4_H\n"
33         "#define B3_FLOAT4_H\n"
34         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
35         "#define B3_PLATFORM_DEFINITIONS_H\n"
36         "struct MyTest\n"
37         "{\n"
38         "       int bla;\n"
39         "};\n"
40         "#ifdef __cplusplus\n"
41         "#else\n"
42         "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n"
43         "#define B3_LARGE_FLOAT 1e18f\n"
44         "#define B3_INFINITY 1e18f\n"
45         "#define b3Assert(a)\n"
46         "#define b3ConstArray(a) __global const a*\n"
47         "#define b3AtomicInc atomic_inc\n"
48         "#define b3AtomicAdd atomic_add\n"
49         "#define b3Fabs fabs\n"
50         "#define b3Sqrt native_sqrt\n"
51         "#define b3Sin native_sin\n"
52         "#define b3Cos native_cos\n"
53         "#define B3_STATIC\n"
54         "#endif\n"
55         "#endif\n"
56         "#ifdef __cplusplus\n"
57         "#else\n"
58         "       typedef float4  b3Float4;\n"
59         "       #define b3Float4ConstArg const b3Float4\n"
60         "       #define b3MakeFloat4 (float4)\n"
61         "       float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
62         "       {\n"
63         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
64         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
65         "               return dot(a1, b1);\n"
66         "       }\n"
67         "       b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
68         "       {\n"
69         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
70         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
71         "               return cross(a1, b1);\n"
72         "       }\n"
73         "       #define b3MinFloat4 min\n"
74         "       #define b3MaxFloat4 max\n"
75         "       #define b3Normalized(a) normalize(a)\n"
76         "#endif \n"
77         "               \n"
78         "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
79         "{\n"
80         "       if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)    \n"
81         "               return false;\n"
82         "       return true;\n"
83         "}\n"
84         "inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n"
85         "{\n"
86         "    float maxDot = -B3_INFINITY;\n"
87         "    int i = 0;\n"
88         "    int ptIndex = -1;\n"
89         "    for( i = 0; i < vecLen; i++ )\n"
90         "    {\n"
91         "        float dot = b3Dot3F4(vecArray[i],vec);\n"
92         "            \n"
93         "        if( dot > maxDot )\n"
94         "        {\n"
95         "            maxDot = dot;\n"
96         "            ptIndex = i;\n"
97         "        }\n"
98         "    }\n"
99         "       b3Assert(ptIndex>=0);\n"
100         "    if (ptIndex<0)\n"
101         "       {\n"
102         "               ptIndex = 0;\n"
103         "       }\n"
104         "    *dotOut = maxDot;\n"
105         "    return ptIndex;\n"
106         "}\n"
107         "#endif //B3_FLOAT4_H\n"
108         "typedef  struct b3Contact4Data b3Contact4Data_t;\n"
109         "struct b3Contact4Data\n"
110         "{\n"
111         "       b3Float4        m_worldPosB[4];\n"
112         "//     b3Float4        m_localPosA[4];\n"
113         "//     b3Float4        m_localPosB[4];\n"
114         "       b3Float4        m_worldNormalOnB;       //      w: m_nPoints\n"
115         "       unsigned short  m_restituitionCoeffCmp;\n"
116         "       unsigned short  m_frictionCoeffCmp;\n"
117         "       int m_batchIdx;\n"
118         "       int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
119         "       int m_bodyBPtrAndSignBit;\n"
120         "       int     m_childIndexA;\n"
121         "       int     m_childIndexB;\n"
122         "       int m_unused1;\n"
123         "       int m_unused2;\n"
124         "};\n"
125         "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
126         "{\n"
127         "       return (int)contact->m_worldNormalOnB.w;\n"
128         "};\n"
129         "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
130         "{\n"
131         "       contact->m_worldNormalOnB.w = (float)numPoints;\n"
132         "};\n"
133         "#endif //B3_CONTACT4DATA_H\n"
134         "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n"
135         "#define B3_CONVEX_POLYHEDRON_DATA_H\n"
136         "#ifndef B3_FLOAT4_H\n"
137         "#ifdef __cplusplus\n"
138         "#else\n"
139         "#endif \n"
140         "#endif //B3_FLOAT4_H\n"
141         "#ifndef B3_QUAT_H\n"
142         "#define B3_QUAT_H\n"
143         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
144         "#ifdef __cplusplus\n"
145         "#else\n"
146         "#endif\n"
147         "#endif\n"
148         "#ifndef B3_FLOAT4_H\n"
149         "#ifdef __cplusplus\n"
150         "#else\n"
151         "#endif \n"
152         "#endif //B3_FLOAT4_H\n"
153         "#ifdef __cplusplus\n"
154         "#else\n"
155         "       typedef float4  b3Quat;\n"
156         "       #define b3QuatConstArg const b3Quat\n"
157         "       \n"
158         "       \n"
159         "inline float4 b3FastNormalize4(float4 v)\n"
160         "{\n"
161         "       v = (float4)(v.xyz,0.f);\n"
162         "       return fast_normalize(v);\n"
163         "}\n"
164         "       \n"
165         "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
166         "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
167         "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
168         "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
169         "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n"
170         "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
171         "{\n"
172         "       b3Quat ans;\n"
173         "       ans = b3Cross3( a, b );\n"
174         "       ans += a.w*b+b.w*a;\n"
175         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
176         "       ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
177         "       return ans;\n"
178         "}\n"
179         "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
180         "{\n"
181         "       b3Quat q;\n"
182         "       q=in;\n"
183         "       //return b3FastNormalize4(in);\n"
184         "       float len = native_sqrt(dot(q, q));\n"
185         "       if(len > 0.f)\n"
186         "       {\n"
187         "               q *= 1.f / len;\n"
188         "       }\n"
189         "       else\n"
190         "       {\n"
191         "               q.x = q.y = q.z = 0.f;\n"
192         "               q.w = 1.f;\n"
193         "       }\n"
194         "       return q;\n"
195         "}\n"
196         "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
197         "{\n"
198         "       b3Quat qInv = b3QuatInvert( q );\n"
199         "       float4 vcpy = vec;\n"
200         "       vcpy.w = 0.f;\n"
201         "       float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
202         "       return out;\n"
203         "}\n"
204         "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n"
205         "{\n"
206         "       return (b3Quat)(-q.xyz, q.w);\n"
207         "}\n"
208         "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
209         "{\n"
210         "       return (b3Quat)(-q.xyz, q.w);\n"
211         "}\n"
212         "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
213         "{\n"
214         "       return b3QuatRotate( b3QuatInvert( q ), vec );\n"
215         "}\n"
216         "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n"
217         "{\n"
218         "       return b3QuatRotate( orientation, point ) + (translation);\n"
219         "}\n"
220         "       \n"
221         "#endif \n"
222         "#endif //B3_QUAT_H\n"
223         "typedef struct b3GpuFace b3GpuFace_t;\n"
224         "struct b3GpuFace\n"
225         "{\n"
226         "       b3Float4 m_plane;\n"
227         "       int m_indexOffset;\n"
228         "       int m_numIndices;\n"
229         "       int m_unusedPadding1;\n"
230         "       int m_unusedPadding2;\n"
231         "};\n"
232         "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n"
233         "struct b3ConvexPolyhedronData\n"
234         "{\n"
235         "       b3Float4                m_localCenter;\n"
236         "       b3Float4                m_extents;\n"
237         "       b3Float4                mC;\n"
238         "       b3Float4                mE;\n"
239         "       float                   m_radius;\n"
240         "       int     m_faceOffset;\n"
241         "       int m_numFaces;\n"
242         "       int     m_numVertices;\n"
243         "       int m_vertexOffset;\n"
244         "       int     m_uniqueEdgesOffset;\n"
245         "       int     m_numUniqueEdges;\n"
246         "       int m_unused;\n"
247         "};\n"
248         "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n"
249         "#ifndef B3_COLLIDABLE_H\n"
250         "#define B3_COLLIDABLE_H\n"
251         "#ifndef B3_FLOAT4_H\n"
252         "#ifdef __cplusplus\n"
253         "#else\n"
254         "#endif \n"
255         "#endif //B3_FLOAT4_H\n"
256         "#ifndef B3_QUAT_H\n"
257         "#ifdef __cplusplus\n"
258         "#else\n"
259         "#endif \n"
260         "#endif //B3_QUAT_H\n"
261         "enum b3ShapeTypes\n"
262         "{\n"
263         "       SHAPE_HEIGHT_FIELD=1,\n"
264         "       SHAPE_CONVEX_HULL=3,\n"
265         "       SHAPE_PLANE=4,\n"
266         "       SHAPE_CONCAVE_TRIMESH=5,\n"
267         "       SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n"
268         "       SHAPE_SPHERE=7,\n"
269         "       MAX_NUM_SHAPE_TYPES,\n"
270         "};\n"
271         "typedef struct b3Collidable b3Collidable_t;\n"
272         "struct b3Collidable\n"
273         "{\n"
274         "       union {\n"
275         "               int m_numChildShapes;\n"
276         "               int m_bvhIndex;\n"
277         "       };\n"
278         "       union\n"
279         "       {\n"
280         "               float m_radius;\n"
281         "               int     m_compoundBvhIndex;\n"
282         "       };\n"
283         "       int m_shapeType;\n"
284         "       int m_shapeIndex;\n"
285         "};\n"
286         "typedef struct b3GpuChildShape b3GpuChildShape_t;\n"
287         "struct b3GpuChildShape\n"
288         "{\n"
289         "       b3Float4        m_childPosition;\n"
290         "       b3Quat          m_childOrientation;\n"
291         "       int m_shapeIndex;\n"
292         "       int m_unused0;\n"
293         "       int m_unused1;\n"
294         "       int m_unused2;\n"
295         "};\n"
296         "struct b3CompoundOverlappingPair\n"
297         "{\n"
298         "       int m_bodyIndexA;\n"
299         "       int m_bodyIndexB;\n"
300         "//     int     m_pairType;\n"
301         "       int m_childShapeIndexA;\n"
302         "       int m_childShapeIndexB;\n"
303         "};\n"
304         "#endif //B3_COLLIDABLE_H\n"
305         "#ifndef B3_RIGIDBODY_DATA_H\n"
306         "#define B3_RIGIDBODY_DATA_H\n"
307         "#ifndef B3_FLOAT4_H\n"
308         "#ifdef __cplusplus\n"
309         "#else\n"
310         "#endif \n"
311         "#endif //B3_FLOAT4_H\n"
312         "#ifndef B3_QUAT_H\n"
313         "#ifdef __cplusplus\n"
314         "#else\n"
315         "#endif \n"
316         "#endif //B3_QUAT_H\n"
317         "#ifndef B3_MAT3x3_H\n"
318         "#define B3_MAT3x3_H\n"
319         "#ifndef B3_QUAT_H\n"
320         "#ifdef __cplusplus\n"
321         "#else\n"
322         "#endif \n"
323         "#endif //B3_QUAT_H\n"
324         "#ifdef __cplusplus\n"
325         "#else\n"
326         "typedef struct\n"
327         "{\n"
328         "       b3Float4 m_row[3];\n"
329         "}b3Mat3x3;\n"
330         "#define b3Mat3x3ConstArg const b3Mat3x3\n"
331         "#define b3GetRow(m,row) (m.m_row[row])\n"
332         "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
333         "{\n"
334         "       b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
335         "       b3Mat3x3 out;\n"
336         "       out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
337         "       out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
338         "       out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
339         "       out.m_row[0].w = 0.f;\n"
340         "       out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
341         "       out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
342         "       out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
343         "       out.m_row[1].w = 0.f;\n"
344         "       out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
345         "       out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
346         "       out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
347         "       out.m_row[2].w = 0.f;\n"
348         "       return out;\n"
349         "}\n"
350         "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
351         "{\n"
352         "       b3Mat3x3 out;\n"
353         "       out.m_row[0] = fabs(matIn.m_row[0]);\n"
354         "       out.m_row[1] = fabs(matIn.m_row[1]);\n"
355         "       out.m_row[2] = fabs(matIn.m_row[2]);\n"
356         "       return out;\n"
357         "}\n"
358         "__inline\n"
359         "b3Mat3x3 mtZero();\n"
360         "__inline\n"
361         "b3Mat3x3 mtIdentity();\n"
362         "__inline\n"
363         "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
364         "__inline\n"
365         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
366         "__inline\n"
367         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
368         "__inline\n"
369         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
370         "__inline\n"
371         "b3Mat3x3 mtZero()\n"
372         "{\n"
373         "       b3Mat3x3 m;\n"
374         "       m.m_row[0] = (b3Float4)(0.f);\n"
375         "       m.m_row[1] = (b3Float4)(0.f);\n"
376         "       m.m_row[2] = (b3Float4)(0.f);\n"
377         "       return m;\n"
378         "}\n"
379         "__inline\n"
380         "b3Mat3x3 mtIdentity()\n"
381         "{\n"
382         "       b3Mat3x3 m;\n"
383         "       m.m_row[0] = (b3Float4)(1,0,0,0);\n"
384         "       m.m_row[1] = (b3Float4)(0,1,0,0);\n"
385         "       m.m_row[2] = (b3Float4)(0,0,1,0);\n"
386         "       return m;\n"
387         "}\n"
388         "__inline\n"
389         "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
390         "{\n"
391         "       b3Mat3x3 out;\n"
392         "       out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
393         "       out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
394         "       out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
395         "       return out;\n"
396         "}\n"
397         "__inline\n"
398         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
399         "{\n"
400         "       b3Mat3x3 transB;\n"
401         "       transB = mtTranspose( b );\n"
402         "       b3Mat3x3 ans;\n"
403         "       //      why this doesn't run when 0ing in the for{}\n"
404         "       a.m_row[0].w = 0.f;\n"
405         "       a.m_row[1].w = 0.f;\n"
406         "       a.m_row[2].w = 0.f;\n"
407         "       for(int i=0; i<3; i++)\n"
408         "       {\n"
409         "//     a.m_row[i].w = 0.f;\n"
410         "               ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
411         "               ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
412         "               ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
413         "               ans.m_row[i].w = 0.f;\n"
414         "       }\n"
415         "       return ans;\n"
416         "}\n"
417         "__inline\n"
418         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
419         "{\n"
420         "       b3Float4 ans;\n"
421         "       ans.x = b3Dot3F4( a.m_row[0], b );\n"
422         "       ans.y = b3Dot3F4( a.m_row[1], b );\n"
423         "       ans.z = b3Dot3F4( a.m_row[2], b );\n"
424         "       ans.w = 0.f;\n"
425         "       return ans;\n"
426         "}\n"
427         "__inline\n"
428         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
429         "{\n"
430         "       b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
431         "       b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
432         "       b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
433         "       b3Float4 ans;\n"
434         "       ans.x = b3Dot3F4( a, colx );\n"
435         "       ans.y = b3Dot3F4( a, coly );\n"
436         "       ans.z = b3Dot3F4( a, colz );\n"
437         "       return ans;\n"
438         "}\n"
439         "#endif\n"
440         "#endif //B3_MAT3x3_H\n"
441         "typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
442         "struct b3RigidBodyData\n"
443         "{\n"
444         "       b3Float4                                m_pos;\n"
445         "       b3Quat                                  m_quat;\n"
446         "       b3Float4                                m_linVel;\n"
447         "       b3Float4                                m_angVel;\n"
448         "       int                                     m_collidableIdx;\n"
449         "       float                           m_invMass;\n"
450         "       float                           m_restituitionCoeff;\n"
451         "       float                           m_frictionCoeff;\n"
452         "};\n"
453         "typedef struct b3InertiaData b3InertiaData_t;\n"
454         "struct b3InertiaData\n"
455         "{\n"
456         "       b3Mat3x3 m_invInertiaWorld;\n"
457         "       b3Mat3x3 m_initInvInertia;\n"
458         "};\n"
459         "#endif //B3_RIGIDBODY_DATA_H\n"
460         "       \n"
461         "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n"
462         "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
463         "#define make_float4 (float4)\n"
464         "#define make_float2 (float2)\n"
465         "#define make_uint4 (uint4)\n"
466         "#define make_int4 (int4)\n"
467         "#define make_uint2 (uint2)\n"
468         "#define make_int2 (int2)\n"
469         "__inline\n"
470         "float fastDiv(float numerator, float denominator)\n"
471         "{\n"
472         "       return native_divide(numerator, denominator);   \n"
473         "//     return numerator/denominator;   \n"
474         "}\n"
475         "__inline\n"
476         "float4 fastDiv4(float4 numerator, float4 denominator)\n"
477         "{\n"
478         "       return native_divide(numerator, denominator);   \n"
479         "}\n"
480         "__inline\n"
481         "float4 cross3(float4 a, float4 b)\n"
482         "{\n"
483         "       return cross(a,b);\n"
484         "}\n"
485         "//#define dot3F4 dot\n"
486         "__inline\n"
487         "float dot3F4(float4 a, float4 b)\n"
488         "{\n"
489         "       float4 a1 = make_float4(a.xyz,0.f);\n"
490         "       float4 b1 = make_float4(b.xyz,0.f);\n"
491         "       return dot(a1, b1);\n"
492         "}\n"
493         "__inline\n"
494         "float4 fastNormalize4(float4 v)\n"
495         "{\n"
496         "       return fast_normalize(v);\n"
497         "}\n"
498         "///////////////////////////////////////\n"
499         "//     Quaternion\n"
500         "///////////////////////////////////////\n"
501         "typedef float4 Quaternion;\n"
502         "__inline\n"
503         "Quaternion qtMul(Quaternion a, Quaternion b);\n"
504         "__inline\n"
505         "Quaternion qtNormalize(Quaternion in);\n"
506         "__inline\n"
507         "float4 qtRotate(Quaternion q, float4 vec);\n"
508         "__inline\n"
509         "Quaternion qtInvert(Quaternion q);\n"
510         "__inline\n"
511         "Quaternion qtMul(Quaternion a, Quaternion b)\n"
512         "{\n"
513         "       Quaternion ans;\n"
514         "       ans = cross3( a, b );\n"
515         "       ans += a.w*b+b.w*a;\n"
516         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
517         "       ans.w = a.w*b.w - dot3F4(a, b);\n"
518         "       return ans;\n"
519         "}\n"
520         "__inline\n"
521         "Quaternion qtNormalize(Quaternion in)\n"
522         "{\n"
523         "       return fastNormalize4(in);\n"
524         "//     in /= length( in );\n"
525         "//     return in;\n"
526         "}\n"
527         "__inline\n"
528         "float4 qtRotate(Quaternion q, float4 vec)\n"
529         "{\n"
530         "       Quaternion qInv = qtInvert( q );\n"
531         "       float4 vcpy = vec;\n"
532         "       vcpy.w = 0.f;\n"
533         "       float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
534         "       return out;\n"
535         "}\n"
536         "__inline\n"
537         "Quaternion qtInvert(Quaternion q)\n"
538         "{\n"
539         "       return (Quaternion)(-q.xyz, q.w);\n"
540         "}\n"
541         "__inline\n"
542         "float4 qtInvRotate(const Quaternion q, float4 vec)\n"
543         "{\n"
544         "       return qtRotate( qtInvert( q ), vec );\n"
545         "}\n"
546         "__inline\n"
547         "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
548         "{\n"
549         "       return qtRotate( *orientation, *p ) + (*translation);\n"
550         "}\n"
551         "__inline\n"
552         "float4 normalize3(const float4 a)\n"
553         "{\n"
554         "       float4 n = make_float4(a.x, a.y, a.z, 0.f);\n"
555         "       return fastNormalize4( n );\n"
556         "}\n"
557         "__inline float4 lerp3(const float4 a,const float4 b, float  t)\n"
558         "{\n"
559         "       return make_float4(     a.x + (b.x - a.x) * t,\n"
560         "                                               a.y + (b.y - a.y) * t,\n"
561         "                                               a.z + (b.z - a.z) * t,\n"
562         "                                               0.f);\n"
563         "}\n"
564         "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n"
565         "int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n"
566         "{\n"
567         "       \n"
568         "       int ve;\n"
569         "       float ds, de;\n"
570         "       int numVertsOut = 0;\n"
571         "    //double-check next test\n"
572         "       if (numVertsIn < 2)\n"
573         "               return 0;\n"
574         "    \n"
575         "       float4 firstVertex=pVtxIn[numVertsIn-1];\n"
576         "       float4 endVertex = pVtxIn[0];\n"
577         "       \n"
578         "       ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n"
579         "    \n"
580         "       for (ve = 0; ve < numVertsIn; ve++)\n"
581         "       {\n"
582         "               endVertex=pVtxIn[ve];\n"
583         "               de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n"
584         "               if (ds<0)\n"
585         "               {\n"
586         "                       if (de<0)\n"
587         "                       {\n"
588         "                               // Start < 0, end < 0, so output endVertex\n"
589         "                               ppVtxOut[numVertsOut++] = endVertex;\n"
590         "                       }\n"
591         "                       else\n"
592         "                       {\n"
593         "                               // Start < 0, end >= 0, so output intersection\n"
594         "                               ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
595         "                       }\n"
596         "               }\n"
597         "               else\n"
598         "               {\n"
599         "                       if (de<0)\n"
600         "                       {\n"
601         "                               // Start >= 0, end < 0 so output intersection and end\n"
602         "                               ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
603         "                               ppVtxOut[numVertsOut++] = endVertex;\n"
604         "                       }\n"
605         "               }\n"
606         "               firstVertex = endVertex;\n"
607         "               ds = de;\n"
608         "       }\n"
609         "       return numVertsOut;\n"
610         "}\n"
611         "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n"
612         "int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n"
613         "{\n"
614         "       \n"
615         "       int ve;\n"
616         "       float ds, de;\n"
617         "       int numVertsOut = 0;\n"
618         "//double-check next test\n"
619         "       if (numVertsIn < 2)\n"
620         "               return 0;\n"
621         "       float4 firstVertex=pVtxIn[numVertsIn-1];\n"
622         "       float4 endVertex = pVtxIn[0];\n"
623         "       \n"
624         "       ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n"
625         "       for (ve = 0; ve < numVertsIn; ve++)\n"
626         "       {\n"
627         "               endVertex=pVtxIn[ve];\n"
628         "               de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n"
629         "               if (ds<0)\n"
630         "               {\n"
631         "                       if (de<0)\n"
632         "                       {\n"
633         "                               // Start < 0, end < 0, so output endVertex\n"
634         "                               ppVtxOut[numVertsOut++] = endVertex;\n"
635         "                       }\n"
636         "                       else\n"
637         "                       {\n"
638         "                               // Start < 0, end >= 0, so output intersection\n"
639         "                               ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
640         "                       }\n"
641         "               }\n"
642         "               else\n"
643         "               {\n"
644         "                       if (de<0)\n"
645         "                       {\n"
646         "                               // Start >= 0, end < 0 so output intersection and end\n"
647         "                               ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n"
648         "                               ppVtxOut[numVertsOut++] = endVertex;\n"
649         "                       }\n"
650         "               }\n"
651         "               firstVertex = endVertex;\n"
652         "               ds = de;\n"
653         "       }\n"
654         "       return numVertsOut;\n"
655         "}\n"
656         "int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  \n"
657         "       const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n"
658         "       float4* worldVertsB2, int capacityWorldVertsB2,\n"
659         "       const float minDist, float maxDist,\n"
660         "       __global const float4* vertices,\n"
661         "       __global const b3GpuFace_t* faces,\n"
662         "       __global const int* indices,\n"
663         "       float4* contactsOut,\n"
664         "       int contactCapacity)\n"
665         "{\n"
666         "       int numContactsOut = 0;\n"
667         "       float4* pVtxIn = worldVertsB1;\n"
668         "       float4* pVtxOut = worldVertsB2;\n"
669         "       \n"
670         "       int numVertsIn = numWorldVertsB1;\n"
671         "       int numVertsOut = 0;\n"
672         "       int closestFaceA=-1;\n"
673         "       {\n"
674         "               float dmin = FLT_MAX;\n"
675         "               for(int face=0;face<hullA->m_numFaces;face++)\n"
676         "               {\n"
677         "                       const float4 Normal = make_float4(\n"
678         "                               faces[hullA->m_faceOffset+face].m_plane.x, \n"
679         "                               faces[hullA->m_faceOffset+face].m_plane.y, \n"
680         "                               faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n"
681         "                       const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
682         "               \n"
683         "                       float d = dot3F4(faceANormalWS,separatingNormal);\n"
684         "                       if (d < dmin)\n"
685         "                       {\n"
686         "                               dmin = d;\n"
687         "                               closestFaceA = face;\n"
688         "                       }\n"
689         "               }\n"
690         "       }\n"
691         "       if (closestFaceA<0)\n"
692         "               return numContactsOut;\n"
693         "       b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
694         "       // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
695         "       int numVerticesA = polyA.m_numIndices;\n"
696         "       for(int e0=0;e0<numVerticesA;e0++)\n"
697         "       {\n"
698         "               const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n"
699         "               const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n"
700         "               const float4 edge0 = a - b;\n"
701         "               const float4 WorldEdge0 = qtRotate(ornA,edge0);\n"
702         "               float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
703         "               float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n"
704         "               float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
705         "               float4 worldA1 = transform(&a,&posA,&ornA);\n"
706         "               float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
707         "               \n"
708         "               float4 planeNormalWS = planeNormalWS1;\n"
709         "               float planeEqWS=planeEqWS1;\n"
710         "               \n"
711         "               //clip face\n"
712         "               //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
713         "               numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
714         "               //btSwap(pVtxIn,pVtxOut);\n"
715         "               float4* tmp = pVtxOut;\n"
716         "               pVtxOut = pVtxIn;\n"
717         "               pVtxIn = tmp;\n"
718         "               numVertsIn = numVertsOut;\n"
719         "               numVertsOut = 0;\n"
720         "       }\n"
721         "       \n"
722         "       // only keep points that are behind the witness face\n"
723         "       {\n"
724         "               float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
725         "               float localPlaneEq = polyA.m_plane.w;\n"
726         "               float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n"
727         "               float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n"
728         "               for (int i=0;i<numVertsIn;i++)\n"
729         "               {\n"
730         "                       float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
731         "                       if (depth <=minDist)\n"
732         "                       {\n"
733         "                               depth = minDist;\n"
734         "                       }\n"
735         "                       if (depth <=maxDist)\n"
736         "                       {\n"
737         "                               float4 pointInWorld = pVtxIn[i];\n"
738         "                               //resultOut.addContactPoint(separatingNormal,point,depth);\n"
739         "                               contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
740         "                       }\n"
741         "               }\n"
742         "       }\n"
743         "       return numContactsOut;\n"
744         "}\n"
745         "int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  \n"  // Clips the polygon in worldVertsB1 against the side planes of hull A's witness face and writes contacts; returns the number written.
746         "       const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n"  // posA/ornA: transform applied to hull A data; worldVertsB1: input polygon, also reused as scratch.
747         "       float4* worldVertsB2, int capacityWorldVertsB2,\n"  // worldVertsB2: second scratch buffer; capacityWorldVertsB2 is not read in this function.
748         "       const float minDist, float maxDist,\n"  // depths below minDist are raised to minDist; points with depth above maxDist are dropped.
749         "       const float4* verticesA,\n"
750         "       const b3GpuFace_t* facesA,\n"
751         "       const int* indicesA,\n"
752         "       __global const float4* verticesB,\n"  // verticesB/facesB/indicesB are accepted but not read here.
753         "       __global const b3GpuFace_t* facesB,\n"
754         "       __global const int* indicesB,\n"
755         "       float4* contactsOut,\n"  // each entry written is (point.x, point.y, point.z, depth).
756         "       int contactCapacity)\n"  // maximum number of entries this function may write to contactsOut.
757         "{\n"
758         "       int numContactsOut = 0;\n"
759         "       float4* pVtxIn = worldVertsB1;\n"  // pVtxIn/pVtxOut are swapped after every clip pass.
760         "       float4* pVtxOut = worldVertsB2;\n"
761         "       \n"
762         "       int numVertsIn = numWorldVertsB1;\n"
763         "       int numVertsOut = 0;\n"
764         "       int closestFaceA=-1;\n"
765         "       {\n"
766         "               float dmin = FLT_MAX;\n"
767         "               for(int face=0;face<hullA->m_numFaces;face++)\n"  // pick the face whose rotated normal has the smallest dot product with separatingNormal.
768         "               {\n"
769         "                       const float4 Normal = make_float4(\n"
770         "                               facesA[hullA->m_faceOffset+face].m_plane.x, \n"
771         "                               facesA[hullA->m_faceOffset+face].m_plane.y, \n"
772         "                               facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n"
773         "                       const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
774         "               \n"
775         "                       float d = dot3F4(faceANormalWS,separatingNormal);\n"
776         "                       if (d < dmin)\n"
777         "                       {\n"
778         "                               dmin = d;\n"
779         "                               closestFaceA = face;\n"
780         "                       }\n"
781         "               }\n"
782         "       }\n"
783         "       if (closestFaceA<0)\n"  // no face selected (e.g. hull A has zero faces): nothing to clip against.
784         "               return numContactsOut;\n"
785         "       b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
786         "       // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
787         "       int numVerticesA = polyA.m_numIndices;\n"
788         "       for(int e0=0;e0<numVerticesA;e0++)\n"  // one clip pass per edge (e0, e0+1) of the witness face.
789         "       {\n"
790         "               const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n"
791         "               const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n"
792         "               const float4 edge0 = a - b;\n"
793         "               const float4 WorldEdge0 = qtRotate(ornA,edge0);\n"
794         "               float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
795         "               float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n"
796         "               float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"  // side-plane normal: negated cross product of the edge and the face normal.
797         "               float4 worldA1 = transform(&a,&posA,&ornA);\n"
798         "               float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"  // plane offset chosen so the plane passes through vertex a.
799         "               \n"
800         "               float4 planeNormalWS = planeNormalWS1;\n"
801         "               float planeEqWS=planeEqWS1;\n"
802         "               \n"
803         "               //clip face\n"
804         "               //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
805         "               numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"  // NOTE(review): clipFace takes no capacity argument; confirm it cannot overrun pVtxOut.
806         "               //btSwap(pVtxIn,pVtxOut);\n"
807         "               float4* tmp = pVtxOut;\n"
808         "               pVtxOut = pVtxIn;\n"
809         "               pVtxIn = tmp;\n"
810         "               numVertsIn = numVertsOut;\n"
811         "               numVertsOut = 0;\n"
812         "       }\n"
813         "       \n"
814         "       // only keep points that are behind the witness face\n"
815         "       {\n"
816         "               float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n"
817         "               float localPlaneEq = polyA.m_plane.w;\n"
818         "               float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n"
819         "               float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n"
820         "               for (int i=0;i<numVertsIn;i++)\n"
821         "               {\n"
822         "                       float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"  // signed distance of the clipped vertex to the witness plane.
823         "                       if (depth <=minDist)\n"
824         "                       {\n"
825         "                               depth = minDist;\n"
826         "                       }\n"
827         "                       if (depth <=maxDist && numContactsOut<contactCapacity)\n"  // fix: never write past contactCapacity entries of contactsOut.
828         "                       {\n"
829         "                               float4 pointInWorld = pVtxIn[i];\n"
830         "                               //resultOut.addContactPoint(separatingNormal,point,depth);\n"
831         "                               contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
832         "                       }\n"
833         "               }\n"
834         "       }\n"
835         "       return numContactsOut;\n"
836         "}\n"
837         "int    clipHullAgainstHull(const float4 separatingNormal,\n"  // Selects a face of hull B, transforms its vertices into worldVertsB1 and clips them against hull A; returns the contact count.
838         "       __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
839         "       const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n"  // posA/ornA and posB/ornB: transforms applied to hull A and hull B data.
840         "       float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"  // scratch vertex buffers; capacityWorldVerts is treated as the element capacity of worldVertsB1.
841         "       const float minDist, float maxDist,\n"
842         "       __global const float4* vertices,\n"  // vertices/faces/indices are shared by both hulls and addressed through each hull's offsets.
843         "       __global const b3GpuFace_t* faces,\n"
844         "       __global const int* indices,\n"
845         "       float4* localContactsOut,\n"
846         "       int localContactCapacity)\n"
847         "{\n"
848         "       int numContactsOut = 0;\n"
849         "       int numWorldVertsB1= 0;\n"
850         "       int closestFaceB=-1;\n"
851         "       float dmax = -FLT_MAX;\n"
852         "       {\n"
853         "               for(int face=0;face<hullB->m_numFaces;face++)\n"  // pick the face of B whose rotated normal has the largest dot product with separatingNormal.
854         "               {\n"
855         "                       const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n"
856         "                               faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
857         "                       const float4 WorldNormal = qtRotate(ornB, Normal);\n"
858         "                       float d = dot3F4(WorldNormal,separatingNormal);\n"
859         "                       if (d > dmax)\n"
860         "                       {\n"
861         "                               dmax = d;\n"
862         "                               closestFaceB = face;\n"
863         "                       }\n"
864         "               }\n"
865         "       }\n"
866         "       if (closestFaceB>=0) {\n"  // fix: do not index faces[] with -1 when no face of B was selected.
867         "               const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
868         "               const int numVertices = polyB.m_numIndices;\n"
869         "               for(int e0=0;e0<numVertices && numWorldVertsB1<capacityWorldVerts;e0++)\n"  // fix: stop at the buffer capacity instead of overrunning worldVertsB1.
870         "               {\n"
871         "                       const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n"
872         "                       worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
873         "               }\n"
874         "       }\n"
875         "       if (closestFaceB>=0)\n"
876         "       {\n"
877         "               numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n"
878         "                               posA,ornA,\n"
879         "                               worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n"
880         "                               faces,\n"
881         "                               indices,localContactsOut,localContactCapacity);\n"
882         "       }\n"
883         "       return numContactsOut;\n"  // 0 when hull B contributed no face.
884         "}\n"
885         "int    clipHullAgainstHullLocalA(const float4 separatingNormal,\n"  // Variant of clipHullAgainstHull in which hull A and its vertex/face/index arrays carry no __global qualifier; returns the contact count.
886         "       const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
887         "       const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n"  // posA/ornA and posB/ornB: transforms applied to hull A and hull B data.
888         "       float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"  // scratch vertex buffers; capacityWorldVerts is treated as the element capacity of worldVertsB1.
889         "       const float minDist, float maxDist,\n"
890         "       const float4* verticesA,\n"  // verticesA/facesA/indicesA: hull A geometry, forwarded to clipFaceAgainstHullLocalA.
891         "       const b3GpuFace_t* facesA,\n"
892         "       const int* indicesA,\n"
893         "       __global const float4* verticesB,\n"  // verticesB/facesB/indicesB: hull B geometry, addressed through hullB's offsets.
894         "       __global const b3GpuFace_t* facesB,\n"
895         "       __global const int* indicesB,\n"
896         "       float4* localContactsOut,\n"
897         "       int localContactCapacity)\n"
898         "{\n"
899         "       int numContactsOut = 0;\n"
900         "       int numWorldVertsB1= 0;\n"
901         "       int closestFaceB=-1;\n"
902         "       float dmax = -FLT_MAX;\n"
903         "       {\n"
904         "               for(int face=0;face<hullB->m_numFaces;face++)\n"  // pick the face of B whose rotated normal has the largest dot product with separatingNormal.
905         "               {\n"
906         "                       const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n"
907         "                               facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
908         "                       const float4 WorldNormal = qtRotate(ornB, Normal);\n"
909         "                       float d = dot3F4(WorldNormal,separatingNormal);\n"
910         "                       if (d > dmax)\n"
911         "                       {\n"
912         "                               dmax = d;\n"
913         "                               closestFaceB = face;\n"
914         "                       }\n"
915         "               }\n"
916         "       }\n"
917         "       if (closestFaceB>=0) {\n"  // fix: do not index facesB[] with -1 when no face of B was selected.
918         "               const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
919         "               const int numVertices = polyB.m_numIndices;\n"
920         "               for(int e0=0;e0<numVertices && numWorldVertsB1<capacityWorldVerts;e0++)\n"  // fix: stop at the buffer capacity instead of overrunning worldVertsB1.
921         "               {\n"
922         "                       const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n"
923         "                       worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
924         "               }\n"
925         "       }\n"
926         "       if (closestFaceB>=0)\n"
927         "       {\n"
928         "               numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n"
929         "                               posA,ornA,\n"
930         "                               worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n"
931         "                               verticesA,facesA,indicesA,\n"
932         "                               verticesB,facesB,indicesB,\n"
933         "                               localContactsOut,localContactCapacity);\n"
934         "       }\n"
935         "       return numContactsOut;\n"  // 0 when hull B contributed no face.
936         "}\n"
937         "#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n"  // sequential loop: adds v[1..n-1] into v[0].
938         "#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n"  // sequential loop: runs `execution` n times; the loop variable `ie` is visible to it.
939         "#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n"  // leaves in v[0] the element of v[0..n-1] with the largest .y (i stays 0).
940         "#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n"  // leaves in v[0] the element of v[0..n-1] with the smallest .y (i stays 0).
941         "int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n"  // Chooses up to four point indices from p into contactIdx[0]; returns the number of contacts to keep.
942         "{\n"
943         "       if( nPoints == 0 )\n"
944         "        return 0;\n"
945         "    \n"
946         "    if (nPoints <=4)\n"  // with at most four points nothing is selected and contactIdx is left untouched.
947         "        return nPoints;\n"
948         "    \n"
949         "    \n"
950         "    if (nPoints >64)\n"  // only the first 64 points are considered.
951         "        nPoints = 64;\n"
952         "    \n"
953         "       float4 center = make_float4(0.f);\n"
954         "       {\n"
955         "               \n"
956         "               for (int i=0;i<nPoints;i++)\n"
957         "                       center += p[i];\n"
958         "               center /= (float)nPoints;\n"  // average of the considered points (all four components).
959         "       }\n"
960         "    \n"
961         "       \n"
962         "    \n"
963         "       //      sample 4 directions\n"
964         "    \n"
965         "    float4 aVector = p[0] - center;\n"
966         "    float4 u = cross3( nearNormal, aVector );\n"  // u and v are derived from nearNormal and the first point's offset from the center.
967         "    float4 v = cross3( nearNormal, u );\n"
968         "    u = normalize3( u );\n"
969         "    v = normalize3( v );\n"
970         "    \n"
971         "    \n"
972         "    //keep point with deepest penetration\n"
973         "    float minW= FLT_MAX;\n"
974         "    \n"
975         "    int minIndex=-1;\n"
976         "    \n"
977         "    float4 maxDots;\n"  // despite the name, each lane tracks the smallest projection seen so far (see the `<` tests below).
978         "    maxDots.x = FLT_MAX;\n"  // fix: seed the minimum search with FLT_MAX; FLT_MIN is the smallest positive float and could leave a lane of contactIdx[0] unassigned.
979         "    maxDots.y = FLT_MAX;\n"
980         "    maxDots.z = FLT_MAX;\n"
981         "    maxDots.w = FLT_MAX;\n"
982         "    \n"
983         "    // idx, distance\n"
984         "    for(int ie = 0; ie<nPoints; ie++ )\n"
985         "    {\n"
986         "        if (p[ie].w<minW)\n"  // track the point with the smallest w.
987         "        {\n"
988         "            minW = p[ie].w;\n"
989         "            minIndex=ie;\n"
990         "        }\n"
991         "        float f;\n"
992         "        float4 r = p[ie]-center;\n"
993         "        f = dot3F4( u, r );\n"
994         "        if (f<maxDots.x)\n"  // lane x: smallest projection on u.
995         "        {\n"
996         "            maxDots.x = f;\n"
997         "            contactIdx[0].x = ie;\n"
998         "        }\n"
999         "        \n"
1000         "        f = dot3F4( -u, r );\n"
1001         "        if (f<maxDots.y)\n"  // lane y: smallest projection on -u.
1002         "        {\n"
1003         "            maxDots.y = f;\n"
1004         "            contactIdx[0].y = ie;\n"
1005         "        }\n"
1006         "        \n"
1007         "        \n"
1008         "        f = dot3F4( v, r );\n"
1009         "        if (f<maxDots.z)\n"  // lane z: smallest projection on v.
1010         "        {\n"
1011         "            maxDots.z = f;\n"
1012         "            contactIdx[0].z = ie;\n"
1013         "        }\n"
1014         "        \n"
1015         "        f = dot3F4( -v, r );\n"
1016         "        if (f<maxDots.w)\n"  // lane w: smallest projection on -v.
1017         "        {\n"
1018         "            maxDots.w = f;\n"
1019         "            contactIdx[0].w = ie;\n"
1020         "        }\n"
1021         "        \n"
1022         "    }\n"
1023         "    \n"
1024         "    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n"  // make sure the smallest-w point is among the four.
1025         "    {\n"
1026         "        //replace the first contact with minimum (todo: replace contact with least penetration)\n"
1027         "        contactIdx[0].x = minIndex;\n"
1028         "    }\n"
1029         "    \n"
1030         "    return 4;\n"
1031         "    \n"
1032         "}\n"
1033         "int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"  // Placeholder selection: always picks indices 0..3; p and nearNormal are not read.
1034         "{\n"
1035         "    contactIdx[0] = 0;\n"  // the four indices are written even when nPoints is 0.
1036         "    contactIdx[1] = 1;\n"
1037         "    contactIdx[2] = 2;\n"
1038         "    contactIdx[3] = 3;\n"
1039         "    \n"
1040         "       if( nPoints == 0 ) return 0;\n"
1041         "    \n"
1042         "       nPoints = min2( nPoints, 4 );\n"
1043         "    return nPoints;\n"  // returns min(nPoints, 4).
1044         "    \n"
1045         "}\n"
1046         "int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"  // Writes up to four point indices from p into contactIdx[0..3]; returns how many were written.
1047         "{\n"
1048         "       if( nPoints == 0 ) return 0;\n"
1049         "       nPoints = min2( nPoints, 64 );\n"  // only the first 64 points are considered (matches the 64-entry scratch arrays below).
1050         "       float4 center = make_float4(0.f);\n"
1051         "       {\n"
1052         "               float4 v[64];\n"
1053         "               for (int i=0;i<nPoints;i++)\n"
1054         "                       v[i] = p[i];\n"
1055         "               //memcpy( v, p, nPoints*sizeof(float4) );\n"
1056         "               PARALLEL_SUM( v, nPoints );\n"  // sums the copied points into v[0].
1057         "               center = v[0]/(float)nPoints;\n"  // average of the considered points.
1058         "       }\n"
1059         "       \n"
1060         "       {       //      sample 4 directions\n"
1061         "               if( nPoints < 4 )\n"  // fewer than four points: keep them all, in order.
1062         "               {\n"
1063         "                       for(int i=0; i<nPoints; i++) \n"
1064         "                               contactIdx[i] = i;\n"
1065         "                       return nPoints;\n"
1066         "               }\n"
1067         "               float4 aVector = p[0] - center;\n"
1068         "               float4 u = cross3( nearNormal, aVector );\n"  // u and v are derived from nearNormal and the first point's offset from the center.
1069         "               float4 v = cross3( nearNormal, u );\n"
1070         "               u = normalize3( u );\n"
1071         "               v = normalize3( v );\n"
1072         "               int idx[4];\n"
1073         "               float2 max00 = make_float2(0,FLT_MAX);\n"
1074         "               {\n"
1075         "                       //      idx, distance\n"
1076         "                       {\n"
1077         "                               {\n"
1078         "                                       int4 a[64];\n"
1079         "                                       for(int ie = 0; ie<nPoints; ie++ )\n"
1080         "                                       {\n"
1081         "                                               \n"
1082         "                                               \n"
1083         "                                               float f;\n"
1084         "                                               float4 r = p[ie]-center;\n"
1085         "                                               f = dot3F4( u, r );\n"
1086         "                                               a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"  // pack: upper 24 bits of the float's bit pattern, low 8 bits replaced by the point index.
1087         "                                               f = dot3F4( -u, r );\n"
1088         "                                               a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
1089         "                                               f = dot3F4( v, r );\n"
1090         "                                               a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
1091         "                                               f = dot3F4( -v, r );\n"
1092         "                                               a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
1093         "                                       }\n"
1094         "                                       for(int ie=0; ie<nPoints; ie++)\n"  // per-lane maximum of the packed values (compared as ints) accumulated into a[0].
1095         "                                       {\n"
1096         "                                               a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n"
1097         "                                               a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n"
1098         "                                               a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n"
1099         "                                               a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n"
1100         "                                       }\n"
1101         "                                       idx[0] = (int)a[0].x & 0xff;\n"  // recover the point index from the low 8 bits of each winning value.
1102         "                                       idx[1] = (int)a[0].y & 0xff;\n"
1103         "                                       idx[2] = (int)a[0].z & 0xff;\n"
1104         "                                       idx[3] = (int)a[0].w & 0xff;\n"
1105         "                               }\n"
1106         "                       }\n"
1107         "                       {\n"
1108         "                               float2 h[64];\n"
1109         "                               PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n"  // h[ie] = (index, w of point ie).
1110         "                               REDUCE_MIN( h, nPoints );\n"
1111         "                               max00 = h[0];\n"  // entry with the smallest w; max00 is not read again in this function.
1112         "                       }\n"
1113         "               }\n"
1114         "               contactIdx[0] = idx[0];\n"
1115         "               contactIdx[1] = idx[1];\n"
1116         "               contactIdx[2] = idx[2];\n"
1117         "               contactIdx[3] = idx[3];\n"
1118         "               return 4;\n"
1119         "       }\n"
1120         "}\n"
1121         "__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, \n"  // Per work-item idx < numPairs: reduces that item's contact points to at most four and appends one b3Contact4Data record.
1122         "                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1123         "                                                                                                                                       __global const float4* closestPointsWorld,\n"  // input points; work-item idx reads from offset contactOffsets[idx].
1124         "                                                                                                                                       __global const float4* separatingNormalsWorld,\n"
1125         "                                                                                                                                       __global const int* contactCounts,\n"  // number of input points per work-item.
1126         "                                                                                                                                       __global const int* contactOffsets,\n"
1127         "                                                                                                                                       __global struct b3Contact4Data* restrict contactsOut,\n"
1128         "                                                                                                                                       counter32_t nContactsOut,\n"  // atomically incremented to reserve an output slot.
1129         "                                                                                                                                       int contactCapacity,\n"  // records are only written while the reserved slot is below this value.
1130         "                                                                                                                                       int numPairs,\n"
1131         "                                                                                                                                       int pairIndex\n"
1132         "                                                                                                                                       )\n"
1133         "{\n"
1134         "       int idx = get_global_id(0);\n"
1135         "       \n"
1136         "       if (idx<numPairs)\n"
1137         "       {\n"
1138         "               float4 normal = separatingNormalsWorld[idx];\n"
1139         "               int nPoints = min2(contactCounts[idx], 64);\n"  // fix: clamp to the 64-entry localPoints buffer before copying into it.
1140         "               __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n"
1141         "               float4 localPoints[64];\n"
1142         "               for (int i=0;i<nPoints;i++)\n"
1143         "               {\n"
1144         "                       localPoints[i] = pointsIn[i];\n"
1145         "               }\n"
1146         "               int contactIdx[4];// = {-1,-1,-1,-1};\n"
1147         "               contactIdx[0] = -1;\n"
1148         "               contactIdx[1] = -1;\n"
1149         "               contactIdx[2] = -1;\n"
1150         "               contactIdx[3] = -1;\n"
1151         "               int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n"
1152         "               int dstIdx;\n"
1153         "               AppendInc( nContactsOut, dstIdx );\n"  // the counter is incremented even when nContacts is 0.
1154         "               if (dstIdx<contactCapacity)\n"
1155         "               {\n"
1156         "                       __global struct b3Contact4Data* c = contactsOut + dstIdx;\n"
1157         "                       c->m_worldNormalOnB = -normal;\n"
1158         "                       c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"  // fixed restitution 0 and friction 0.7, scaled by 0xffff.
1159         "                       c->m_batchIdx = idx;\n"
1160         "                       int bodyA = pairs[pairIndex].x;\n"  // NOTE(review): bodies are looked up with the pairIndex argument, not idx - confirm this is intended.
1161         "                       int bodyB = pairs[pairIndex].y;\n"
1162         "                       c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n"  // body index is negated when its inverse mass is zero.
1163         "                       c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n"
1164         "                       c->m_childIndexA = -1;\n"
1165         "                       c->m_childIndexB = -1;\n"
1166         "                       for (int i=0;i<nContacts;i++)\n"
1167         "                       {\n"
1168         "                               c->m_worldPosB[i] = localPoints[contactIdx[i]];\n"
1169         "                       }\n"
1170         "                       GET_NPOINTS(*c) = nContacts;\n"
1171         "               }\n"
1172         "       }\n"
1173         "}\n"
1174         "void   trInverse(float4 translationIn, Quaternion orientationIn,\n"  // Writes qtInvert(orientationIn) and the negated input translation rotated by it; presumably the inverse rigid transform - confirm against qtInvert/qtRotate.
1175         "               float4* translationOut, Quaternion* orientationOut)\n"  // both outputs are written through the pointers.
1176         "{\n"
1177         "       *orientationOut = qtInvert(orientationIn);\n"
1178         "       *translationOut = qtRotate(*orientationOut, -translationIn);\n"  // uses the orientation just written on the previous line.
1179         "}\n"
1180         "void   trMul(float4 translationA, Quaternion orientationA,\n"  // Combines transform A with transform B and writes the result through the two output pointers.
1181         "                                               float4 translationB, Quaternion orientationB,\n"
1182         "               float4* translationOut, Quaternion* orientationOut)\n"
1183         "{\n"
1184         "       *orientationOut = qtMul(orientationA,orientationB);\n"  // orientation: qtMul of A then B, in that argument order.
1185         "       *translationOut = transform(&translationB,&translationA,&orientationA);\n"  // translation: B's translation passed through transform() with A's position and orientation.
1186         "}\n"
1187         "__kernel void   clipHullHullKernel( __global int4* pairs, \n"
1188         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1189         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1190         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1191         "                                                                                                                                                                       __global const float4* vertices,\n"
1192         "                                                                                                                                                                       __global const float4* uniqueEdges,\n"
1193         "                                                                                                                                                                       __global const b3GpuFace_t* faces,\n"
1194         "                                                                                                                                                                       __global const int* indices,\n"
1195         "                                                                                                                                                                       __global const float4* separatingNormals,\n"
1196         "                                                                                                                                                                       __global const int* hasSeparatingAxis,\n"
1197         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1198         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1199         "                                                                                                                                                                       int numPairs,\n"
1200         "                                                                                                                                                                       int contactCapacity)\n"
1201         "{\n"
1202         "       int i = get_global_id(0);\n"
1203         "       int pairIndex = i;\n"
1204         "       \n"
1205         "       float4 worldVertsB1[64];\n"
1206         "       float4 worldVertsB2[64];\n"
1207         "       int capacityWorldVerts = 64;    \n"
1208         "       float4 localContactsOut[64];\n"
1209         "       int localContactCapacity=64;\n"
1210         "       \n"
1211         "       float minDist = -1e30f;\n"
1212         "       float maxDist = 0.02f;\n"
1213         "       if (i<numPairs)\n"
1214         "       {\n"
1215         "               int bodyIndexA = pairs[i].x;\n"
1216         "               int bodyIndexB = pairs[i].y;\n"
1217         "                       \n"
1218         "               int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1219         "               int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1220         "               if (hasSeparatingAxis[i])\n"
1221         "               {\n"
1222         "                       \n"
1223         "                       int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1224         "                       int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1225         "                       \n"
1226         "               \n"
1227         "                       int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n"
1228         "                                                                                                               &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
1229         "                                                                                                               rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n"
1230         "                                                                                                         rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n"
1231         "                                                                                                         worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
1232         "                                                                                                               minDist, maxDist,\n"
1233         "                                                                                                               vertices,faces,indices,\n"
1234         "                                                                                                               localContactsOut,localContactCapacity);\n"
1235         "                                                                                               \n"
1236         "               if (numLocalContactsOut>0)\n"
1237         "               {\n"
1238         "                               float4 normal = -separatingNormals[i];\n"
1239         "                               int nPoints = numLocalContactsOut;\n"
1240         "                               float4* pointsIn = localContactsOut;\n"
1241         "                               int contactIdx[4];// = {-1,-1,-1,-1};\n"
1242         "                               contactIdx[0] = -1;\n"
1243         "                               contactIdx[1] = -1;\n"
1244         "                               contactIdx[2] = -1;\n"
1245         "                               contactIdx[3] = -1;\n"
1246         "               \n"
1247         "                               int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
1248         "               \n"
1249         "                               \n"
1250         "                               int mprContactIndex = pairs[pairIndex].z;\n"
1251         "                               int dstIdx = mprContactIndex;\n"
1252         "                               if (dstIdx<0)\n"
1253         "                               {\n"
1254         "                                       AppendInc( nGlobalContactsOut, dstIdx );\n"
1255         "                               }\n"
1256         "                               if (dstIdx<contactCapacity)\n"
1257         "                               {\n"
1258         "                                       pairs[pairIndex].z = dstIdx;\n"
1259         "                                       __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
1260         "                                       c->m_worldNormalOnB = -normal;\n"
1261         "                                       c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1262         "                                       c->m_batchIdx = pairIndex;\n"
1263         "                                       int bodyA = pairs[pairIndex].x;\n"
1264         "                                       int bodyB = pairs[pairIndex].y;\n"
1265         "                                       c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
1266         "                                       c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
1267         "                                       c->m_childIndexA = -1;\n"
1268         "                                       c->m_childIndexB = -1;\n"
1269         "                                       for (int i=0;i<nReducedContacts;i++)\n"
1270         "                                       {\n"
1271         "                                       //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n"
1272         "                                               if (i>0||(mprContactIndex<0))\n"
1273         "                                               {\n"
1274         "                                                       c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
1275         "                                               }\n"
1276         "                                       }\n"
1277         "                                       GET_NPOINTS(*c) = nReducedContacts;\n"
1278         "                               }\n"
1279         "                               \n"
1280         "                       }//             if (numContactsOut>0)\n"
1281         "               }//             if (hasSeparatingAxis[i])\n"
1282         "       }//     if (i<numPairs)\n"
1283         "}\n"
// OpenCL kernel source (held in this C string literal): clipCompoundsHullHullKernel.
// Each work-item processes the entry of gpuCompoundPairs selected by get_global_id(0).
// When that entry has a non-zero flag in gpuHasCompoundSepNormalsOut, the kernel:
//   1. reads position and orientation of both bodies and, for a side whose child shape
//      index is >= 0, combines them with the child position and orientation;
//   2. calls clipHullAgainstHull with the separating normal of the pair, writing
//      candidate points into the private array localContactsOut;
//   3. calls extractManifoldSequential to choose entries of that array
//      (contactIdx provides four slots for the chosen indices);
//   4. increments nGlobalContactsOut through AppendInc and, if the capacity guard
//      passes, fills one b3Contact4Data record in globalContactsOut.
// Kernel arguments:
//   gpuCompoundPairs            int4 per pair: x,y are body indices, z,w are child shape
//                               indices (a negative value selects the collidable of the body)
//   rigidBodies                 body data; m_pos, m_quat, m_collidableIdx, m_invMass are read
//   collidables                 m_shapeIndex is read to locate the convex shape
//   convexShapes                convex polyhedra handed to clipHullAgainstHull
//   vertices, faces, indices    geometry arrays forwarded to clipHullAgainstHull
//   uniqueEdges                 not referenced in this kernel body
//   gpuChildShapes              m_shapeIndex, m_childPosition, m_childOrientation are read
//   gpuCompoundSepNormalsOut    separating normal per pair (input here)
//   gpuHasCompoundSepNormalsOut per-pair flag; zero skips the pair
//   globalContactsOut           output contact records
//   nGlobalContactsOut          shared counter used to reserve an output index
//   numCompoundPairs            number of valid entries in gpuCompoundPairs
//   maxContactCapacity          bound used by the guard before writing a record
1284         "__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n"
1285         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1286         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1287         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1288         "                                                                                                                                                                       __global const float4* vertices,\n"
1289         "                                                                                                                                                                       __global const float4* uniqueEdges,\n"
1290         "                                                                                                                                                                       __global const b3GpuFace_t* faces,\n"
1291         "                                                                                                                                                                       __global const int* indices,\n"
1292         "                                                                                                                                                                       __global const b3GpuChildShape_t* gpuChildShapes,\n"
1293         "                                                                                                                                                                       __global const float4* gpuCompoundSepNormalsOut,\n"
1294         "                                                                                                                                                                       __global const int* gpuHasCompoundSepNormalsOut,\n"
1295         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1296         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1297         "                                                                                                                                                                       int numCompoundPairs, int maxContactCapacity)\n"
1298         "{\n"
1299         "       int i = get_global_id(0);\n"
1300         "       int pairIndex = i;\n"
1301         "       \n"
             // Per-work-item scratch buffers passed to clipHullAgainstHull; each holds 64 float4 entries.
1302         "       float4 worldVertsB1[64];\n"
1303         "       float4 worldVertsB2[64];\n"
1304         "       int capacityWorldVerts = 64;    \n"
1305         "       float4 localContactsOut[64];\n"
1306         "       int localContactCapacity=64;\n"
1307         "       \n"
             // Distance bounds forwarded to clipHullAgainstHull.
1308         "       float minDist = -1e30f;\n"
1309         "       float maxDist = 0.02f;\n"
             // Work-items with an index at or beyond numCompoundPairs do nothing.
1310         "       if (i<numCompoundPairs)\n"
1311         "       {\n"
1312         "               if (gpuHasCompoundSepNormalsOut[i])\n"
1313         "               {\n"
1314         "                       int bodyIndexA = gpuCompoundPairs[i].x;\n"
1315         "                       int bodyIndexB = gpuCompoundPairs[i].y;\n"
1316         "                       \n"
1317         "                       int childShapeIndexA = gpuCompoundPairs[i].z;\n"
1318         "                       int childShapeIndexB = gpuCompoundPairs[i].w;\n"
1319         "                       \n"
1320         "                       int collidableIndexA = -1;\n"
1321         "                       int collidableIndexB = -1;\n"
1322         "                       \n"
1323         "                       float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
1324         "                       float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
1325         "                       \n"
1326         "                       float4 ornB = rigidBodies[bodyIndexB].m_quat;\n"
1327         "                       float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
1328         "                                                               \n"
             // Side A: with a child shape, the child supplies the collidable index and its
             // position and orientation are combined with those of the body; otherwise the
             // collidable of the body is used and posA, ornA stay as read above.
1329         "                       if (childShapeIndexA >= 0)\n"
1330         "                       {\n"
1331         "                               collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n"
1332         "                               float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n"
1333         "                               float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n"
1334         "                               float4 newPosA = qtRotate(ornA,childPosA)+posA;\n"
1335         "                               float4 newOrnA = qtMul(ornA,childOrnA);\n"
1336         "                               posA = newPosA;\n"
1337         "                               ornA = newOrnA;\n"
1338         "                       } else\n"
1339         "                       {\n"
1340         "                               collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1341         "                       }\n"
1342         "                       \n"
             // Side B: same handling as side A.
             // NOTE(review): the position is computed with transform() here but with
             // qtRotate()+posA on side A; presumably equivalent, confirm against the helpers.
1343         "                       if (childShapeIndexB>=0)\n"
1344         "                       {\n"
1345         "                               collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
1346         "                               float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
1347         "                               float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
1348         "                               float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
1349         "                               float4 newOrnB = qtMul(ornB,childOrnB);\n"
1350         "                               posB = newPosB;\n"
1351         "                               ornB = newOrnB;\n"
1352         "                       } else\n"
1353         "                       {\n"
1354         "                               collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;     \n"
1355         "                       }\n"
1356         "                       \n"
1357         "                       int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1358         "                       int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1359         "               \n"
             // Clip the two convex shapes using the resolved transforms; the return value is
             // used below as the number of points written to localContactsOut.
1360         "                       int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n"
1361         "                                                                                                               &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
1362         "                                                                                                               posA,ornA,\n"
1363         "                                                                                                         posB,ornB,\n"
1364         "                                                                                                         worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
1365         "                                                                                                               minDist, maxDist,\n"
1366         "                                                                                                               vertices,faces,indices,\n"
1367         "                                                                                                               localContactsOut,localContactCapacity);\n"
1368         "                                                                                               \n"
1369         "               if (numLocalContactsOut>0)\n"
1370         "               {\n"
             // The negated separating normal goes to extractManifoldSequential; it is negated
             // again when stored in m_worldNormalOnB below.
1371         "                               float4 normal = -gpuCompoundSepNormalsOut[i];\n"
1372         "                               int nPoints = numLocalContactsOut;\n"
1373         "                               float4* pointsIn = localContactsOut;\n"
1374         "                               int contactIdx[4];// = {-1,-1,-1,-1};\n"
1375         "                               contactIdx[0] = -1;\n"
1376         "                               contactIdx[1] = -1;\n"
1377         "                               contactIdx[2] = -1;\n"
1378         "                               contactIdx[3] = -1;\n"
1379         "               \n"
1380         "                               int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
1381         "               \n"
             // Reserve an output index; the counter is incremented even when the guard below fails.
             // NOTE(review): this guard compares dstIdx+nReducedContacts with the capacity, whereas
             // sphereSphereCollisionKernel in this file compares dstIdx alone; confirm which is intended.
1382         "                               int dstIdx;\n"
1383         "                               AppendInc( nGlobalContactsOut, dstIdx );\n"
1384         "                               if ((dstIdx+nReducedContacts) < maxContactCapacity)\n"
1385         "                               {\n"
1386         "                                       __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
1387         "                                       c->m_worldNormalOnB = -normal;\n"
1388         "                                       c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1389         "                                       c->m_batchIdx = pairIndex;\n"
1390         "                                       int bodyA = gpuCompoundPairs[pairIndex].x;\n"
1391         "                                       int bodyB = gpuCompoundPairs[pairIndex].y;\n"
             // A body whose m_invMass equals zero is stored with a negated index.
1392         "                                       c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
1393         "                                       c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
1394         "                                       c->m_childIndexA = childShapeIndexA;\n"
1395         "                                       c->m_childIndexB = childShapeIndexB;\n"
             // Copy the selected points; this loop variable shadows the work-item index i.
1396         "                                       for (int i=0;i<nReducedContacts;i++)\n"
1397         "                                       {\n"
1398         "                                               c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
1399         "                                       }\n"
1400         "                                       GET_NPOINTS(*c) = nReducedContacts;\n"
1401         "                               }\n"
1402         "                               \n"
1403         "                       }//             if (numContactsOut>0)\n"
1404         "               }//             if (gpuHasCompoundSepNormalsOut[i])\n"
1405         "       }//     if (i<numCompoundPairs)\n"
1406         "}\n"
// OpenCL kernel source (held in this C string literal): sphereSphereCollisionKernel.
// Each work-item processes the entry of pairs selected by get_global_id(0). If both
// collidables of the pair have m_shapeType == SHAPE_SPHERE and the distance between the
// body positions does not exceed the sum of the radii, one single-point b3Contact4Data
// record is appended to globalContactsOut.
// Kernel arguments:
//   pairs              int4 per pair: x,y are the body indices
//   rigidBodies        body data; m_pos, m_collidableIdx, m_invMass are read
//   collidables        m_shapeType and m_radius are read
//   separatingNormals  not referenced in this kernel body
//   hasSeparatingAxis  not referenced in this kernel body
//   globalContactsOut  output contact records
//   nGlobalContactsOut shared counter used to reserve an output index
//   contactCapacity    a record is written only when the reserved index is below this
//   numPairs           number of valid entries in pairs
1407         "__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, \n"
1408         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1409         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1410         "                                                                                                                                                                       __global const float4* separatingNormals,\n"
1411         "                                                                                                                                                                       __global const int* hasSeparatingAxis,\n"
1412         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1413         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1414         "                                                                                                                                                                       int contactCapacity,\n"
1415         "                                                                                                                                                                       int numPairs)\n"
1416         "{\n"
1417         "       int i = get_global_id(0);\n"
1418         "       int pairIndex = i;\n"
1419         "       \n"
             // Work-items with an index at or beyond numPairs do nothing.
1420         "       if (i<numPairs)\n"
1421         "       {\n"
1422         "               int bodyIndexA = pairs[i].x;\n"
1423         "               int bodyIndexB = pairs[i].y;\n"
1424         "                       \n"
1425         "               int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1426         "               int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1427         "               if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n"
1428         "                       collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n"
1429         "               {\n"
1430         "                       //sphere-sphere\n"
1431         "                       float radiusA = collidables[collidableIndexA].m_radius;\n"
1432         "                       float radiusB = collidables[collidableIndexB].m_radius;\n"
1433         "                       float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
1434         "                       float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
1435         "                       float4 diff = posA-posB;\n"
1436         "                       float len = length(diff);\n"
1437         "                       \n"
1438         "                       ///iff distance positive, don't generate a new contact\n"
1439         "                       if ( len <= (radiusA+radiusB))\n"
1440         "                       {\n"
1441         "                               ///distance (negative means penetration)\n"
1442         "                               float dist = len - (radiusA+radiusB);\n"
             // Fallback direction (1,0,0) is kept when the centres are too close to normalise diff.
             // The threshold carries the f suffix so the comparison stays in single precision,
             // matching the other float constants in this kernel.
1443         "                               float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n"
1444         "                               if (len > 0.00001f)\n"
1445         "                               {\n"
1446         "                                       normalOnSurfaceB = diff / len;\n"
1447         "                               }\n"
             // Contact point lies on sphere B along the normal; its w component carries dist.
1448         "                               float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n"
1449         "                               contactPosB.w = dist;\n"
1450         "                                                               \n"
             // Reserve an output index; the counter is incremented even when the guard below fails.
1451         "                               int dstIdx;\n"
1452         "                               AppendInc( nGlobalContactsOut, dstIdx );\n"
1453         "                               if (dstIdx < contactCapacity)\n"
1454         "                               {\n"
1455         "                                       __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
1456         "                                       c->m_worldNormalOnB = -normalOnSurfaceB;\n"
1457         "                                       c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1458         "                                       c->m_batchIdx = pairIndex;\n"
1459         "                                       int bodyA = pairs[pairIndex].x;\n"
1460         "                                       int bodyB = pairs[pairIndex].y;\n"
             // A body whose m_invMass equals zero is stored with a negated index.
1461         "                                       c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
1462         "                                       c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
1463         "                                       c->m_worldPosB[0] = contactPosB;\n"
1464         "                                       c->m_childIndexA = -1;\n"
1465         "                                       c->m_childIndexB = -1;\n"
1466         "                                       GET_NPOINTS(*c) = 1;\n"
1467         "                               }//if (dstIdx < contactCapacity)\n"
1468         "                       }//if ( len <= (radiusA+radiusB))\n"
1469         "               }//SHAPE_SPHERE SHAPE_SPHERE\n"
1470         "       }//if (i<numPairs)\n"
1471         "}                              \n"
1472         "__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n"
1473         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1474         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1475         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1476         "                                                                                                                                                                       __global const float4* vertices,\n"
1477         "                                                                                                                                                                       __global const float4* uniqueEdges,\n"
1478         "                                                                                                                                                                       __global const b3GpuFace_t* faces,\n"
1479         "                                                                                                                                                                       __global const int* indices,\n"
1480         "                                                                                                                                                                       __global const b3GpuChildShape_t* gpuChildShapes,\n"
1481         "                                                                                                                                                                       __global const float4* separatingNormals,\n"
1482         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1483         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1484         "                                                                                                                                                                       int contactCapacity,\n"
1485         "                                                                                                                                                                       int numConcavePairs)\n"
1486         "{\n"
1487         "       int i = get_global_id(0);\n"
1488         "       int pairIndex = i;\n"
1489         "       \n"
1490         "       float4 worldVertsB1[64];\n"
1491         "       float4 worldVertsB2[64];\n"
1492         "       int capacityWorldVerts = 64;    \n"
1493         "       float4 localContactsOut[64];\n"
1494         "       int localContactCapacity=64;\n"
1495         "       \n"
1496         "       float minDist = -1e30f;\n"
1497         "       float maxDist = 0.02f;\n"
1498         "       if (i<numConcavePairs)\n"
1499         "       {\n"
1500         "               //negative value means that the pair is invalid\n"
1501         "               if (concavePairsIn[i].w<0)\n"
1502         "                       return;\n"
1503         "               int bodyIndexA = concavePairsIn[i].x;\n"
1504         "               int bodyIndexB = concavePairsIn[i].y;\n"
1505         "               int f = concavePairsIn[i].z;\n"
1506         "               int childShapeIndexA = f;\n"
1507         "               \n"
1508         "               int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1509         "               int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1510         "               \n"
1511         "               int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1512         "               int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1513         "               \n"
1514         "               ///////////////////////////////////////////////////////////////\n"
1515         "               \n"
1516         "       \n"
1517         "               bool overlap = false;\n"
1518         "               \n"
1519         "               b3ConvexPolyhedronData_t convexPolyhedronA;\n"
1520         "       //add 3 vertices of the triangle\n"
1521         "               convexPolyhedronA.m_numVertices = 3;\n"
1522         "               convexPolyhedronA.m_vertexOffset = 0;\n"
1523         "               float4  localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
1524         "               b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
1525         "               \n"
1526         "               float4 verticesA[3];\n"
1527         "               for (int i=0;i<3;i++)\n"
1528         "               {\n"
1529         "                       int index = indices[face.m_indexOffset+i];\n"
1530         "                       float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n"
1531         "                       verticesA[i] = vert;\n"
1532         "                       localCenter += vert;\n"
1533         "               }\n"
1534         "               float dmin = FLT_MAX;\n"
1535         "               int localCC=0;\n"
1536         "               //a triangle has 3 unique edges\n"
1537         "               convexPolyhedronA.m_numUniqueEdges = 3;\n"
1538         "               convexPolyhedronA.m_uniqueEdgesOffset = 0;\n"
1539         "               float4 uniqueEdgesA[3];\n"
1540         "               \n"
1541         "               uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n"
1542         "               uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n"
1543         "               uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n"
1544         "               convexPolyhedronA.m_faceOffset = 0;\n"
1545         "                                  \n"
1546         "               float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
1547         "                             \n"
1548         "               b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
1549         "               int indicesA[3+3+2+2+2];\n"
1550         "               int curUsedIndices=0;\n"
1551         "               int fidx=0;\n"
1552         "               //front size of triangle\n"
1553         "               {\n"
1554         "                       facesA[fidx].m_indexOffset=curUsedIndices;\n"
1555         "                       indicesA[0] = 0;\n"
1556         "                       indicesA[1] = 1;\n"
1557         "                       indicesA[2] = 2;\n"
1558         "                       curUsedIndices+=3;\n"
1559         "                       float c = face.m_plane.w;\n"
1560         "                       facesA[fidx].m_plane.x = normal.x;\n"
1561         "                       facesA[fidx].m_plane.y = normal.y;\n"
1562         "                       facesA[fidx].m_plane.z = normal.z;\n"
1563         "                       facesA[fidx].m_plane.w = c;\n"
1564         "                       facesA[fidx].m_numIndices=3;\n"
1565         "               }\n"
1566         "               fidx++;\n"
1567         "               //back size of triangle\n"
1568         "               {\n"
1569         "                       facesA[fidx].m_indexOffset=curUsedIndices;\n"
1570         "                       indicesA[3]=2;\n"
1571         "                       indicesA[4]=1;\n"
1572         "                       indicesA[5]=0;\n"
1573         "                       curUsedIndices+=3;\n"
1574         "                       float c = dot3F4(normal,verticesA[0]);\n"
1575         "                       float c1 = -face.m_plane.w;\n"
1576         "                       facesA[fidx].m_plane.x = -normal.x;\n"
1577         "                       facesA[fidx].m_plane.y = -normal.y;\n"
1578         "                       facesA[fidx].m_plane.z = -normal.z;\n"
1579         "                       facesA[fidx].m_plane.w = c;\n"
1580         "                       facesA[fidx].m_numIndices=3;\n"
1581         "               }\n"
1582         "               fidx++;\n"
1583         "               bool addEdgePlanes = true;\n"
1584         "               if (addEdgePlanes)\n"
1585         "               {\n"
1586         "                       int numVertices=3;\n"
1587         "                       int prevVertex = numVertices-1;\n"
1588         "                       for (int i=0;i<numVertices;i++)\n"
1589         "                       {\n"
1590         "                               float4 v0 = verticesA[i];\n"
1591         "                               float4 v1 = verticesA[prevVertex];\n"
1592         "                                            \n"
1593         "                               float4 edgeNormal = normalize(cross(normal,v1-v0));\n"
1594         "                               float c = -dot3F4(edgeNormal,v0);\n"
1595         "                               facesA[fidx].m_numIndices = 2;\n"
1596         "                               facesA[fidx].m_indexOffset=curUsedIndices;\n"
1597         "                               indicesA[curUsedIndices++]=i;\n"
1598         "                               indicesA[curUsedIndices++]=prevVertex;\n"
1599         "                                            \n"
1600         "                               facesA[fidx].m_plane.x = edgeNormal.x;\n"
1601         "                               facesA[fidx].m_plane.y = edgeNormal.y;\n"
1602         "                               facesA[fidx].m_plane.z = edgeNormal.z;\n"
1603         "                               facesA[fidx].m_plane.w = c;\n"
1604         "                               fidx++;\n"
1605         "                               prevVertex = i;\n"
1606         "                       }\n"
1607         "               }\n"
1608         "               convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n"
1609         "               convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n"
1610         "               float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
1611         "               posA.w = 0.f;\n"
1612         "               float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
1613         "               posB.w = 0.f;\n"
1614         "               float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
1615         "               float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
1616         "               float4 sepAxis = separatingNormals[i];\n"
1617         "               \n"
1618         "               int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
1619         "               int childShapeIndexB =-1;\n"
1620         "               if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
1621         "               {\n"
1622         "                       ///////////////////\n"
1623         "                       ///compound shape support\n"
1624         "                       \n"
1625         "                       childShapeIndexB = concavePairsIn[pairIndex].w;\n"
1626         "                       int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
1627         "                       shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
1628         "                       float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
1629         "                       float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
1630         "                       float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
1631         "                       float4 newOrnB = qtMul(ornB,childOrnB);\n"
1632         "                       posB = newPosB;\n"
1633         "                       ornB = newOrnB;\n"
1634         "                       \n"
1635         "               }\n"
1636         "               \n"
1637         "               ////////////////////////////////////////\n"
1638         "               \n"
1639         "               \n"
1640         "               \n"
1641         "               int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n"
1642         "                                                                                                               &convexPolyhedronA, &convexShapes[shapeIndexB],\n"
1643         "                                                                                                               posA,ornA,\n"
1644         "                                                                                                         posB,ornB,\n"
1645         "                                                                                                         worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
1646         "                                                                                                               minDist, maxDist,\n"
1647         "                                                                                                               &verticesA,&facesA,&indicesA,\n"
1648         "                                                                                                               vertices,faces,indices,\n"
1649         "                                                                                                               localContactsOut,localContactCapacity);\n"
1650         "                                                                                               \n"
1651         "               if (numLocalContactsOut>0)\n"
1652         "               {\n"
1653         "                       float4 normal = -separatingNormals[i];\n"
1654         "                       int nPoints = numLocalContactsOut;\n"
1655         "                       float4* pointsIn = localContactsOut;\n"
1656         "                       int contactIdx[4];// = {-1,-1,-1,-1};\n"
1657         "                       contactIdx[0] = -1;\n"
1658         "                       contactIdx[1] = -1;\n"
1659         "                       contactIdx[2] = -1;\n"
1660         "                       contactIdx[3] = -1;\n"
1661         "       \n"
1662         "                       int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
1663         "       \n"
1664         "                       int dstIdx;\n"
1665         "                       AppendInc( nGlobalContactsOut, dstIdx );\n"
1666         "                       if (dstIdx<contactCapacity)\n"
1667         "                       {\n"
1668         "                               __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
1669         "                               c->m_worldNormalOnB = -normal;\n"
1670         "                               c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1671         "                               c->m_batchIdx = pairIndex;\n"
1672         "                               int bodyA = concavePairsIn[pairIndex].x;\n"
1673         "                               int bodyB = concavePairsIn[pairIndex].y;\n"
1674         "                               c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
1675         "                               c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
1676         "                               c->m_childIndexA = childShapeIndexA;\n"
1677         "                               c->m_childIndexB = childShapeIndexB;\n"
1678         "                               for (int i=0;i<nReducedContacts;i++)\n"
1679         "                               {\n"
1680         "                                       c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
1681         "                               }\n"
1682         "                               GET_NPOINTS(*c) = nReducedContacts;\n"
1683         "                       }\n"
1684         "                               \n"
1685         "               }//             if (numContactsOut>0)\n"
1686         "       }//     if (i<numPairs)\n"
1687         "}\n"
/*
 * findClippingFaces -- OpenCL device function embedded in the satClipKernelsCL string.
 *
 * For one collision pair, selects the two faces that will be clipped against each other
 * and stores their world-space data:
 *   - hull B: the face whose plane normal, rotated by ornB, has the largest dot product
 *     with separatingNormal. Its vertices are passed through transform() with
 *     (posB, ornB) and written to worldVertsB1[pairIndex*capacityWorldVerts + k].
 *   - hull A: the face whose plane normal, rotated by ornA, has the smallest dot product
 *     with separatingNormal. That rotated normal is written to worldNormalsA1[pairIndex]
 *     and the transformed vertices to worldVertsA1[pairIndex*capacityWorldVerts + k].
 *   - clippingFaces[pairIndex] = (face index A, face index B, vertex count A, vertex count B).
 *
 * Safety behaviour:
 *   - Both vertex counts are clamped to capacityWorldVerts, so a face with more vertices
 *     than the per-pair slice can hold no longer writes into the next pair's slice.
 *   - If no face is selected (the hull has no faces, or every comparison is false, e.g.
 *     NaN dot products) the function returns early with clippingFaces[pairIndex].x = -1
 *     and zero counts instead of indexing faces[] with -1. The clip code in this file
 *     already skips pairs whose .x is negative.
 *
 * minDist and maxDist are accepted but not used here. The return value is numContactsOut,
 * which is never modified, so the function always returns 0.
 *
 * NOTE(review): the file header says this string is autogenerated by stringify; presumably
 * the same change has to be made in the .cl source it is generated from -- confirm.
 */
1688         "int    findClippingFaces(const float4 separatingNormal,\n"
1689         "                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n"
1690         "                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n"
1691         "                       __global float4* worldVertsA1,\n"
1692         "                      __global float4* worldNormalsA1,\n"
1693         "                      __global float4* worldVertsB1,\n"
1694         "                      int capacityWorldVerts,\n"
1695         "                      const float minDist, float maxDist,\n"
1696         "                      __global const float4* vertices,\n"
1697         "                      __global const b3GpuFace_t* faces,\n"
1698         "                      __global const int* indices,\n"
1699         "                      __global int4* clippingFaces, int pairIndex)\n"
1700         "{\n"
1701         "       int numContactsOut = 0;\n"
1702         "       int numWorldVertsB1= 0;\n"
1703         "    \n"
1704         "    \n"
1705         "       int closestFaceB=-1;\n"
1706         "       float dmax = -FLT_MAX;\n"
1707         "    \n"
             // Hull B: keep the face whose world normal has the largest dot product with separatingNormal.
1708         "       {\n"
1709         "               for(int face=0;face<hullB->m_numFaces;face++)\n"
1710         "               {\n"
1711         "                       const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n"
1712         "                                              faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
1713         "                       const float4 WorldNormal = qtRotate(ornB, Normal);\n"
1714         "                       float d = dot3F4(WorldNormal,separatingNormal);\n"
1715         "                       if (d > dmax)\n"
1716         "                       {\n"
1717         "                               dmax = d;\n"
1718         "                               closestFaceB = face;\n"
1719         "                       }\n"
1720         "               }\n"
1721         "       }\n"
             // No face of B was selected: flag the pair as "nothing to clip" rather than reading faces[offset-1].
1722         "       if (closestFaceB<0) { clippingFaces[pairIndex] = (int4)(-1,-1,0,0); return numContactsOut; }\n"
             // Emit the chosen B face's vertices in world space; the count is clamped to the per-pair capacity.
1723         "       {\n"
1724         "               const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
1725         "               const int numVertices = min(polyB.m_numIndices, capacityWorldVerts);\n"
1726         "               for(int e0=0;e0<numVertices;e0++)\n"
1727         "               {\n"
1728         "                       const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n"
1729         "                       worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
1730         "               }\n"
1731         "       }\n"
1732         "    \n"
             // Hull A: keep the face whose world normal has the smallest dot product with separatingNormal.
             // worldNormalsA1[pairIndex] is rewritten on every new minimum, so it ends up holding the winner's normal.
1733         "    int closestFaceA=-1;\n"
1734         "       {\n"
1735         "               float dmin = FLT_MAX;\n"
1736         "               for(int face=0;face<hullA->m_numFaces;face++)\n"
1737         "               {\n"
1738         "                       const float4 Normal = make_float4(\n"
1739         "                                              faces[hullA->m_faceOffset+face].m_plane.x,\n"
1740         "                                              faces[hullA->m_faceOffset+face].m_plane.y,\n"
1741         "                                              faces[hullA->m_faceOffset+face].m_plane.z,\n"
1742         "                                              0.f);\n"
1743         "                       const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
1744         "            \n"
1745         "                       float d = dot3F4(faceANormalWS,separatingNormal);\n"
1746         "                       if (d < dmin)\n"
1747         "                       {\n"
1748         "                               dmin = d;\n"
1749         "                               closestFaceA = face;\n"
1750         "                worldNormalsA1[pairIndex] = faceANormalWS;\n"
1751         "                       }\n"
1752         "               }\n"
1753         "       }\n"
             // No face of A was selected: report .x = -1 with zero counts and stop before indexing faces[].
1754         "    if (closestFaceA<0) { clippingFaces[pairIndex] = (int4)(-1,closestFaceB,0,0); return numContactsOut; }\n"
             // Emit the chosen A face's vertices in world space, again clamped to the per-pair capacity.
1755         "    int numVerticesA = min(faces[hullA->m_faceOffset+closestFaceA].m_numIndices, capacityWorldVerts);\n"
1756         "       for(int e0=0;e0<numVerticesA;e0++)\n"
1757         "       {\n"
1758         "        const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n"
1759         "        worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n"
1760         "    }\n"
1761         "    \n"
             // Per-pair record read back by the clipping stage: (faceA, faceB, #vertsA, #vertsB).
1762         "    clippingFaces[pairIndex].x = closestFaceA;\n"
1763         "    clippingFaces[pairIndex].y = closestFaceB;\n"
1764         "    clippingFaces[pairIndex].z = numVerticesA;\n"
1765         "    clippingFaces[pairIndex].w = numWorldVertsB1;\n"
1766         "    \n"
1767         "    \n"
1768         "       return numContactsOut;\n"
1769         "}\n"
/*
 * clipFaces -- OpenCL device function embedded in the satClipKernelsCL string.
 *
 * Clips the world-space polygon of hull B's selected face against planes built from the
 * edges of hull A's selected face, then turns the surviving vertices into contact points.
 *
 * Inputs are the per-pair records written earlier into clippingFaces[pairIndex]
 * (.x = face A, .y = face B, .z = vertex count A, .w = vertex count B) together with the
 * per-pair slices of worldVertsA1 / worldVertsB1 / worldVertsB2, all addressed as
 * pairIndex*capacityWorldVertsB2, and the face normal in worldNormalsA1[pairIndex].
 *
 * Returns the number of contact points kept, and also stores it in
 * clippingFaces[pairIndex].w. Returns 0 without touching anything when .x is negative.
 *
 * The contacts are written through pVtxOut. pVtxIn/pVtxOut are swapped once per edge of
 * face A, so pVtxOut refers to the worldVertsB2 slice after an even number of swaps and to
 * the worldVertsB1 slice after an odd number.
 * NOTE(review): callers therefore need to know which buffer holds the result -- confirm
 * whether this helper is still used; the kernel version in this file adds a final copy.
 * NOTE(review): nothing here bounds the vertex count returned by clipFaceGlobal against
 * capacityWorldVertsB2 -- confirm the helper enforces it.
 */
1770         "int clipFaces(__global float4* worldVertsA1,\n"
1771         "              __global float4* worldNormalsA1,\n"
1772         "              __global float4* worldVertsB1,\n"
1773         "              __global float4* worldVertsB2, \n"
1774         "              int capacityWorldVertsB2,\n"
1775         "              const float minDist, float maxDist,\n"
1776         "              __global int4* clippingFaces,\n"
1777         "              int pairIndex)\n"
1778         "{\n"
1779         "       int numContactsOut = 0;\n"
1780         "    \n"
             // closestFaceB is read but not used below; only closestFaceA's sign matters here.
1781         "    int closestFaceA = clippingFaces[pairIndex].x;\n"
1782         "    int closestFaceB = clippingFaces[pairIndex].y;\n"
1783         "       int numVertsInA = clippingFaces[pairIndex].z;\n"
1784         "       int numVertsInB = clippingFaces[pairIndex].w;\n"
1785         "    \n"
1786         "       int numVertsOut = 0;\n"
1787         "    \n"
1788         "       if (closestFaceA<0)\n"
1789         "               return numContactsOut;\n"
1790         "    \n"
             // Ping-pong buffers: start by reading B's polygon from worldVertsB1 and writing to worldVertsB2.
1791         "    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n"
1792         "    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n"
1793         "    \n"
1794         "    \n"
1795         "       \n"
1796         "       // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
1797         "    \n"
             // One clip plane per edge (aw,bw) of face A: normal = -cross3(aw-bw, faceNormalA), offset = -dot3F4(aw, normal).
             // clipFaceGlobal is defined elsewhere; presumably it writes the clipped polygon to pVtxOut and returns its size.
1798         "       for(int e0=0;e0<numVertsInA;e0++)\n"
1799         "       {\n"
1800         "               const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n"
1801         "               const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n"
1802         "               const float4 WorldEdge0 = aw - bw;\n"
1803         "               float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n"
1804         "               float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
1805         "               float4 worldA1 = aw;\n"
1806         "               float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
1807         "               float4 planeNormalWS = planeNormalWS1;\n"
1808         "               float planeEqWS=planeEqWS1;\n"
1809         "               numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n"
             // The output of this pass becomes the input of the next one.
1810         "               __global float4* tmp = pVtxOut;\n"
1811         "               pVtxOut = pVtxIn;\n"
1812         "               pVtxIn = tmp;\n"
1813         "               numVertsInB = numVertsOut;\n"
1814         "               numVertsOut = 0;\n"
1815         "       }\n"
1816         "    \n"
1817         "    //float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
1818         "    //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
1819         "    \n"
1820         "    /*for (int i=0;i<numVertsInB;i++)\n"
1821         "    {\n"
1822         "        pVtxOut[i] = pVtxIn[i];\n"
1823         "    }*/\n"
1824         "    \n"
1825         "    \n"
1826         "    \n"
1827         "    \n"
1828         "    //numVertsInB=0;\n"
1829         "       \n"
             // Plane of face A itself: stored normal, offset taken from the face's first world-space vertex.
1830         "    float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
1831         "    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
             // Signed distance of each surviving vertex to that plane; values at or below minDist are raised to minDist,
             // and only vertices with depth <= maxDist are kept, stored as (x, y, z, depth).
1832         "    for (int i=0;i<numVertsInB;i++)\n"
1833         "    {\n"
1834         "        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
1835         "        if (depth <=minDist)\n"
1836         "        {\n"
1837         "            depth = minDist;\n"
1838         "        }\n"
1839         "        \n"
1840         "        if (depth <=maxDist)\n"
1841         "        {\n"
1842         "            float4 pointInWorld = pVtxIn[i];\n"
1843         "            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
1844         "        }\n"
1845         "    }\n"
1846         "   \n"
             // .w is reused: it held B's vertex count on entry and holds the contact count on exit.
1847         "    clippingFaces[pairIndex].w =numContactsOut;\n"
1848         "   \n"
1849         "    \n"
1850         "       return numContactsOut;\n"
1851         "}\n"
/*
 * findClippingFacesKernel -- OpenCL kernel embedded in the satClipKernelsCL string.
 *
 * Each work-item handles the pair whose index equals get_global_id(0). Work-items with
 * index >= numPairs, or whose hasSeparatingAxis entry is zero, do nothing.
 *
 * For an active pair the kernel resolves body index -> collidable index -> shape index for
 * both bodies (pairs[i].x / pairs[i].y) and calls findClippingFaces with the two convex
 * shapes, the bodies' m_pos / m_quat and separatingNormals[i]. All results are written by
 * that call into clippingFacesOut, worldVertsA1, worldNormalsA1 and worldVertsB1.
 *
 * The uniqueEdges parameter is not referenced in the body, and the value returned by
 * findClippingFaces is stored in a local that is never read.
 */
1852         "__kernel void   findClippingFacesKernel(  __global const int4* pairs,\n"
1853         "                                        __global const b3RigidBodyData_t* rigidBodies,\n"
1854         "                                        __global const b3Collidable_t* collidables,\n"
1855         "                                        __global const b3ConvexPolyhedronData_t* convexShapes,\n"
1856         "                                        __global const float4* vertices,\n"
1857         "                                        __global const float4* uniqueEdges,\n"
1858         "                                        __global const b3GpuFace_t* faces,\n"
1859         "                                        __global const int* indices,\n"
1860         "                                        __global const float4* separatingNormals,\n"
1861         "                                        __global const int* hasSeparatingAxis,\n"
1862         "                                        __global int4* clippingFacesOut,\n"
1863         "                                        __global float4* worldVertsA1,\n"
1864         "                                        __global float4* worldNormalsA1,\n"
1865         "                                        __global float4* worldVertsB1,\n"
1866         "                                        int capacityWorldVerts,\n"
1867         "                                        int numPairs\n"
1868         "                                        )\n"
1869         "{\n"
1870         "    \n"
1871         "       int i = get_global_id(0);\n"
1872         "       int pairIndex = i;\n"
1873         "    \n"
1874         "       \n"
             // Passed through to findClippingFaces unchanged.
1875         "       float minDist = -1e30f;\n"
1876         "       float maxDist = 0.02f;\n"
1877         "    \n"
1878         "       if (i<numPairs)\n"
1879         "       {\n"
1880         "        \n"
1881         "               if (hasSeparatingAxis[i])\n"
1882         "               {\n"
1883         "            \n"
1884         "                       int bodyIndexA = pairs[i].x;\n"
1885         "                       int bodyIndexB = pairs[i].y;\n"
1886         "                       \n"
1887         "                       int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1888         "                       int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1889         "                       \n"
1890         "                       int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1891         "                       int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1892         "                       \n"
1893         "            \n"
1894         "            \n"
             // The last argument is the pair index used to address every per-pair output slot.
1895         "                       int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n"
1896         "                                                        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
1897         "                                                        rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n"
1898         "                                                        rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n"
1899         "                                                        worldVertsA1,\n"
1900         "                                                        worldNormalsA1,\n"
1901         "                                                        worldVertsB1,capacityWorldVerts,\n"
1902         "                                                        minDist, maxDist,\n"
1903         "                                                        vertices,faces,indices,\n"
1904         "                                                        clippingFacesOut,i);\n"
1905         "            \n"
1906         "            \n"
1907         "               }//             if (hasSeparatingAxis[i])\n"
1908         "       }//     if (i<numPairs)\n"
1909         "    \n"
1910         "}\n"
/*
 * clipFacesAndFindContactsKernel -- OpenCL kernel embedded in the satClipKernelsCL string.
 *
 * Each work-item handles the pair whose index equals get_global_id(0). Work-items with
 * index >= numPairs, or whose hasSeparatingAxis entry is zero, do nothing.
 *
 * For an active pair it reads clippingFacesOut[pairIndex] (.x = face A, .z = vertex count A,
 * .w = vertex count B) and, when .x >= 0:
 *   1. clips the polygon in the pair's worldVertsB1 slice against one plane per edge of
 *      face A (vertices in worldVertsA1, face normal in worldNormalsA1[pairIndex]),
 *      ping-ponging between the worldVertsB1 and worldVertsB2 slices;
 *   2. computes each surviving vertex's signed distance to face A's plane, raises values at
 *      or below minDist to minDist, and keeps vertices with depth <= maxDist as
 *      (x, y, z, depth).
 * clippingFacesOut[pairIndex].w is then overwritten with the number of contacts kept; this
 * happens even when .x < 0, in which case the count written is 0.
 * Finally the contact list is copied from pVtxOut to pVtxIn, so both per-pair slices start
 * with the same contacts regardless of how many buffer swaps happened.
 *
 * All per-pair slices are addressed as pairIndex*vertexFaceCapacity.
 * The separatingNormals and debugMode parameters are not referenced in the body.
 * NOTE(review): nothing here bounds the vertex count returned by clipFaceGlobal against
 * vertexFaceCapacity -- confirm the helper enforces it.
 */
1911         "__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,\n"
1912         "                                                   __global const int* hasSeparatingAxis,\n"
1913         "                                                   __global int4* clippingFacesOut,\n"
1914         "                                                   __global float4* worldVertsA1,\n"
1915         "                                                   __global float4* worldNormalsA1,\n"
1916         "                                                   __global float4* worldVertsB1,\n"
1917         "                                                   __global float4* worldVertsB2,\n"
1918         "                                                    int vertexFaceCapacity,\n"
1919         "                                                   int numPairs,\n"
1920         "                                                                               int debugMode\n"
1921         "                                                   )\n"
1922         "{\n"
1923         "    int i = get_global_id(0);\n"
1924         "       int pairIndex = i;\n"
1925         "       \n"
1926         "    \n"
1927         "       float minDist = -1e30f;\n"
1928         "       float maxDist = 0.02f;\n"
1929         "    \n"
1930         "       if (i<numPairs)\n"
1931         "       {\n"
1932         "        \n"
1933         "               if (hasSeparatingAxis[i])\n"
1934         "               {\n"
1935         "            \n"
1936         "//                     int bodyIndexA = pairs[i].x;\n"
1937         "       //              int bodyIndexB = pairs[i].y;\n"
1938         "                   \n"
1939         "            int numLocalContactsOut = 0;\n"
1940         "            int capacityWorldVertsB2 = vertexFaceCapacity;\n"
1941         "            \n"
             // Ping-pong buffers: start by reading B's polygon from worldVertsB1 and writing to worldVertsB2.
1942         "            __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n"
1943         "            __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n"
1944         "            \n"
1945         "            {\n"
1946         "                __global int4* clippingFaces = clippingFacesOut;\n"
1947         "            \n"
1948         "                \n"
             // closestFaceB is read but not used below.
1949         "                int closestFaceA = clippingFaces[pairIndex].x;\n"
1950         "                int closestFaceB = clippingFaces[pairIndex].y;\n"
1951         "                int numVertsInA = clippingFaces[pairIndex].z;\n"
1952         "                int numVertsInB = clippingFaces[pairIndex].w;\n"
1953         "                \n"
1954         "                int numVertsOut = 0;\n"
1955         "                \n"
1956         "                if (closestFaceA>=0)\n"
1957         "                {\n"
1958         "                    \n"
1959         "                    \n"
1960         "                    \n"
1961         "                    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
1962         "                    \n"
             // One clip plane per edge (aw,bw) of face A: normal = -cross3(aw-bw, faceNormalA), offset = -dot3F4(aw, normal).
             // clipFaceGlobal is defined elsewhere; presumably it writes the clipped polygon to pVtxOut and returns its size.
1963         "                    for(int e0=0;e0<numVertsInA;e0++)\n"
1964         "                    {\n"
1965         "                        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n"
1966         "                        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n"
1967         "                        const float4 WorldEdge0 = aw - bw;\n"
1968         "                        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n"
1969         "                        float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n"
1970         "                        float4 worldA1 = aw;\n"
1971         "                        float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n"
1972         "                        float4 planeNormalWS = planeNormalWS1;\n"
1973         "                        float planeEqWS=planeEqWS1;\n"
1974         "                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n"
             // The output of this pass becomes the input of the next one.
1975         "                        __global float4* tmp = pVtxOut;\n"
1976         "                        pVtxOut = pVtxIn;\n"
1977         "                        pVtxIn = tmp;\n"
1978         "                        numVertsInB = numVertsOut;\n"
1979         "                        numVertsOut = 0;\n"
1980         "                    }\n"
1981         "                    \n"
             // Plane of face A itself: stored normal, offset taken from the face's first world-space vertex.
1982         "                    float4 planeNormalWS = worldNormalsA1[pairIndex];\n"
1983         "                    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n"
1984         "                    \n"
             // This inner loop index shadows the work-item index i declared at the top of the kernel.
1985         "                    for (int i=0;i<numVertsInB;i++)\n"
1986         "                    {\n"
1987         "                        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n"
1988         "                        if (depth <=minDist)\n"
1989         "                        {\n"
1990         "                            depth = minDist;\n"
1991         "                        }\n"
1992         "                        \n"
1993         "                        if (depth <=maxDist)\n"
1994         "                        {\n"
1995         "                            float4 pointInWorld = pVtxIn[i];\n"
1996         "                            pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n"
1997         "                        }\n"
1998         "                    }\n"
1999         "                    \n"
2000         "                }\n"
             // .w is reused: B's vertex count on entry, contact count on exit (0 when face A was invalid).
2001         "                clippingFaces[pairIndex].w =numLocalContactsOut;\n"
2002         "                \n"
2003         "            }\n"
2004         "            \n"
             // Mirror the contacts into the other buffer so both per-pair slices hold them, whatever the swap parity.
2005         "            for (int i=0;i<numLocalContactsOut;i++)\n"
2006         "                pVtxIn[i] = pVtxOut[i];\n"
2007         "                \n"
2008         "               }//             if (hasSeparatingAxis[i])\n"
2009         "       }//     if (i<numPairs)\n"
2010         "    \n"
2011         "}\n"
2012         "__kernel void   newContactReductionKernel( __global int4* pairs,\n"
2013         "                                                   __global const b3RigidBodyData_t* rigidBodies,\n"
2014         "                                                   __global const float4* separatingNormals,\n"
2015         "                                                   __global const int* hasSeparatingAxis,\n"
2016         "                                                   __global struct b3Contact4Data* globalContactsOut,\n"
2017         "                                                   __global int4* clippingFaces,\n"
2018         "                                                   __global float4* worldVertsB2,\n"
2019         "                                                   volatile __global int* nGlobalContactsOut,\n"
2020         "                                                   int vertexFaceCapacity,\n"
2021         "                                                                                                  int contactCapacity,\n"
2022         "                                                   int numPairs\n"
2023         "                                                   )\n"
2024         "{\n"
2025         "    int i = get_global_id(0);\n"
2026         "       int pairIndex = i;\n"
2027         "       \n"
2028         "    int4 contactIdx;\n"
2029         "    contactIdx=make_int4(0,1,2,3);\n"
2030         "    \n"
2031         "       if (i<numPairs)\n"
2032         "       {\n"
2033         "        \n"
2034         "               if (hasSeparatingAxis[i])\n"
2035         "               {\n"
2036         "            \n"
2037         "                       \n"
2038         "            \n"
2039         "            \n"
2040         "                       int nPoints = clippingFaces[pairIndex].w;\n"
2041         "           \n"
2042         "            if (nPoints>0)\n"
2043         "            {\n"
2044         "                 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n"
2045         "                float4 normal = -separatingNormals[i];\n"
2046         "                \n"
2047         "                int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n"
2048         "            \n"
2049         "                               int mprContactIndex = pairs[pairIndex].z;\n"
2050         "                int dstIdx = mprContactIndex;\n"
2051         "                               if (dstIdx<0)\n"
2052         "                               {\n"
2053         "                       AppendInc( nGlobalContactsOut, dstIdx );\n"
2054         "                               }\n"
2055         "//#if 0\n"
2056         "                \n"
2057         "                               if (dstIdx < contactCapacity)\n"
2058         "                               {\n"
2059         "                                       __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
2060         "                                       c->m_worldNormalOnB = -normal;\n"
2061         "                                       c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
2062         "                                       c->m_batchIdx = pairIndex;\n"
2063         "                                       int bodyA = pairs[pairIndex].x;\n"
2064         "                                       int bodyB = pairs[pairIndex].y;\n"
2065         "                                       pairs[pairIndex].w = dstIdx;\n"
2066         "                                       c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
2067         "                                       c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
2068         "                    c->m_childIndexA =-1;\n"
2069         "                                       c->m_childIndexB =-1;\n"
2070         "                    switch (nReducedContacts)\n"
2071         "                    {\n"
2072         "                        case 4:\n"
2073         "                            c->m_worldPosB[3] = pointsIn[contactIdx.w];\n"
2074         "                        case 3:\n"
2075         "                            c->m_worldPosB[2] = pointsIn[contactIdx.z];\n"
2076         "                        case 2:\n"
2077         "                            c->m_worldPosB[1] = pointsIn[contactIdx.y];\n"
2078         "                        case 1:\n"
2079         "                                                       if (mprContactIndex<0)//test\n"
2080         "                                   c->m_worldPosB[0] = pointsIn[contactIdx.x];\n"
2081         "                        default:\n"
2082         "                        {\n"
2083         "                        }\n"
2084         "                    };\n"
2085         "                    \n"
2086         "                                       GET_NPOINTS(*c) = nReducedContacts;\n"
2087         "                    \n"
2088         "                 }\n"
2089         "                 \n"
2090         "                \n"
2091         "//#endif\n"
2092         "                               \n"
2093         "                       }//             if (numContactsOut>0)\n"
2094         "               }//             if (hasSeparatingAxis[i])\n"
2095         "       }//     if (i<numPairs)\n"
2096         "    \n"
2097         "    \n"
2098         "}\n";