[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / kernels / mprKernels.h
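1 // This file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project; make changes in the kernel source it is generated from (presumably the matching .cl file - confirm) rather than here.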
2 static const char* mprKernelsCL =
3         "/***\n"
4         " * ---------------------------------\n"
5         " * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n"
6         " *\n"
7         " *  This file was ported from mpr.c file, part of libccd.\n"
8         " *  The Minkoski Portal Refinement implementation was ported \n"
9         " *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n"
10         " *  at http://github.com/erwincoumans/bullet3\n"
11         " *\n"
12         " *  Distributed under the OSI-approved BSD License (the \"License\");\n"
13         " *  see <http://www.opensource.org/licenses/bsd-license.php>.\n"
14         " *  This software is distributed WITHOUT ANY WARRANTY; without even the\n"
15         " *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
16         " *  See the License for more information.\n"
17         " */\n"
18         "#ifndef B3_MPR_PENETRATION_H\n"
19         "#define B3_MPR_PENETRATION_H\n"
20         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
21         "#define B3_PLATFORM_DEFINITIONS_H\n"
22         "struct MyTest\n"
23         "{\n"
24         "       int bla;\n"
25         "};\n"
26         "#ifdef __cplusplus\n"
27         "#else\n"
28         "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n"
29         "#define B3_LARGE_FLOAT 1e18f\n"
30         "#define B3_INFINITY 1e18f\n"
31         "#define b3Assert(a)\n"
32         "#define b3ConstArray(a) __global const a*\n"
33         "#define b3AtomicInc atomic_inc\n"
34         "#define b3AtomicAdd atomic_add\n"
35         "#define b3Fabs fabs\n"
36         "#define b3Sqrt native_sqrt\n"
37         "#define b3Sin native_sin\n"
38         "#define b3Cos native_cos\n"
39         "#define B3_STATIC\n"
40         "#endif\n"
41         "#endif\n"
42         "#ifndef B3_FLOAT4_H\n"
43         "#define B3_FLOAT4_H\n"
44         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
45         "#ifdef __cplusplus\n"
46         "#else\n"
47         "#endif\n"
48         "#endif\n"
49         "#ifdef __cplusplus\n"
50         "#else\n"
51         "       typedef float4  b3Float4;\n"
52         "       #define b3Float4ConstArg const b3Float4\n"
53         "       #define b3MakeFloat4 (float4)\n"
54         "       float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
55         "       {\n"
56         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
57         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
58         "               return dot(a1, b1);\n"
59         "       }\n"
60         "       b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
61         "       {\n"
62         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
63         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
64         "               return cross(a1, b1);\n"
65         "       }\n"
66         "       #define b3MinFloat4 min\n"
67         "       #define b3MaxFloat4 max\n"
68         "       #define b3Normalized(a) normalize(a)\n"
69         "#endif \n"
70         "               \n"
71         "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
72         "{\n"
73         "       if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)    \n"
74         "               return false;\n"
75         "       return true;\n"
76         "}\n"
77         "inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n"
78         "{\n"
79         "    float maxDot = -B3_INFINITY;\n"
80         "    int i = 0;\n"
81         "    int ptIndex = -1;\n"
82         "    for( i = 0; i < vecLen; i++ )\n"
83         "    {\n"
84         "        float dot = b3Dot3F4(vecArray[i],vec);\n"
85         "            \n"
86         "        if( dot > maxDot )\n"
87         "        {\n"
88         "            maxDot = dot;\n"
89         "            ptIndex = i;\n"
90         "        }\n"
91         "    }\n"
92         "       b3Assert(ptIndex>=0);\n"
93         "    if (ptIndex<0)\n"
94         "       {\n"
95         "               ptIndex = 0;\n"
96         "       }\n"
97         "    *dotOut = maxDot;\n"
98         "    return ptIndex;\n"
99         "}\n"
100         "#endif //B3_FLOAT4_H\n"
101         "#ifndef B3_RIGIDBODY_DATA_H\n"
102         "#define B3_RIGIDBODY_DATA_H\n"
103         "#ifndef B3_FLOAT4_H\n"
104         "#ifdef __cplusplus\n"
105         "#else\n"
106         "#endif \n"
107         "#endif //B3_FLOAT4_H\n"
108         "#ifndef B3_QUAT_H\n"
109         "#define B3_QUAT_H\n"
110         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
111         "#ifdef __cplusplus\n"
112         "#else\n"
113         "#endif\n"
114         "#endif\n"
115         "#ifndef B3_FLOAT4_H\n"
116         "#ifdef __cplusplus\n"
117         "#else\n"
118         "#endif \n"
119         "#endif //B3_FLOAT4_H\n"
120         "#ifdef __cplusplus\n"
121         "#else\n"
122         "       typedef float4  b3Quat;\n"
123         "       #define b3QuatConstArg const b3Quat\n"
124         "       \n"
125         "       \n"
126         "inline float4 b3FastNormalize4(float4 v)\n"
127         "{\n"
128         "       v = (float4)(v.xyz,0.f);\n"
129         "       return fast_normalize(v);\n"
130         "}\n"
131         "       \n"
132         "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
133         "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
134         "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
135         "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
136         "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n"
137         "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
138         "{\n"
139         "       b3Quat ans;\n"
140         "       ans = b3Cross3( a, b );\n"
141         "       ans += a.w*b+b.w*a;\n"
142         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
143         "       ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
144         "       return ans;\n"
145         "}\n"
146         "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
147         "{\n"
148         "       b3Quat q;\n"
149         "       q=in;\n"
150         "       //return b3FastNormalize4(in);\n"
151         "       float len = native_sqrt(dot(q, q));\n"
152         "       if(len > 0.f)\n"
153         "       {\n"
154         "               q *= 1.f / len;\n"
155         "       }\n"
156         "       else\n"
157         "       {\n"
158         "               q.x = q.y = q.z = 0.f;\n"
159         "               q.w = 1.f;\n"
160         "       }\n"
161         "       return q;\n"
162         "}\n"
163         "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
164         "{\n"
165         "       b3Quat qInv = b3QuatInvert( q );\n"
166         "       float4 vcpy = vec;\n"
167         "       vcpy.w = 0.f;\n"
168         "       float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
169         "       return out;\n"
170         "}\n"
171         "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n"
172         "{\n"
173         "       return (b3Quat)(-q.xyz, q.w);\n"
174         "}\n"
175         "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
176         "{\n"
177         "       return (b3Quat)(-q.xyz, q.w);\n"
178         "}\n"
179         "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
180         "{\n"
181         "       return b3QuatRotate( b3QuatInvert( q ), vec );\n"
182         "}\n"
183         "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n"
184         "{\n"
185         "       return b3QuatRotate( orientation, point ) + (translation);\n"
186         "}\n"
187         "       \n"
188         "#endif \n"
189         "#endif //B3_QUAT_H\n"
190         "#ifndef B3_MAT3x3_H\n"
191         "#define B3_MAT3x3_H\n"
192         "#ifndef B3_QUAT_H\n"
193         "#ifdef __cplusplus\n"
194         "#else\n"
195         "#endif \n"
196         "#endif //B3_QUAT_H\n"
197         "#ifdef __cplusplus\n"
198         "#else\n"
199         "typedef struct\n"
200         "{\n"
201         "       b3Float4 m_row[3];\n"
202         "}b3Mat3x3;\n"
203         "#define b3Mat3x3ConstArg const b3Mat3x3\n"
204         "#define b3GetRow(m,row) (m.m_row[row])\n"
205         "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
206         "{\n"
207         "       b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
208         "       b3Mat3x3 out;\n"
209         "       out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
210         "       out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
211         "       out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
212         "       out.m_row[0].w = 0.f;\n"
213         "       out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
214         "       out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
215         "       out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
216         "       out.m_row[1].w = 0.f;\n"
217         "       out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
218         "       out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
219         "       out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
220         "       out.m_row[2].w = 0.f;\n"
221         "       return out;\n"
222         "}\n"
223         "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
224         "{\n"
225         "       b3Mat3x3 out;\n"
226         "       out.m_row[0] = fabs(matIn.m_row[0]);\n"
227         "       out.m_row[1] = fabs(matIn.m_row[1]);\n"
228         "       out.m_row[2] = fabs(matIn.m_row[2]);\n"
229         "       return out;\n"
230         "}\n"
231         "__inline\n"
232         "b3Mat3x3 mtZero();\n"
233         "__inline\n"
234         "b3Mat3x3 mtIdentity();\n"
235         "__inline\n"
236         "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
237         "__inline\n"
238         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
239         "__inline\n"
240         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
241         "__inline\n"
242         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
243         "__inline\n"
244         "b3Mat3x3 mtZero()\n"
245         "{\n"
246         "       b3Mat3x3 m;\n"
247         "       m.m_row[0] = (b3Float4)(0.f);\n"
248         "       m.m_row[1] = (b3Float4)(0.f);\n"
249         "       m.m_row[2] = (b3Float4)(0.f);\n"
250         "       return m;\n"
251         "}\n"
252         "__inline\n"
253         "b3Mat3x3 mtIdentity()\n"
254         "{\n"
255         "       b3Mat3x3 m;\n"
256         "       m.m_row[0] = (b3Float4)(1,0,0,0);\n"
257         "       m.m_row[1] = (b3Float4)(0,1,0,0);\n"
258         "       m.m_row[2] = (b3Float4)(0,0,1,0);\n"
259         "       return m;\n"
260         "}\n"
261         "__inline\n"
262         "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
263         "{\n"
264         "       b3Mat3x3 out;\n"
265         "       out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
266         "       out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
267         "       out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
268         "       return out;\n"
269         "}\n"
270         "__inline\n"
271         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
272         "{\n"
273         "       b3Mat3x3 transB;\n"
274         "       transB = mtTranspose( b );\n"
275         "       b3Mat3x3 ans;\n"
276         "       //      why this doesn't run when 0ing in the for{}\n"
277         "       a.m_row[0].w = 0.f;\n"
278         "       a.m_row[1].w = 0.f;\n"
279         "       a.m_row[2].w = 0.f;\n"
280         "       for(int i=0; i<3; i++)\n"
281         "       {\n"
282         "//     a.m_row[i].w = 0.f;\n"
283         "               ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
284         "               ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
285         "               ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
286         "               ans.m_row[i].w = 0.f;\n"
287         "       }\n"
288         "       return ans;\n"
289         "}\n"
290         "__inline\n"
291         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
292         "{\n"
293         "       b3Float4 ans;\n"
294         "       ans.x = b3Dot3F4( a.m_row[0], b );\n"
295         "       ans.y = b3Dot3F4( a.m_row[1], b );\n"
296         "       ans.z = b3Dot3F4( a.m_row[2], b );\n"
297         "       ans.w = 0.f;\n"
298         "       return ans;\n"
299         "}\n"
300         "__inline\n"
301         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
302         "{\n"
303         "       b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
304         "       b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
305         "       b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
306         "       b3Float4 ans;\n"
307         "       ans.x = b3Dot3F4( a, colx );\n"
308         "       ans.y = b3Dot3F4( a, coly );\n"
309         "       ans.z = b3Dot3F4( a, colz );\n"
310         "       return ans;\n"
311         "}\n"
312         "#endif\n"
313         "#endif //B3_MAT3x3_H\n"
314         "typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
315         "struct b3RigidBodyData\n"
316         "{\n"
317         "       b3Float4                                m_pos;\n"
318         "       b3Quat                                  m_quat;\n"
319         "       b3Float4                                m_linVel;\n"
320         "       b3Float4                                m_angVel;\n"
321         "       int                                     m_collidableIdx;\n"
322         "       float                           m_invMass;\n"
323         "       float                           m_restituitionCoeff;\n"
324         "       float                           m_frictionCoeff;\n"
325         "};\n"
326         "typedef struct b3InertiaData b3InertiaData_t;\n"
327         "struct b3InertiaData\n"
328         "{\n"
329         "       b3Mat3x3 m_invInertiaWorld;\n"
330         "       b3Mat3x3 m_initInvInertia;\n"
331         "};\n"
332         "#endif //B3_RIGIDBODY_DATA_H\n"
333         "       \n"
334         "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n"
335         "#define B3_CONVEX_POLYHEDRON_DATA_H\n"
336         "#ifndef B3_FLOAT4_H\n"
337         "#ifdef __cplusplus\n"
338         "#else\n"
339         "#endif \n"
340         "#endif //B3_FLOAT4_H\n"
341         "#ifndef B3_QUAT_H\n"
342         "#ifdef __cplusplus\n"
343         "#else\n"
344         "#endif \n"
345         "#endif //B3_QUAT_H\n"
346         "typedef struct b3GpuFace b3GpuFace_t;\n"
347         "struct b3GpuFace\n"
348         "{\n"
349         "       b3Float4 m_plane;\n"
350         "       int m_indexOffset;\n"
351         "       int m_numIndices;\n"
352         "       int m_unusedPadding1;\n"
353         "       int m_unusedPadding2;\n"
354         "};\n"
355         "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n"
356         "struct b3ConvexPolyhedronData\n"
357         "{\n"
358         "       b3Float4                m_localCenter;\n"
359         "       b3Float4                m_extents;\n"
360         "       b3Float4                mC;\n"
361         "       b3Float4                mE;\n"
362         "       float                   m_radius;\n"
363         "       int     m_faceOffset;\n"
364         "       int m_numFaces;\n"
365         "       int     m_numVertices;\n"
366         "       int m_vertexOffset;\n"
367         "       int     m_uniqueEdgesOffset;\n"
368         "       int     m_numUniqueEdges;\n"
369         "       int m_unused;\n"
370         "};\n"
371         "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n"
372         "#ifndef B3_COLLIDABLE_H\n"
373         "#define B3_COLLIDABLE_H\n"
374         "#ifndef B3_FLOAT4_H\n"
375         "#ifdef __cplusplus\n"
376         "#else\n"
377         "#endif \n"
378         "#endif //B3_FLOAT4_H\n"
379         "#ifndef B3_QUAT_H\n"
380         "#ifdef __cplusplus\n"
381         "#else\n"
382         "#endif \n"
383         "#endif //B3_QUAT_H\n"
384         "enum b3ShapeTypes\n"
385         "{\n"
386         "       SHAPE_HEIGHT_FIELD=1,\n"
387         "       SHAPE_CONVEX_HULL=3,\n"
388         "       SHAPE_PLANE=4,\n"
389         "       SHAPE_CONCAVE_TRIMESH=5,\n"
390         "       SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n"
391         "       SHAPE_SPHERE=7,\n"
392         "       MAX_NUM_SHAPE_TYPES,\n"
393         "};\n"
394         "typedef struct b3Collidable b3Collidable_t;\n"
395         "struct b3Collidable\n"
396         "{\n"
397         "       union {\n"
398         "               int m_numChildShapes;\n"
399         "               int m_bvhIndex;\n"
400         "       };\n"
401         "       union\n"
402         "       {\n"
403         "               float m_radius;\n"
404         "               int     m_compoundBvhIndex;\n"
405         "       };\n"
406         "       int m_shapeType;\n"
407         "       int m_shapeIndex;\n"
408         "};\n"
409         "typedef struct b3GpuChildShape b3GpuChildShape_t;\n"
410         "struct b3GpuChildShape\n"
411         "{\n"
412         "       b3Float4        m_childPosition;\n"
413         "       b3Quat          m_childOrientation;\n"
414         "       int m_shapeIndex;\n"
415         "       int m_unused0;\n"
416         "       int m_unused1;\n"
417         "       int m_unused2;\n"
418         "};\n"
419         "struct b3CompoundOverlappingPair\n"
420         "{\n"
421         "       int m_bodyIndexA;\n"
422         "       int m_bodyIndexB;\n"
423         "//     int     m_pairType;\n"
424         "       int m_childShapeIndexA;\n"
425         "       int m_childShapeIndexB;\n"
426         "};\n"
427         "#endif //B3_COLLIDABLE_H\n"
428         "#ifdef __cplusplus\n"
429         "#else\n"
430         "#define B3_MPR_SQRT sqrt\n"
431         "#endif\n"
432         "#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n"
433         "#define B3_MPR_FABS fabs\n"
434         "#define B3_MPR_TOLERANCE 1E-6f\n"
435         "#define B3_MPR_MAX_ITERATIONS 1000\n"
436         "struct _b3MprSupport_t \n"
437         "{\n"
438         "    b3Float4 v;  //!< Support point in minkowski sum\n"
439         "    b3Float4 v1; //!< Support point in obj1\n"
440         "    b3Float4 v2; //!< Support point in obj2\n"
441         "};\n"
442         "typedef struct _b3MprSupport_t b3MprSupport_t;\n"
443         "struct _b3MprSimplex_t \n"
444         "{\n"
445         "    b3MprSupport_t ps[4];\n"
446         "    int last; //!< index of last added point\n"
447         "};\n"
448         "typedef struct _b3MprSimplex_t b3MprSimplex_t;\n"
449         "inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n"
450         "{\n"
451         "    return &s->ps[idx];\n"
452         "}\n"
453         "inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n"
454         "{\n"
455         "    s->last = size - 1;\n"
456         "}\n"
457         "inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n"
458         "{\n"
459         "    return s->last + 1;\n"
460         "}\n"
461         "inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n"
462         "{\n"
463         "    // here is no check on boundaries\n"
464         "    return &s->ps[idx];\n"
465         "}\n"
466         "inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n"
467         "{\n"
468         "    *d = *s;\n"
469         "}\n"
470         "inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n"
471         "{\n"
472         "    b3MprSupportCopy(s->ps + pos, a);\n"
473         "}\n"
474         "inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n"
475         "{\n"
476         "    b3MprSupport_t supp;\n"
477         "    b3MprSupportCopy(&supp, &s->ps[pos1]);\n"
478         "    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n"
479         "    b3MprSupportCopy(&s->ps[pos2], &supp);\n"
480         "}\n"
481         "inline int b3MprIsZero(float val)\n"
482         "{\n"
483         "    return B3_MPR_FABS(val) < FLT_EPSILON;\n"
484         "}\n"
485         "inline int b3MprEq(float _a, float _b)\n"
486         "{\n"
487         "    float ab;\n"
488         "    float a, b;\n"
489         "    ab = B3_MPR_FABS(_a - _b);\n"
490         "    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n"
491         "        return 1;\n"
492         "    a = B3_MPR_FABS(_a);\n"
493         "    b = B3_MPR_FABS(_b);\n"
494         "    if (b > a){\n"
495         "        return ab < FLT_EPSILON * b;\n"
496         "    }else{\n"
497         "        return ab < FLT_EPSILON * a;\n"
498         "    }\n"
499         "}\n"
500         "inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n"
501         "{\n"
502         "    return b3MprEq((*a).x, (*b).x)\n"
503         "            && b3MprEq((*a).y, (*b).y)\n"
504         "            && b3MprEq((*a).z, (*b).z);\n"
505         "}\n"
506         "inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull,     b3ConstArray(b3Float4) verticesA)\n"
507         "{\n"
508         "       b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n"
509         "       float maxDot = -B3_LARGE_FLOAT;\n"
510         "    if( 0 < hull->m_numVertices )\n"
511         "    {\n"
512         "        const b3Float4 scaled = supportVec;\n"
513         "               int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n"
514         "        return verticesA[hull->m_vertexOffset+index];\n"
515         "    }\n"
516         "    return supVec;\n"
517         "}\n"
518         "B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
519         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
520         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
521         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
522         "                                                                                                       __global b3Float4* sepAxis,\n"
523         "                                                                                                               const b3Float4* _dir, b3Float4* outp, int logme)\n"
524         "{\n"
525         "       //dir is in worldspace, move to local space\n"
526         "       \n"
527         "       b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n"
528         "       b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n"
529         "       \n"
530         "       b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n"
531         "       \n"
532         "       const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n"
533         "       \n"
534         "       //find local support vertex\n"
535         "       int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n"
536         "       \n"
537         "       b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n"
538         "       __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n"
539         "       \n"
540         "       b3Float4 pInA;\n"
541         "       if (logme)\n"
542         "       {\n"
543         "               b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n"
544         "               float maxDot = -B3_LARGE_FLOAT;\n"
545         "               if( 0 < hull->m_numVertices )\n"
546         "               {\n"
547         "                       const b3Float4 scaled = localDir;\n"
548         "                       int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n"
549         "                       pInA = cpuVertices[hull->m_vertexOffset+index];\n"
550         "                       \n"
551         "               }\n"
552         "       } else\n"
553         "       {\n"
554         "               pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n"
555         "       }\n"
556         "       //move vertex to world space\n"
557         "       *outp = b3TransformPoint(pInA,pos,orn);\n"
558         "       \n"
559         "}\n"
560         "inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
561         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
562         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
563         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
564         "                                                                                                       __global b3Float4* sepAxis,\n"
565         "                                                                                                       const b3Float4* _dir, b3MprSupport_t *supp)\n"
566         "{\n"
567         "    b3Float4 dir;\n"
568         "       dir = *_dir;\n"
569         "       b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n"
570         "    dir = *_dir*-1.f;\n"
571         "       b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n"
572         "    supp->v = supp->v1 - supp->v2;\n"
573         "}\n"
574         "inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n"
575         "{\n"
576         "    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n"
577         "       center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n"
578         "    center->v = center->v1 - center->v2;\n"
579         "}\n"
580         "inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n"
581         "{\n"
582         "       (*v).x = x;\n"
583         "       (*v).y = y;\n"
584         "       (*v).z = z;\n"
585         "       (*v).w = 0.f;\n"
586         "}\n"
587         "inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n"
588         "{\n"
589         "    (*v).x += (*w).x;\n"
590         "    (*v).y += (*w).y;\n"
591         "    (*v).z += (*w).z;\n"
592         "}\n"
593         "inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n"
594         "{\n"
595         "    *v = *w;\n"
596         "}\n"
597         "inline void b3MprVec3Scale(b3Float4 *d, float k)\n"
598         "{\n"
599         "    *d *= k;\n"
600         "}\n"
601         "inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n"
602         "{\n"
603         "    float dot;\n"
604         "       dot = b3Dot3F4(*a,*b);\n"
605         "    return dot;\n"
606         "}\n"
607         "inline float b3MprVec3Len2(const b3Float4 *v)\n"
608         "{\n"
609         "    return b3MprVec3Dot(v, v);\n"
610         "}\n"
611         "inline void b3MprVec3Normalize(b3Float4 *d)\n"
612         "{\n"
613         "    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n"
614         "    b3MprVec3Scale(d, k);\n"
615         "}\n"
616         "inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n"
617         "{\n"
618         "       *d = b3Cross3(*a,*b);\n"
619         "       \n"
620         "}\n"
621         "inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n"
622         "{\n"
623         "       *d = *v - *w;\n"
624         "}\n"
625         "inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n"
626         "{\n"
627         "    b3Float4 v2v1, v3v1;\n"
628         "    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n"
629         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
630         "    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n"
631         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
632         "    b3MprVec3Cross(dir, &v2v1, &v3v1);\n"
633         "    b3MprVec3Normalize(dir);\n"
634         "}\n"
635         "inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n"
636         "                                       const b3Float4 *dir)\n"
637         "{\n"
638         "    float dot;\n"
639         "    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n"
640         "    return b3MprIsZero(dot) || dot > 0.f;\n"
641         "}\n"
642         "inline int portalReachTolerance(const b3MprSimplex_t *portal,\n"
643         "                                     const b3MprSupport_t *v4,\n"
644         "                                     const b3Float4 *dir)\n"
645         "{\n"
646         "    float dv1, dv2, dv3, dv4;\n"
647         "    float dot1, dot2, dot3;\n"
648         "    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n"
649         "    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n"
650         "    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n"
651         "    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n"
652         "    dv4 = b3MprVec3Dot(&v4->v, dir);\n"
653         "    dot1 = dv4 - dv1;\n"
654         "    dot2 = dv4 - dv2;\n"
655         "    dot3 = dv4 - dv3;\n"
656         "    dot1 = B3_MPR_FMIN(dot1, dot2);\n"
657         "    dot1 = B3_MPR_FMIN(dot1, dot3);\n"
658         "    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n"
659         "}\n"
660         "inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n"
661         "                                         const b3MprSupport_t *v4,\n"
662         "                                         const b3Float4 *dir)\n"
663         "{\n"
664         "    float dot;\n"
665         "    dot = b3MprVec3Dot(&v4->v, dir);\n"
666         "    return b3MprIsZero(dot) || dot > 0.f;\n"
667         "}\n"
/* b3ExpandPortal (OpenCL source fragment):
 * overwrites exactly one of portal points 1..3 with the support point v4.
 * The slot is chosen from the signs of dot products against
 * v4v0 = cross(v4->v, point0.v):
 *   dot(point1, v4v0) > 0 : dot(point2, v4v0) > 0 -> slot 1, otherwise slot 3
 *   otherwise             : dot(point3, v4v0) > 0 -> slot 2, otherwise slot 1
 * Point 0 is never modified here. */
668         "inline void b3ExpandPortal(b3MprSimplex_t *portal,\n"
669         "                              const b3MprSupport_t *v4)\n"
670         "{\n"
671         "    float dot;\n"
672         "    b3Float4 v4v0;\n"
673         "    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n"
674         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n"
675         "    if (dot > 0.f){\n"
676         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n"
677         "        if (dot > 0.f){\n"
678         "            b3MprSimplexSet(portal, 1, v4);\n"
679         "        }else{\n"
680         "            b3MprSimplexSet(portal, 3, v4);\n"
681         "        }\n"
682         "    }else{\n"
683         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n"
684         "        if (dot > 0.f){\n"
685         "            b3MprSimplexSet(portal, 2, v4);\n"
686         "        }else{\n"
687         "            b3MprSimplexSet(portal, 1, v4);\n"
688         "        }\n"
689         "    }\n"
690         "}\n"
/* b3DiscoverPortal (OpenCL source fragment):
 * fills portal points 0..3 for the body pair, using b3FindOrigin for point 0
 * and b3MprSupport for points 1..3.
 * Return values, as produced by the code below:
 *   -1  a freshly computed support point has a zero or negative dot product
 *       with the search direction
 *    1  cross(point0, point1) has (near) zero length and point1 equals the origin
 *    2  cross(point0, point1) has (near) zero length and point1 is not the origin
 *    0  the portal size reached 4
 * The hasSepAxis parameter is not referenced anywhere in this body. */
691         "B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
692         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
693         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
694         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
695         "                                                                                                       __global b3Float4* sepAxis,\n"
696         "                                                                                                       __global int*   hasSepAxis,\n"
697         "                                                                                                       b3MprSimplex_t *portal)\n"
698         "{\n"
699         "    b3Float4 dir, va, vb;\n"
700         "    float dot;\n"
701         "    int cont;\n"
702         "       \n"
703         "       \n"
704         "    // vertex 0 is center of portal\n"
705         "    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n"
706         "    // vertex 0 is center of portal\n"
707         "    b3MprSimplexSetSize(portal, 1);\n"
708         "       \n"
709         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
710         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
711         "    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n"
712         "        // Portal's center lies on origin (0,0,0) => we know that objects\n"
713         "        // intersect but we would need to know penetration info.\n"
714         "        // So move center little bit...\n"
715         "        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n"
716         "        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n"
717         "    }\n"
718         "    // vertex 1 = support in direction of origin\n"
719         "    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n"
720         "    b3MprVec3Scale(&dir, -1.f);\n"
721         "    b3MprVec3Normalize(&dir);\n"
722         "    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n"
723         "    b3MprSimplexSetSize(portal, 2);\n"
724         "    // test if origin isn't outside of v1\n"
725         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n"
726         "       \n"
727         "    if (b3MprIsZero(dot) || dot < 0.f)\n"
728         "        return -1;\n"
729         "    // vertex 2\n"
730         "    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n"
731         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
732         "    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n"
733         "        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n"
734         "            // origin lies on v1\n"
735         "            return 1;\n"
736         "        }else{\n"
737         "            // origin lies on v0-v1 segment\n"
738         "            return 2;\n"
739         "        }\n"
740         "    }\n"
741         "    b3MprVec3Normalize(&dir);\n"
742         "        b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n"
743         "    \n"
744         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n"
745         "    if (b3MprIsZero(dot) || dot < 0.f)\n"
746         "        return -1;\n"
747         "    b3MprSimplexSetSize(portal, 3);\n"
748         "    // vertex 3 direction\n"
749         "    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
750         "                     &b3MprSimplexPoint(portal, 0)->v);\n"
751         "    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n"
752         "                     &b3MprSimplexPoint(portal, 0)->v);\n"
753         "    b3MprVec3Cross(&dir, &va, &vb);\n"
754         "    b3MprVec3Normalize(&dir);\n"
755         "    // it is better to form portal faces to be oriented \"outside\" origin\n"
756         "    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n"
757         "    if (dot > 0.f){\n"
758         "        b3MprSimplexSwap(portal, 1, 2);\n"
759         "        b3MprVec3Scale(&dir, -1.f);\n"
760         "    }\n"
/* NOTE(review): the loop below exits only through return -1 or by setting the
 * portal size to 4; it has no iteration cap of its own. */
761         "    while (b3MprSimplexSize(portal) < 4){\n"
762         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n"
763         "        \n"
764         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n"
765         "        if (b3MprIsZero(dot) || dot < 0.f)\n"
766         "            return -1;\n"
767         "        cont = 0;\n"
768         "        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n"
769         "        // continue\n"
770         "        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
771         "                          &b3MprSimplexPoint(portal, 3)->v);\n"
772         "        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n"
773         "        if (dot < 0.f && !b3MprIsZero(dot)){\n"
774         "            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n"
775         "            cont = 1;\n"
776         "        }\n"
777         "        if (!cont){\n"
778         "            // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n"
779         "            // continue\n"
780         "            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n"
781         "                              &b3MprSimplexPoint(portal, 2)->v);\n"
782         "            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n"
783         "            if (dot < 0.f && !b3MprIsZero(dot)){\n"
784         "                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n"
785         "                cont = 1;\n"
786         "            }\n"
787         "        }\n"
/* When a point was replaced (cont != 0) the search direction is recomputed from
 * the updated points 0..2; otherwise the portal is declared complete. */
788         "        if (cont){\n"
789         "            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
790         "                             &b3MprSimplexPoint(portal, 0)->v);\n"
791         "            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n"
792         "                             &b3MprSimplexPoint(portal, 0)->v);\n"
793         "            b3MprVec3Cross(&dir, &va, &vb);\n"
794         "            b3MprVec3Normalize(&dir);\n"
795         "        }else{\n"
796         "            b3MprSimplexSetSize(portal, 4);\n"
797         "        }\n"
798         "    }\n"
799         "    return 0;\n"
800         "}\n"
/* b3RefinePortal (OpenCL source fragment):
 * runs at most B3_MPR_MAX_ITERATIONS rounds of: compute the portal direction,
 * test portalEncapsulesOrigin, fetch a support point v4, expand the portal.
 * Returns 0 as soon as portalEncapsulesOrigin() is true.
 * Returns -1 when v4 fails portalCanEncapsuleOrigin(), when
 * portalReachTolerance() is true, or when the iteration budget runs out. */
801         "B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
802         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
803         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
804         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
805         "                                                                                                       __global b3Float4* sepAxis,\n"
806         "                                                                                                       b3MprSimplex_t *portal)\n"
807         "{\n"
808         "    b3Float4 dir;\n"
809         "    b3MprSupport_t v4;\n"
810         "       for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n"
811         "    //while (1)\n"
812         "       {\n"
813         "        // compute direction outside the portal (from v0 throught v1,v2,v3\n"
814         "        // face)\n"
815         "        b3PortalDir(portal, &dir);\n"
816         "        // test if origin is inside the portal\n"
817         "        if (portalEncapsulesOrigin(portal, &dir))\n"
818         "            return 0;\n"
819         "        // get next support point\n"
820         "        \n"
821         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n"
822         "        // test if v4 can expand portal to contain origin and if portal\n"
823         "        // expanding doesn't reach given tolerance\n"
824         "        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n"
825         "                || portalReachTolerance(portal, &v4, &dir))\n"
826         "               {\n"
827         "            return -1;\n"
828         "        }\n"
829         "        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n"
830         "        // difference (direction from v0).\n"
831         "        b3ExpandPortal(portal, &v4);\n"
832         "    }\n"
/* Reached only when the for loop above used up all its iterations. */
833         "    return -1;\n"
834         "}\n"
/* b3FindPos (OpenCL source fragment):
 * writes into *pos the midpoint of two weighted sums p1 and p2, where p1 is
 * built from the v1 members and p2 from the v2 members of portal points 0..3.
 * The weights b[0..3] are triple products of the portal points. When their sum
 * is (near) zero or negative, b[0] is forced to 0 and b[1..3] are recomputed
 * against the portal direction. Both sums are scaled by 1/sum before being
 * averaged.
 * Fix: the final scale factor is written as 0.5f (it was the double literal
 * 0.5) so the kernel source contains no double-precision constant here; the
 * value is unchanged and now matches the 0.5f used in b3FindPenetrSegment. */
835         "B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n"
836         "{\n"
837         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
838         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
839         "    b3Float4 dir;\n"
840         "    size_t i;\n"
841         "    float b[4], sum, inv;\n"
842         "    b3Float4 vec, p1, p2;\n"
843         "    b3PortalDir(portal, &dir);\n"
844         "    // use barycentric coordinates of tetrahedron to find origin\n"
845         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n"
846         "                       &b3MprSimplexPoint(portal, 2)->v);\n"
847         "    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n"
848         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n"
849         "                       &b3MprSimplexPoint(portal, 2)->v);\n"
850         "    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n"
851         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n"
852         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
853         "    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n"
854         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n"
855         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
856         "    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n"
857         "       sum = b[0] + b[1] + b[2] + b[3];\n"
/* Fallback weights: point 0 is dropped and the remaining three weights are
 * measured against the portal direction instead. */
858         "    if (b3MprIsZero(sum) || sum < 0.f){\n"
859         "               b[0] = 0.f;\n"
860         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n"
861         "                           &b3MprSimplexPoint(portal, 3)->v);\n"
862         "        b[1] = b3MprVec3Dot(&vec, &dir);\n"
863         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n"
864         "                           &b3MprSimplexPoint(portal, 1)->v);\n"
865         "        b[2] = b3MprVec3Dot(&vec, &dir);\n"
866         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n"
867         "                           &b3MprSimplexPoint(portal, 2)->v);\n"
868         "        b[3] = b3MprVec3Dot(&vec, &dir);\n"
869         "               sum = b[1] + b[2] + b[3];\n"
870         "       }\n"
871         "       inv = 1.f / sum;\n"
872         "    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n"
873         "    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n"
874         "    for (i = 0; i < 4; i++){\n"
875         "        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n"
876         "        b3MprVec3Scale(&vec, b[i]);\n"
877         "        b3MprVec3Add(&p1, &vec);\n"
878         "        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n"
879         "        b3MprVec3Scale(&vec, b[i]);\n"
880         "        b3MprVec3Add(&p2, &vec);\n"
881         "    }\n"
882         "    b3MprVec3Scale(&p1, inv);\n"
883         "    b3MprVec3Scale(&p2, inv);\n"
884         "    b3MprVec3Copy(pos, &p1);\n"
885         "    b3MprVec3Add(pos, &p2);\n"
886         "    b3MprVec3Scale(pos, 0.5f);\n"
887         "}\n"
/* b3MprVec3Dist2 (OpenCL source fragment):
 * returns b3MprVec3Len2 of the difference (a - b) computed by b3MprVec3Sub2. */
888         "inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n"
889         "{\n"
890         "    b3Float4 ab;\n"
891         "    b3MprVec3Sub2(&ab, a, b);\n"
892         "    return b3MprVec3Len2(&ab);\n"
893         "}\n"
/* _b3MprVec3PointSegmentDist2 (OpenCL source fragment):
 * returns the squared distance from point P to the segment x0..b.
 * When witness is non-null it receives the closest point on the segment:
 * x0 when t <= 0 (or near zero), b when t >= 1 (or near one), otherwise
 * x0 + t * (b - x0). */
894         "inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n"
895         "                                                  const b3Float4 *x0,\n"
896         "                                                  const b3Float4 *b,\n"
897         "                                                  b3Float4 *witness)\n"
898         "{\n"
899         "    // The computation comes from solving equation of segment:\n"
900         "    //      S(t) = x0 + t.d\n"
901         "    //          where - x0 is initial point of segment\n"
902         "    //                - d is direction of segment from x0 (|d| > 0)\n"
903         "    //                - t belongs to <0, 1> interval\n"
904         "    // \n"
905         "    // Than, distance from a segment to some point P can be expressed:\n"
906         "    //      D(t) = |x0 + t.d - P|^2\n"
907         "    //          which is distance from any point on segment. Minimization\n"
908         "    //          of this function brings distance from P to segment.\n"
909         "    // Minimization of D(t) leads to simple quadratic equation that's\n"
910         "    // solving is straightforward.\n"
911         "    //\n"
912         "    // Bonus of this method is witness point for free.\n"
913         "    float dist, t;\n"
914         "    b3Float4 d, a;\n"
915         "    // direction of segment\n"
916         "    b3MprVec3Sub2(&d, b, x0);\n"
917         "    // precompute vector from P to x0\n"
918         "    b3MprVec3Sub2(&a, x0, P);\n"
919         "    t  = -1.f * b3MprVec3Dot(&a, &d);\n"
/* NOTE(review): no guard against a zero-length segment; the division below
 * relies on |d| > 0 as stated in the comment block above. */
920         "    t /= b3MprVec3Len2(&d);\n"
921         "    if (t < 0.f || b3MprIsZero(t)){\n"
922         "        dist = b3MprVec3Dist2(x0, P);\n"
923         "        if (witness)\n"
924         "            b3MprVec3Copy(witness, x0);\n"
925         "    }else if (t > 1.f || b3MprEq(t, 1.f)){\n"
926         "        dist = b3MprVec3Dist2(b, P);\n"
927         "        if (witness)\n"
928         "            b3MprVec3Copy(witness, b);\n"
929         "    }else{\n"
930         "        if (witness){\n"
931         "            b3MprVec3Copy(witness, &d);\n"
932         "            b3MprVec3Scale(witness, t);\n"
933         "            b3MprVec3Add(witness, x0);\n"
934         "            dist = b3MprVec3Dist2(witness, P);\n"
935         "        }else{\n"
936         "            // recycling variables\n"
937         "            b3MprVec3Scale(&d, t);\n"
938         "            b3MprVec3Add(&d, &a);\n"
939         "            dist = b3MprVec3Len2(&d);\n"
940         "        }\n"
941         "    }\n"
942         "    return dist;\n"
943         "}\n"
/* b3MprVec3PointTriDist2 (OpenCL source fragment):
 * returns the squared distance from P to the triangle (x0, B, C).
 * The parameters s and t are solved first. When both lie in [0, 1] and
 * s + t <= 1 (each test also accepts near-equality through b3MprIsZero /
 * b3MprEq) the point x0 + s*d1 + t*d2 is used. Otherwise the result is the
 * minimum over the three edges x0-B, x0-C and B-C.
 * When witness is non-null it receives the closest point that was selected. */
944         "inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n"
945         "                                const b3Float4 *x0, const b3Float4 *B,\n"
946         "                                const b3Float4 *C,\n"
947         "                                b3Float4 *witness)\n"
948         "{\n"
949         "    // Computation comes from analytic expression for triangle (x0, B, C)\n"
950         "    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n"
951         "    // Then equation for distance is:\n"
952         "    //      D(s, t) = | T(s, t) - P |^2\n"
953         "    // This leads to minimization of quadratic function of two variables.\n"
954         "    // The solution from is taken only if s is between 0 and 1, t is\n"
955         "    // between 0 and 1 and t + s < 1, otherwise distance from segment is\n"
956         "    // computed.\n"
957         "    b3Float4 d1, d2, a;\n"
958         "    float u, v, w, p, q, r;\n"
959         "    float s, t, dist, dist2;\n"
960         "    b3Float4 witness2;\n"
961         "    b3MprVec3Sub2(&d1, B, x0);\n"
962         "    b3MprVec3Sub2(&d2, C, x0);\n"
963         "    b3MprVec3Sub2(&a, x0, P);\n"
964         "    u = b3MprVec3Dot(&a, &a);\n"
965         "    v = b3MprVec3Dot(&d1, &d1);\n"
966         "    w = b3MprVec3Dot(&d2, &d2);\n"
967         "    p = b3MprVec3Dot(&a, &d1);\n"
968         "    q = b3MprVec3Dot(&a, &d2);\n"
969         "    r = b3MprVec3Dot(&d1, &d2);\n"
970         "    s = (q * r - w * p) / (w * v - r * r);\n"
971         "    t = (-s * r - q) / w;\n"
972         "    if ((b3MprIsZero(s) || s > 0.f)\n"
973         "            && (b3MprEq(s, 1.f) || s < 1.f)\n"
974         "            && (b3MprIsZero(t) || t > 0.f)\n"
975         "            && (b3MprEq(t, 1.f) || t < 1.f)\n"
976         "            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n"
977         "        if (witness){\n"
978         "            b3MprVec3Scale(&d1, s);\n"
979         "            b3MprVec3Scale(&d2, t);\n"
980         "            b3MprVec3Copy(witness, x0);\n"
981         "            b3MprVec3Add(witness, &d1);\n"
982         "            b3MprVec3Add(witness, &d2);\n"
983         "            dist = b3MprVec3Dist2(witness, P);\n"
984         "        }else{\n"
985         "            dist  = s * s * v;\n"
986         "            dist += t * t * w;\n"
987         "            dist += 2.f * s * t * r;\n"
988         "            dist += 2.f * s * p;\n"
989         "            dist += 2.f * t * q;\n"
990         "            dist += u;\n"
991         "        }\n"
992         "    }else{\n"
/* Edge fallback: the first edge writes straight into witness; the other two go
 * through witness2 and are copied over only when they are strictly closer. */
993         "        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n"
994         "        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n"
995         "        if (dist2 < dist){\n"
996         "            dist = dist2;\n"
997         "            if (witness)\n"
998         "                b3MprVec3Copy(witness, &witness2);\n"
999         "        }\n"
1000         "        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n"
1001         "        if (dist2 < dist){\n"
1002         "            dist = dist2;\n"
1003         "            if (witness)\n"
1004         "                b3MprVec3Copy(witness, &witness2);\n"
1005         "        }\n"
1006         "    }\n"
1007         "    return dist;\n"
1008         "}\n"
/* b3FindPenetr (OpenCL source fragment):
 * repeatedly expands the portal with new support points until
 * portalReachTolerance() is true or the iterations counter equals
 * B3_MPR_MAX_ITERATIONS, then writes the outputs:
 *   *depth  square root of the squared distance from the origin to the
 *           triangle formed by portal points 1..3
 *   *pdir   closest point on that triangle, replaced by the current portal
 *           direction when all three components are (near) zero, then normalized
 *   *pos    result of b3FindPos(portal, pos)
 * iterations starts at 1 and is incremented once per loop pass, so it reaches
 * B3_MPR_MAX_ITERATIONS on the last pass of the for loop. */
1009         "B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
1010         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
1011         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
1012         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
1013         "                                                                                                       __global b3Float4* sepAxis,\n"
1014         "                       b3MprSimplex_t *portal,\n"
1015         "                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n"
1016         "{\n"
1017         "    b3Float4 dir;\n"
1018         "    b3MprSupport_t v4;\n"
1019         "    unsigned long iterations;\n"
1020         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
1021         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
1022         "    iterations = 1UL;\n"
1023         "       for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n"
1024         "    //while (1)\n"
1025         "       {\n"
1026         "        // compute portal direction and obtain next support point\n"
1027         "        b3PortalDir(portal, &dir);\n"
1028         "        \n"
1029         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n"
1030         "        // reached tolerance -> find penetration info\n"
1031         "        if (portalReachTolerance(portal, &v4, &dir)\n"
1032         "                || iterations ==B3_MPR_MAX_ITERATIONS)\n"
1033         "               {\n"
1034         "            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n"
1035         "            *depth = B3_MPR_SQRT(*depth);\n"
1036         "                       \n"
1037         "                       if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n"
1038         "                       {\n"
1039         "                               \n"
1040         "                               *pdir = dir;\n"
1041         "                       } \n"
1042         "                       b3MprVec3Normalize(pdir);\n"
1043         "                       \n"
1044         "            // barycentric coordinates:\n"
1045         "            b3FindPos(portal, pos);\n"
1046         "            return;\n"
1047         "        }\n"
1048         "        b3ExpandPortal(portal, &v4);\n"
1049         "        iterations++;\n"
1050         "    }\n"
1051         "}\n"
/* b3FindPenetrTouch (OpenCL source fragment):
 * handles the touching-contact case on portal point 1.
 * Writes *depth = 0, copies the zero vector into *dir, and sets *pos to the
 * midpoint of the v1 and v2 members of portal point 1.
 * Fix: the midpoint scale factor is written as 0.5f (it was the double literal
 * 0.5) so the kernel source contains no double-precision constant here; the
 * value is unchanged and now matches the 0.5f used in b3FindPenetrSegment. */
1052         "B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n"
1053         "{\n"
1054         "    // Touching contact on portal's v1 - so depth is zero and direction\n"
1055         "    // is unimportant and pos can be guessed\n"
1056         "    *depth = 0.f;\n"
1057         "    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
1058         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
1059         "       b3MprVec3Copy(dir, b3mpr_vec3_origin);\n"
1060         "    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n"
1061         "    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n"
1062         "    b3MprVec3Scale(pos, 0.5f);\n"
1063         "}\n"
/* b3FindPenetrSegment (OpenCL source fragment):
 * handles the case where the origin lies on the point0-point1 segment.
 * *pos   midpoint of the v1 and v2 members of portal point 1
 * *dir   the v member of portal point 1, normalized
 * *depth length of that v member, taken before normalization */
1064         "B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n"
1065         "                              float *depth, b3Float4 *dir, b3Float4 *pos)\n"
1066         "{\n"
1067         "    \n"
1068         "    // Origin lies on v0-v1 segment.\n"
1069         "    // Depth is distance to v1, direction also and position must be\n"
1070         "    // computed\n"
1071         "    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n"
1072         "    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n"
1073         "    b3MprVec3Scale(pos, 0.5f);\n"
1074         "    \n"
1075         "    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n"
1076         "    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n"
1077         "    b3MprVec3Normalize(dir);\n"
1078         "}\n"
/* b3MprPenetration (OpenCL source fragment):
 * entry point for one body pair. Clears hasSepAxis[pairIndex], runs
 * b3DiscoverPortal, and dispatches on its result:
 *   0       b3RefinePortal, then b3FindPenetr; on success sets
 *           hasSepAxis[pairIndex] = 1 and stores the negated dirOut in
 *           sepAxis[pairIndex]
 *   1       b3FindPenetrTouch
 *   2       b3FindPenetrSegment
 *   other   no collision
 * Returns 0 when depthOut / dirOut / posOut were filled by one of the three
 * handlers, and -1 when refinement failed or discovery reported no collision. */
1079         "inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n"
1080         "                                       b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n"
1081         "                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
1082         "                                       b3ConstArray(b3Collidable_t)    cpuCollidables,\n"
1083         "                                       b3ConstArray(b3Float4)  cpuVertices,\n"
1084         "                                       __global b3Float4* sepAxis,\n"
1085         "                                       __global int*   hasSepAxis,\n"
1086         "                                       float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n"
1087         "{\n"
1088         "       \n"
1089         "        b3MprSimplex_t portal;\n"
1090         "        \n"
1091         "//     if (!hasSepAxis[pairIndex])\n"
1092         "       //      return -1;\n"
1093         "       \n"
1094         "       hasSepAxis[pairIndex] = 0;\n"
1095         "        int res;\n"
1096         "    // Phase 1: Portal discovery\n"
1097         "    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n"
1098         "       \n"
1099         "         \n"
1100         "       //sepAxis[pairIndex] = *pdir;//or -dir?\n"
1101         "       switch (res)\n"
1102         "       {\n"
1103         "       case 0:\n"
1104         "               {\n"
1105         "                       // Phase 2: Portal refinement\n"
1106         "               \n"
1107         "                       res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n"
1108         "                       if (res < 0)\n"
1109         "                               return -1;\n"
1110         "                       // Phase 3. Penetration info\n"
1111         "                       b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n"
1112         "                       hasSepAxis[pairIndex] = 1;\n"
1113         "                       sepAxis[pairIndex] = -*dirOut;\n"
1114         "                       break;\n"
1115         "               }\n"
1116         "       case 1:\n"
1117         "               {\n"
1118         "                        // Touching contact on portal's v1.\n"
1119         "                       b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n"
1120         "                       break;\n"
1121         "               }\n"
1122         "       case 2:\n"
1123         "               {\n"
1124         "                       \n"
1125         "                       b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n"
1126         "                       break;\n"
1127         "               }\n"
1128         "       default:\n"
1129         "               {\n"
/* hasSepAxis[pairIndex] was already cleared at the top of the function; the
 * store below repeats that value. */
1130         "                       hasSepAxis[pairIndex]=0;\n"
1131         "                       //if (res < 0)\n"
1132         "                       //{\n"
1133         "                               // Origin isn't inside portal - no collision.\n"
1134         "                               return -1;\n"
1135         "                       //}\n"
1136         "               }\n"
1137         "       };\n"
1138         "       \n"
1139         "       return 0;\n"
1140         "};\n"
1141         "#endif //B3_MPR_PENETRATION_H\n"
1142         "#ifndef B3_CONTACT4DATA_H\n"
1143         "#define B3_CONTACT4DATA_H\n"
1144         "#ifndef B3_FLOAT4_H\n"
1145         "#ifdef __cplusplus\n"
1146         "#else\n"
1147         "#endif \n"
1148         "#endif //B3_FLOAT4_H\n"
/* b3Contact4Data (OpenCL source fragment):
 * contact record holding four world-space positions on B and one world normal
 * on B. The w component of m_worldNormalOnB carries the point count (see the
 * inline comment on that field). The localPosA / localPosB arrays are
 * commented out in the kernel source. */
1149         "typedef  struct b3Contact4Data b3Contact4Data_t;\n"
1150         "struct b3Contact4Data\n"
1151         "{\n"
1152         "       b3Float4        m_worldPosB[4];\n"
1153         "//     b3Float4        m_localPosA[4];\n"
1154         "//     b3Float4        m_localPosB[4];\n"
1155         "       b3Float4        m_worldNormalOnB;       //      w: m_nPoints\n"
1156         "       unsigned short  m_restituitionCoeffCmp;\n"
1157         "       unsigned short  m_frictionCoeffCmp;\n"
1158         "       int m_batchIdx;\n"
1159         "       int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
1160         "       int m_bodyBPtrAndSignBit;\n"
1161         "       int     m_childIndexA;\n"
1162         "       int     m_childIndexB;\n"
1163         "       int m_unused1;\n"
1164         "       int m_unused2;\n"
1165         "};\n"
/* b3Contact4Data_getNumPoints (OpenCL source fragment):
 * returns m_worldNormalOnB.w of the contact, cast to int. */
1166         "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
1167         "{\n"
1168         "       return (int)contact->m_worldNormalOnB.w;\n"
1169         "};\n"
/* b3Contact4Data_setNumPoints (OpenCL source fragment):
 * stores numPoints, cast to float, in m_worldNormalOnB.w of the contact. */
1170         "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
1171         "{\n"
1172         "       contact->m_worldNormalOnB.w = (float)numPoints;\n"
1173         "};\n"
1174         "#endif //B3_CONTACT4DATA_H\n"
/* Kernel-side helper macros (OpenCL source fragment):
 *   AppendInc(x, out)  assigns the result of atomic_inc(x) to out
 *   GET_NPOINTS(x)     expands to (x).m_worldNormalOnB.w
 *   counter32_t        defined as volatile __global int* unless
 *                      cl_ext_atomic_counters_32 is defined, in which case the
 *                      extension pragma is enabled instead */
1175         "#define AppendInc(x, out) out = atomic_inc(x)\n"
1176         "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n"
1177         "#ifdef cl_ext_atomic_counters_32\n"
1178         "       #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
1179         "#else\n"
1180         "       #define counter32_t volatile __global int*\n"
1181         "#endif\n"
1182         "__kernel void   mprPenetrationKernel( __global int4* pairs,\n"
1183         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1184         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1185         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1186         "                                                                                                                                                                       __global const float4* vertices,\n"
1187         "                                                                                                                                                                       __global float4* separatingNormals,\n"
1188         "                                                                                                                                                                       __global int* hasSeparatingAxis,\n"
1189         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1190         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1191         "                                                                                                                                                                       int contactCapacity,\n"
1192         "                                                                                                                                                                       int numPairs)\n"
1193         "{\n"
1194         "       int i = get_global_id(0);\n"
1195         "       int pairIndex = i;\n"
1196         "       if (i<numPairs)\n"
1197         "       {\n"
1198         "               int bodyIndexA = pairs[i].x;\n"
1199         "               int bodyIndexB = pairs[i].y;\n"
1200         "               int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1201         "               int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1202         "       \n"
1203         "               int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1204         "               int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1205         "               \n"
1206         "               \n"
1207         "               //once the broadphase avoids static-static pairs, we can remove this test\n"
1208         "               if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n"
1209         "               {\n"
1210         "                       return;\n"
1211         "               }\n"
1212         "               \n"
1213         "               if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n"
1214         "               {\n"
1215         "                       return;\n"
1216         "               }\n"
1217         "               float depthOut;\n"
1218         "               b3Float4 dirOut;\n"
1219         "               b3Float4 posOut;\n"
1220         "               int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n"
1221         "               \n"
1222         "               \n"
1223         "               \n"
1224         "               \n"
1225         "               if (res==0)\n"
1226         "               {\n"
1227         "                       //add a contact\n"
1228         "                       int dstIdx;\n"
1229         "                       AppendInc( nGlobalContactsOut, dstIdx );\n"
1230         "                       if (dstIdx<contactCapacity)\n"
1231         "                       {\n"
1232         "                               pairs[pairIndex].z = dstIdx;\n"
1233         "                               __global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n"
1234         "                               c->m_worldNormalOnB = -dirOut;//normal;\n"
1235         "                               c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1236         "                               c->m_batchIdx = pairIndex;\n"
1237         "                               int bodyA = pairs[pairIndex].x;\n"
1238         "                               int bodyB = pairs[pairIndex].y;\n"
1239         "                               c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n"
1240         "                               c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n"
1241         "                               c->m_childIndexA = -1;\n"
1242         "                               c->m_childIndexB = -1;\n"
1243         "                               //for (int i=0;i<nContacts;i++)\n"
1244         "                               posOut.w = -depthOut;\n"
1245         "                               c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n"
1246         "                               GET_NPOINTS(*c) = 1;//nContacts;\n"
1247         "                       }\n"
1248         "               }\n"
1249         "       }\n"
1250         "}\n"
// Embedded OpenCL: quaternions are stored as plain float4 values. The quaternion
// helpers in this string use .xyz as the vector part and .w as the scalar part.
// make_float4(...) expands to the OpenCL vector-literal form (float4)(...).
1251         "typedef float4 Quaternion;\n"
1252         "#define make_float4 (float4)\n"
// dot3F4 (embedded OpenCL): dot product over the xyz lanes of a and b.
// Both operands are copied with w forced to 0, so the w lanes never contribute.
1253         "__inline\n"
1254         "float dot3F4(float4 a, float4 b)\n"
1255         "{\n"
1256         "       float4 a1 = make_float4(a.xyz,0.f);\n"
1257         "       float4 b1 = make_float4(b.xyz,0.f);\n"
1258         "       return dot(a1, b1);\n"
1259         "}\n"
// cross3 (embedded OpenCL): thin wrapper that forwards to the built-in cross().
// Per the OpenCL C specification, cross() on float4 operands uses only the xyz
// lanes and returns 0 in the w lane.
1260         "__inline\n"
1261         "float4 cross3(float4 a, float4 b)\n"
1262         "{\n"
1263         "       return cross(a,b);\n"
1264         "}\n"
// qtMul (embedded OpenCL): quaternion product a*b (xyz = vector part, w = scalar part).
//   vector part: cross(a,b) + a.w*b.xyz + b.w*a.xyz
//   scalar part: a.w*b.w - dot3(a,b)
1265         "__inline\n"
1266         "Quaternion qtMul(Quaternion a, Quaternion b)\n"
1267         "{\n"
1268         "       Quaternion ans;\n"
1269         "       ans = cross3( a, b );\n"
// The four-lane += below also modifies ans.w; that lane is overwritten by the
// explicit ans.w assignment that follows, so only xyz of this sum is kept.
1270         "       ans += a.w*b+b.w*a;\n"
1271         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
1272         "       ans.w = a.w*b.w - dot3F4(a, b);\n"
1273         "       return ans;\n"
1274         "}\n"
// qtInvert (embedded OpenCL): returns the conjugate of q (xyz negated, w kept).
// No normalization is performed, so the result is the true inverse only when q
// has unit length.
1275         "__inline\n"
1276         "Quaternion qtInvert(Quaternion q)\n"
1277         "{\n"
1278         "       return (Quaternion)(-q.xyz, q.w);\n"
1279         "}\n"
// qtRotate (embedded OpenCL): rotates vec by q, computed as q * v * conj(q),
// where v is vec with its w lane cleared to 0 before the products.
1280         "__inline\n"
1281         "float4 qtRotate(Quaternion q, float4 vec)\n"
1282         "{\n"
1283         "       Quaternion qInv = qtInvert( q );\n"
1284         "       float4 vcpy = vec;\n"
1285         "       vcpy.w = 0.f;\n"
1286         "       float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
1287         "       return out;\n"
1288         "}\n"
// transform (embedded OpenCL): returns qtRotate(*orientation, *p) + *translation.
// The addition is on all four lanes, so translation->w is added to the rotated
// vector's w lane as well.
1289         "__inline\n"
1290         "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
1291         "{\n"
1292         "       return qtRotate( *orientation, *p ) + (*translation);\n"
1293         "}\n"
// qtInvRotate (embedded OpenCL): rotates vec by the conjugate of q, i.e. applies
// the opposite rotation to qtRotate(q, vec) when q has unit length.
1294         "__inline\n"
1295         "float4 qtInvRotate(const Quaternion q, float4 vec)\n"
1296         "{\n"
1297         "       return qtRotate( qtInvert( q ), vec );\n"
1298         "}\n"
// project (embedded OpenCL): projects the vertices of `hull` onto direction *dir
// and writes the resulting interval to min[0] / max[0].
//   hull      - supplies m_numVertices and m_vertexOffset (start index into `vertices`)
//   pos, orn  - position and orientation quaternion of the hull
//   dir       - projection direction; rotated by the inverse of orn so the dot
//               products can be taken against the untransformed vertices
//   vertices  - shared vertex array indexed from hull->m_vertexOffset
// Note: both dot() calls are four-lane dot products, so the w lanes of pos, *dir,
// the vertices and localDir all take part in the result.
1299         "inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n"
1300         "const float4* dir, __global const float4* vertices, float* min, float* max)\n"
1301         "{\n"
// Sentinel bounds: any finite projection replaces both of them.
1302         "       min[0] = FLT_MAX;\n"
1303         "       max[0] = -FLT_MAX;\n"
1304         "       int numVerts = hull->m_numVertices;\n"
1305         "       const float4 localDir = qtInvRotate(orn,*dir);\n"
// Projection of the hull position onto *dir; added to both bounds at the end.
1306         "       float offset = dot(pos,*dir);\n"
1307         "       for(int i=0;i<numVerts;i++)\n"
1308         "       {\n"
1309         "               float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n"
1310         "               if(dp < min[0]) \n"
1311         "                       min[0] = dp;\n"
1312         "               if(dp > max[0]) \n"
1313         "                       max[0] = dp;\n"
1314         "       }\n"
// With the sentinels above, min > max holds only when the loop updated neither
// bound (e.g. numVerts <= 0); in that case the two sentinels are swapped.
1315         "       if(min[0]>max[0])\n"
1316         "       {\n"
1317         "               float tmp = min[0];\n"
1318         "               min[0] = max[0];\n"
1319         "               max[0] = tmp;\n"
1320         "       }\n"
1321         "       min[0] += offset;\n"
1322         "       max[0] += offset;\n"
1323         "}\n"
// findSeparatingAxisUnitSphere (embedded OpenCL): tests the first
// numUnitSphereDirections entries of unitSphereDirections as candidate axes
// between hullA and hullB.
//   Returns false as soon as one direction yields non-overlapping projection
//   intervals; returns true when every tested direction overlaps.
//   *dmin / *sep are in/out: they are only replaced when a tested direction has
//   a smaller overlap than the incoming *dmin.
//   posA1 / posB1 are copied with w cleared to 0 before being passed to project().
// NOTE(review): curPlaneTests, curEdgeEdge and result are assigned but never read.
1324         "bool findSeparatingAxisUnitSphere(     __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
1325         "       const float4 posA1,\n"
1326         "       const float4 ornA,\n"
1327         "       const float4 posB1,\n"
1328         "       const float4 ornB,\n"
1329         "       const float4 DeltaC2,\n"
1330         "       __global const float4* vertices,\n"
1331         "       __global const float4* unitSphereDirections,\n"
1332         "       int numUnitSphereDirections,\n"
1333         "       float4* sep,\n"
1334         "       float* dmin)\n"
1335         "{\n"
1336         "       \n"
1337         "       float4 posA = posA1;\n"
1338         "       posA.w = 0.f;\n"
1339         "       float4 posB = posB1;\n"
1340         "       posB.w = 0.f;\n"
1341         "       int curPlaneTests=0;\n"
1342         "       int curEdgeEdge = 0;\n"
1343         "       // Test unit sphere directions\n"
1344         "       for (int i=0;i<numUnitSphereDirections;i++)\n"
1345         "       {\n"
1346         "               float4 crossje;\n"
1347         "               crossje = unitSphereDirections[i];      \n"
// Flip the candidate so that its xyz dot product with DeltaC2 is not positive.
// Only the single statement after the if is conditional; the braces that follow
// open an unconditional scope.
1348         "               if (dot3F4(DeltaC2,crossje)>0)\n"
1349         "                       crossje *= -1.f;\n"
1350         "               {\n"
1351         "                       float dist;\n"
1352         "                       bool result = true;\n"
1353         "                       float Min0,Max0;\n"
1354         "                       float Min1,Max1;\n"
1355         "                       project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n"
1356         "                       project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n"
1357         "               \n"
// Disjoint intervals: this direction separates the hulls, report false.
1358         "                       if(Max0<Min1 || Max1<Min0)\n"
1359         "                               return false;\n"
1360         "               \n"
// Overlap depth along this direction is the smaller of the two interval overlaps.
1361         "                       float d0 = Max0 - Min1;\n"
1362         "                       float d1 = Max1 - Min0;\n"
1363         "                       dist = d0<d1 ? d0:d1;\n"
1364         "                       result = true;\n"
1365         "       \n"
1366         "                       if(dist<*dmin)\n"
1367         "                       {\n"
1368         "                               *dmin = dist;\n"
1369         "                               *sep = crossje;\n"
1370         "                       }\n"
1371         "               }\n"
1372         "       }\n"
1373         "       \n"
// Final orientation fix-up: negate *sep when its xyz dot product with -DeltaC2
// is positive. This also applies to a *sep value the loop never replaced.
1374         "       if((dot3F4(-DeltaC2,*sep))>0.0f)\n"
1375         "       {\n"
1376         "               *sep = -(*sep);\n"
1377         "       }\n"
1378         "       return true;\n"
1379         "}\n"
// findSeparatingAxisUnitSphereKernel (embedded OpenCL): one work-item per pair
// index i = get_global_id(0). Work is done only when i < numPairs and
// hasSeparatingAxis[i] is non-zero on entry.
//   For such a pair, when the product of the two shapes' m_numUniqueEdges exceeds
//   numUnitSphereDirections, findSeparatingAxisUnitSphere() is run with the
//   current separatingNormals[i] and dmins[i] as starting values:
//     - result false -> hasSeparatingAxis[i] = 0
//     - result true  -> hasSeparatingAxis[i] = 1 and separatingNormals[i] updated
//   Otherwise the pair's entries are left untouched.
// NOTE(review): numFacesA is loaded but never used.
// NOTE(review): dmin is updated locally but not stored back to dmins[i] - confirm
// that no later pass expects the refined value.
1380         "__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n"
1381         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1382         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1383         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1384         "                                                                                                                                                                       __global const float4* vertices,\n"
1385         "                                                                                                                                                                       __global const float4* unitSphereDirections,\n"
1386         "                                                                                                                                                                       __global  float4* separatingNormals,\n"
1387         "                                                                                                                                                                       __global  int* hasSeparatingAxis,\n"
1388         "                                                                                                                                                                       __global  float* dmins,\n"
1389         "                                                                                                                                                                       int numUnitSphereDirections,\n"
1390         "                                                                                                                                                                       int numPairs\n"
1391         "                                                                                                                                                                       )\n"
1392         "{\n"
1393         "       int i = get_global_id(0);\n"
1394         "       \n"
1395         "       if (i<numPairs)\n"
1396         "       {\n"
1397         "               if (hasSeparatingAxis[i])\n"
1398         "               {\n"
1399         "       \n"
// Resolve pair -> bodies -> collidables -> convex shape indices.
1400         "                       int bodyIndexA = pairs[i].x;\n"
1401         "                       int bodyIndexB = pairs[i].y;\n"
1402         "       \n"
1403         "                       int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1404         "                       int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1405         "               \n"
1406         "                       int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1407         "                       int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1408         "                       \n"
1409         "                       \n"
1410         "                       int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
1411         "       \n"
1412         "                       float dmin = dmins[i];\n"
1413         "       \n"
// Body positions are used with w cleared; c0 / c1 are each shape's m_localCenter
// rotated by the body quaternion and offset by the body position.
1414         "                       float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
1415         "                       posA.w = 0.f;\n"
1416         "                       float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
1417         "                       posB.w = 0.f;\n"
1418         "                       float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n"
1419         "                       float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
1420         "                       float4 c0 = transform(&c0local, &posA, &ornA);\n"
1421         "                       float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n"
1422         "                       float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
1423         "                       float4 c1 = transform(&c1local,&posB,&ornB);\n"
1424         "                       const float4 DeltaC2 = c0 - c1;\n"
1425         "                       float4 sepNormal = separatingNormals[i];\n"
1426         "                       \n"
// The unit-sphere test runs only when the edge-pair count is larger than the
// number of supplied directions.
1427         "                       int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n"
1428         "                       if (numEdgeEdgeDirections>numUnitSphereDirections)\n"
1429         "                       {\n"
1430         "                               bool sepEE = findSeparatingAxisUnitSphere(      &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n"
1431         "                                                                                                                                                                                                               posB,ornB,\n"
1432         "                                                                                                                                                                                                               DeltaC2,\n"
1433         "                                                                                                                                                                                                               vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n"
1434         "                               if (!sepEE)\n"
1435         "                               {\n"
1436         "                                       hasSeparatingAxis[i] = 0;\n"
1437         "                               } else\n"
1438         "                               {\n"
1439         "                                       hasSeparatingAxis[i] = 1;\n"
1440         "                                       separatingNormals[i] = sepNormal;\n"
1441         "                               }\n"
1442         "                       }\n"
1443         "               }               //if (hasSeparatingAxis[i])\n"
1444         "       }//(i<numPairs)\n"
1445         "}\n";