dali-physics/third-party/bullet3/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h

   1 //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
   2 static const char* mprKernelsCL =
   3         "/***\n"
   4         " * ---------------------------------\n"
   5         " * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n"
   6         " *\n"
   7         " *  This file was ported from mpr.c file, part of libccd.\n"
   8         " *  The Minkowski Portal Refinement implementation was ported \n"
   9         " *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n"
  10         " *  at http://github.com/erwincoumans/bullet3\n"
  11         " *\n"
  12         " *  Distributed under the OSI-approved BSD License (the \"License\");\n"
  13         " *  see <http://www.opensource.org/licenses/bsd-license.php>.\n"
  14         " *  This software is distributed WITHOUT ANY WARRANTY; without even the\n"
  15         " *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
  16         " *  See the License for more information.\n"
  17         " */\n"
  18         "#ifndef B3_MPR_PENETRATION_H\n"
  19         "#define B3_MPR_PENETRATION_H\n"
  20         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
  21         "#define B3_PLATFORM_DEFINITIONS_H\n"
  22         "struct MyTest\n"
  23         "{\n"
  24         "       int bla;\n"
  25         "};\n"
  26         "#ifdef __cplusplus\n"
  27         "#else\n"
  28         "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n"
  29         "#define B3_LARGE_FLOAT 1e18f\n"
  30         "#define B3_INFINITY 1e18f\n"
  31         "#define b3Assert(a)\n"
  32         "#define b3ConstArray(a) __global const a*\n"
  33         "#define b3AtomicInc atomic_inc\n"
  34         "#define b3AtomicAdd atomic_add\n"
  35         "#define b3Fabs fabs\n"
  36         "#define b3Sqrt native_sqrt\n"
  37         "#define b3Sin native_sin\n"
  38         "#define b3Cos native_cos\n"
  39         "#define B3_STATIC\n"
  40         "#endif\n"
  41         "#endif\n"
  42         "#ifndef B3_FLOAT4_H\n"
  43         "#define B3_FLOAT4_H\n"
  44         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
  45         "#ifdef __cplusplus\n"
  46         "#else\n"
  47         "#endif\n"
  48         "#endif\n"
  49         "#ifdef __cplusplus\n"
  50         "#else\n"
  51         "       typedef float4  b3Float4;\n"
  52         "       #define b3Float4ConstArg const b3Float4\n"
  53         "       #define b3MakeFloat4 (float4)\n"
  54         "       float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
  55         "       {\n"
  56         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
  57         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
  58         "               return dot(a1, b1);\n"
  59         "       }\n"
  60         "       b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n"
  61         "       {\n"
  62         "               float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n"
  63         "               float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
  64         "               return cross(a1, b1);\n"
  65         "       }\n"
  66         "       #define b3MinFloat4 min\n"
  67         "       #define b3MaxFloat4 max\n"
  68         "       #define b3Normalized(a) normalize(a)\n"
  69         "#endif \n"
  70         "               \n"
  71         "// True when |x|, |y| and |z| of v are all <= 1e-6f (w is ignored).\n"
  72         "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
  73         "{\n"
  74         "       // float-suffixed literals keep the comparison in single precision (double is optional in OpenCL C)\n"
  75         "       return !(b3Fabs(v.x)>1e-6f || b3Fabs(v.y)>1e-6f || b3Fabs(v.z)>1e-6f);\n"
  76         "}\n"
  77         "inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n"
  78         "{\n"
  79         "    float maxDot = -B3_INFINITY;\n"
  80         "    int i = 0;\n"
  81         "    int ptIndex = -1;\n"
  82         "    for( i = 0; i < vecLen; i++ )\n"
  83         "    {\n"
  84         "        float dot = b3Dot3F4(vecArray[i],vec);\n"
  85         "            \n"
  86         "        if( dot > maxDot )\n"
  87         "        {\n"
  88         "            maxDot = dot;\n"
  89         "            ptIndex = i;\n"
  90         "        }\n"
  91         "    }\n"
  92         "       b3Assert(ptIndex>=0);\n"
  93         "    if (ptIndex<0)\n"
  94         "       {\n"
  95         "               ptIndex = 0;\n"
  96         "       }\n"
  97         "    *dotOut = maxDot;\n"
  98         "    return ptIndex;\n"
  99         "}\n"
 100         "#endif //B3_FLOAT4_H\n"
 101         "#ifndef B3_RIGIDBODY_DATA_H\n"
 102         "#define B3_RIGIDBODY_DATA_H\n"
 103         "#ifndef B3_FLOAT4_H\n"
 104         "#ifdef __cplusplus\n"
 105         "#else\n"
 106         "#endif \n"
 107         "#endif //B3_FLOAT4_H\n"
 108         "#ifndef B3_QUAT_H\n"
 109         "#define B3_QUAT_H\n"
 110         "#ifndef B3_PLATFORM_DEFINITIONS_H\n"
 111         "#ifdef __cplusplus\n"
 112         "#else\n"
 113         "#endif\n"
 114         "#endif\n"
 115         "#ifndef B3_FLOAT4_H\n"
 116         "#ifdef __cplusplus\n"
 117         "#else\n"
 118         "#endif \n"
 119         "#endif //B3_FLOAT4_H\n"
 120         "#ifdef __cplusplus\n"
 121         "#else\n"
 122         "       typedef float4  b3Quat;\n"
 123         "       #define b3QuatConstArg const b3Quat\n"
 124         "       \n"
 125         "       \n"
 126         "inline float4 b3FastNormalize4(float4 v)\n"
 127         "{\n"
 128         "       v = (float4)(v.xyz,0.f);\n"
 129         "       return fast_normalize(v);\n"
 130         "}\n"
 131         "       \n"
 132         "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n"
 133         "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n"
 134         "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n"
 135         "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n"
 136         "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n"
 137         "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n"
 138         "{\n"
 139         "       b3Quat ans;\n"
 140         "       ans = b3Cross3( a, b );\n"
 141         "       ans += a.w*b+b.w*a;\n"
 142         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
 143         "       ans.w = a.w*b.w - b3Dot3F4(a, b);\n"
 144         "       return ans;\n"
 145         "}\n"
 146         "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n"
 147         "{\n"
 148         "       b3Quat q;\n"
 149         "       q=in;\n"
 150         "       //return b3FastNormalize4(in);\n"
 151         "       float len = native_sqrt(dot(q, q));\n"
 152         "       if(len > 0.f)\n"
 153         "       {\n"
 154         "               q *= 1.f / len;\n"
 155         "       }\n"
 156         "       else\n"
 157         "       {\n"
 158         "               q.x = q.y = q.z = 0.f;\n"
 159         "               q.w = 1.f;\n"
 160         "       }\n"
 161         "       return q;\n"
 162         "}\n"
 163         "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
 164         "{\n"
 165         "       b3Quat qInv = b3QuatInvert( q );\n"
 166         "       float4 vcpy = vec;\n"
 167         "       vcpy.w = 0.f;\n"
 168         "       float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n"
 169         "       return out;\n"
 170         "}\n"
 171         "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n"
 172         "{\n"
 173         "       return (b3Quat)(-q.xyz, q.w);\n"
 174         "}\n"
 175         "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n"
 176         "{\n"
 177         "       return (b3Quat)(-q.xyz, q.w);\n"
 178         "}\n"
 179         "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n"
 180         "{\n"
 181         "       return b3QuatRotate( b3QuatInvert( q ), vec );\n"
 182         "}\n"
 183         "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n"
 184         "{\n"
 185         "       return b3QuatRotate( orientation, point ) + (translation);\n"
 186         "}\n"
 187         "       \n"
 188         "#endif \n"
 189         "#endif //B3_QUAT_H\n"
 190         "#ifndef B3_MAT3x3_H\n"
 191         "#define B3_MAT3x3_H\n"
 192         "#ifndef B3_QUAT_H\n"
 193         "#ifdef __cplusplus\n"
 194         "#else\n"
 195         "#endif \n"
 196         "#endif //B3_QUAT_H\n"
 197         "#ifdef __cplusplus\n"
 198         "#else\n"
 199         "typedef struct\n"
 200         "{\n"
 201         "       b3Float4 m_row[3];\n"
 202         "}b3Mat3x3;\n"
 203         "#define b3Mat3x3ConstArg const b3Mat3x3\n"
 204         "#define b3GetRow(m,row) (m.m_row[row])\n"
 205         "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n"
 206         "{\n"
 207         "       b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
 208         "       b3Mat3x3 out;\n"
 209         "       out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
 210         "       out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
 211         "       out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
 212         "       out.m_row[0].w = 0.f;\n"
 213         "       out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
 214         "       out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
 215         "       out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
 216         "       out.m_row[1].w = 0.f;\n"
 217         "       out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
 218         "       out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
 219         "       out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
 220         "       out.m_row[2].w = 0.f;\n"
 221         "       return out;\n"
 222         "}\n"
 223         "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n"
 224         "{\n"
 225         "       b3Mat3x3 out;\n"
 226         "       out.m_row[0] = fabs(matIn.m_row[0]);\n"
 227         "       out.m_row[1] = fabs(matIn.m_row[1]);\n"
 228         "       out.m_row[2] = fabs(matIn.m_row[2]);\n"
 229         "       return out;\n"
 230         "}\n"
 231         "__inline\n"
 232         "b3Mat3x3 mtZero();\n"
 233         "__inline\n"
 234         "b3Mat3x3 mtIdentity();\n"
 235         "__inline\n"
 236         "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n"
 237         "__inline\n"
 238         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n"
 239         "__inline\n"
 240         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n"
 241         "__inline\n"
 242         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n"
 243         "__inline\n"
 244         "b3Mat3x3 mtZero()\n"
 245         "{\n"
 246         "       b3Mat3x3 m;\n"
 247         "       m.m_row[0] = (b3Float4)(0.f);\n"
 248         "       m.m_row[1] = (b3Float4)(0.f);\n"
 249         "       m.m_row[2] = (b3Float4)(0.f);\n"
 250         "       return m;\n"
 251         "}\n"
 252         "__inline\n"
 253         "b3Mat3x3 mtIdentity()\n"
 254         "{\n"
 255         "       b3Mat3x3 m;\n"
 256         "       m.m_row[0] = (b3Float4)(1,0,0,0);\n"
 257         "       m.m_row[1] = (b3Float4)(0,1,0,0);\n"
 258         "       m.m_row[2] = (b3Float4)(0,0,1,0);\n"
 259         "       return m;\n"
 260         "}\n"
 261         "__inline\n"
 262         "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n"
 263         "{\n"
 264         "       b3Mat3x3 out;\n"
 265         "       out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
 266         "       out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
 267         "       out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
 268         "       return out;\n"
 269         "}\n"
 270         "__inline\n"
 271         "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n"
 272         "{\n"
 273         "       b3Mat3x3 transB;\n"
 274         "       transB = mtTranspose( b );\n"
 275         "       b3Mat3x3 ans;\n"
 276         "       //      why this doesn't run when 0ing in the for{}\n"
 277         "       a.m_row[0].w = 0.f;\n"
 278         "       a.m_row[1].w = 0.f;\n"
 279         "       a.m_row[2].w = 0.f;\n"
 280         "       for(int i=0; i<3; i++)\n"
 281         "       {\n"
 282         "//     a.m_row[i].w = 0.f;\n"
 283         "               ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n"
 284         "               ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n"
 285         "               ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n"
 286         "               ans.m_row[i].w = 0.f;\n"
 287         "       }\n"
 288         "       return ans;\n"
 289         "}\n"
 290         "__inline\n"
 291         "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n"
 292         "{\n"
 293         "       b3Float4 ans;\n"
 294         "       ans.x = b3Dot3F4( a.m_row[0], b );\n"
 295         "       ans.y = b3Dot3F4( a.m_row[1], b );\n"
 296         "       ans.z = b3Dot3F4( a.m_row[2], b );\n"
 297         "       ans.w = 0.f;\n"
 298         "       return ans;\n"
 299         "}\n"
 300         "// Row-vector * matrix product on the xyz components: ans[j] = dot3(a, column j of b).\n"
 301         "// The w component of the result is always 0, matching mtMul1.\n"
 302         "__inline\n"
 303         "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n"
 304         "{\n"
 305         "       b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n"
 306         "       b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n"
 307         "       b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n"
 308         "       // build all four lanes at once; w used to be left uninitialized here\n"
 309         "       b3Float4 ans = b3MakeFloat4(b3Dot3F4( a, colx ), b3Dot3F4( a, coly ), b3Dot3F4( a, colz ), 0.f);\n"
 310         "       return ans;\n"
 311         "}\n"
 312         "#endif\n"
 313         "#endif //B3_MAT3x3_H\n"
 314         "typedef struct b3RigidBodyData b3RigidBodyData_t;\n"
 315         "struct b3RigidBodyData\n"
 316         "{\n"
 317         "       b3Float4                                m_pos;\n"
 318         "       b3Quat                                  m_quat;\n"
 319         "       b3Float4                                m_linVel;\n"
 320         "       b3Float4                                m_angVel;\n"
 321         "       int                                     m_collidableIdx;\n"
 322         "       float                           m_invMass;\n"
 323         "       float                           m_restituitionCoeff;\n"
 324         "       float                           m_frictionCoeff;\n"
 325         "};\n"
 326         "typedef struct b3InertiaData b3InertiaData_t;\n"
 327         "struct b3InertiaData\n"
 328         "{\n"
 329         "       b3Mat3x3 m_invInertiaWorld;\n"
 330         "       b3Mat3x3 m_initInvInertia;\n"
 331         "};\n"
 332         "#endif //B3_RIGIDBODY_DATA_H\n"
 333         "       \n"
 334         "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n"
 335         "#define B3_CONVEX_POLYHEDRON_DATA_H\n"
 336         "#ifndef B3_FLOAT4_H\n"
 337         "#ifdef __cplusplus\n"
 338         "#else\n"
 339         "#endif \n"
 340         "#endif //B3_FLOAT4_H\n"
 341         "#ifndef B3_QUAT_H\n"
 342         "#ifdef __cplusplus\n"
 343         "#else\n"
 344         "#endif \n"
 345         "#endif //B3_QUAT_H\n"
 346         "typedef struct b3GpuFace b3GpuFace_t;\n"
 347         "struct b3GpuFace\n"
 348         "{\n"
 349         "       b3Float4 m_plane;\n"
 350         "       int m_indexOffset;\n"
 351         "       int m_numIndices;\n"
 352         "       int m_unusedPadding1;\n"
 353         "       int m_unusedPadding2;\n"
 354         "};\n"
 355         "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n"
 356         "struct b3ConvexPolyhedronData\n"
 357         "{\n"
 358         "       b3Float4                m_localCenter;\n"
 359         "       b3Float4                m_extents;\n"
 360         "       b3Float4                mC;\n"
 361         "       b3Float4                mE;\n"
 362         "       float                   m_radius;\n"
 363         "       int     m_faceOffset;\n"
 364         "       int m_numFaces;\n"
 365         "       int     m_numVertices;\n"
 366         "       int m_vertexOffset;\n"
 367         "       int     m_uniqueEdgesOffset;\n"
 368         "       int     m_numUniqueEdges;\n"
 369         "       int m_unused;\n"
 370         "};\n"
 371         "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n"
 372         "#ifndef B3_COLLIDABLE_H\n"
 373         "#define B3_COLLIDABLE_H\n"
 374         "#ifndef B3_FLOAT4_H\n"
 375         "#ifdef __cplusplus\n"
 376         "#else\n"
 377         "#endif \n"
 378         "#endif //B3_FLOAT4_H\n"
 379         "#ifndef B3_QUAT_H\n"
 380         "#ifdef __cplusplus\n"
 381         "#else\n"
 382         "#endif \n"
 383         "#endif //B3_QUAT_H\n"
 384         "enum b3ShapeTypes\n"
 385         "{\n"
 386         "       SHAPE_HEIGHT_FIELD=1,\n"
 387         "       SHAPE_CONVEX_HULL=3,\n"
 388         "       SHAPE_PLANE=4,\n"
 389         "       SHAPE_CONCAVE_TRIMESH=5,\n"
 390         "       SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n"
 391         "       SHAPE_SPHERE=7,\n"
 392         "       MAX_NUM_SHAPE_TYPES,\n"
 393         "};\n"
 394         "typedef struct b3Collidable b3Collidable_t;\n"
 395         "struct b3Collidable\n"
 396         "{\n"
 397         "       union {\n"
 398         "               int m_numChildShapes;\n"
 399         "               int m_bvhIndex;\n"
 400         "       };\n"
 401         "       union\n"
 402         "       {\n"
 403         "               float m_radius;\n"
 404         "               int     m_compoundBvhIndex;\n"
 405         "       };\n"
 406         "       int m_shapeType;\n"
 407         "       int m_shapeIndex;\n"
 408         "};\n"
 409         "typedef struct b3GpuChildShape b3GpuChildShape_t;\n"
 410         "struct b3GpuChildShape\n"
 411         "{\n"
 412         "       b3Float4        m_childPosition;\n"
 413         "       b3Quat          m_childOrientation;\n"
 414         "       int m_shapeIndex;\n"
 415         "       int m_unused0;\n"
 416         "       int m_unused1;\n"
 417         "       int m_unused2;\n"
 418         "};\n"
 419         "struct b3CompoundOverlappingPair\n"
 420         "{\n"
 421         "       int m_bodyIndexA;\n"
 422         "       int m_bodyIndexB;\n"
 423         "//     int     m_pairType;\n"
 424         "       int m_childShapeIndexA;\n"
 425         "       int m_childShapeIndexB;\n"
 426         "};\n"
 427         "#endif //B3_COLLIDABLE_H\n"
 428         "#ifdef __cplusplus\n"
 429         "#else\n"
 430         "#define B3_MPR_SQRT sqrt\n"
 431         "#endif\n"
 432         "#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n"
 433         "#define B3_MPR_FABS fabs\n"
 434         "#define B3_MPR_TOLERANCE 1E-6f\n"
 435         "#define B3_MPR_MAX_ITERATIONS 1000\n"
 436         "struct _b3MprSupport_t \n"
 437         "{\n"
 438         "    b3Float4 v;  //!< Support point in minkowski sum\n"
 439         "    b3Float4 v1; //!< Support point in obj1\n"
 440         "    b3Float4 v2; //!< Support point in obj2\n"
 441         "};\n"
 442         "typedef struct _b3MprSupport_t b3MprSupport_t;\n"
 443         "struct _b3MprSimplex_t \n"
 444         "{\n"
 445         "    b3MprSupport_t ps[4];\n"
 446         "    int last; //!< index of last added point\n"
 447         "};\n"
 448         "typedef struct _b3MprSimplex_t b3MprSimplex_t;\n"
 449         "inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n"
 450         "{\n"
 451         "    return &s->ps[idx];\n"
 452         "}\n"
 453         "inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n"
 454         "{\n"
 455         "    s->last = size - 1;\n"
 456         "}\n"
 457         "inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n"
 458         "{\n"
 459         "    return s->last + 1;\n"
 460         "}\n"
 461         "inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n"
 462         "{\n"
 463         "    // here is no check on boundaries\n"
 464         "    return &s->ps[idx];\n"
 465         "}\n"
 466         "inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n"
 467         "{\n"
 468         "    *d = *s;\n"
 469         "}\n"
 470         "inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n"
 471         "{\n"
 472         "    b3MprSupportCopy(s->ps + pos, a);\n"
 473         "}\n"
 474         "inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n"
 475         "{\n"
 476         "    b3MprSupport_t supp;\n"
 477         "    b3MprSupportCopy(&supp, &s->ps[pos1]);\n"
 478         "    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n"
 479         "    b3MprSupportCopy(&s->ps[pos2], &supp);\n"
 480         "}\n"
 481         "inline int b3MprIsZero(float val)\n"
 482         "{\n"
 483         "    return B3_MPR_FABS(val) < FLT_EPSILON;\n"
 484         "}\n"
 485         "inline int b3MprEq(float _a, float _b)\n"
 486         "{\n"
 487         "    float ab;\n"
 488         "    float a, b;\n"
 489         "    ab = B3_MPR_FABS(_a - _b);\n"
 490         "    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n"
 491         "        return 1;\n"
 492         "    a = B3_MPR_FABS(_a);\n"
 493         "    b = B3_MPR_FABS(_b);\n"
 494         "    if (b > a){\n"
 495         "        return ab < FLT_EPSILON * b;\n"
 496         "    }else{\n"
 497         "        return ab < FLT_EPSILON * a;\n"
 498         "    }\n"
 499         "}\n"
 500         "inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n"
 501         "{\n"
 502         "    return b3MprEq((*a).x, (*b).x)\n"
 503         "            && b3MprEq((*a).y, (*b).y)\n"
 504         "            && b3MprEq((*a).z, (*b).z);\n"
 505         "}\n"
 506         "inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull,     b3ConstArray(b3Float4) verticesA)\n"
 507         "{\n"
 508         "       b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n"
 509         "       float maxDot = -B3_LARGE_FLOAT;\n"
 510         "    if( 0 < hull->m_numVertices )\n"
 511         "    {\n"
 512         "        const b3Float4 scaled = supportVec;\n"
 513         "               int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n"
 514         "        return verticesA[hull->m_vertexOffset+index];\n"
 515         "    }\n"
 516         "    return supVec;\n"
 517         "}\n"
 518         "B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
 519         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
 520         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
 521         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
 522         "                                                                                                       __global b3Float4* sepAxis,\n"
 523         "                                                                                                               const b3Float4* _dir, b3Float4* outp, int logme)\n"
 524         "{\n"
 525         "       // Writes to *outp the world-space support point of body bodyIndex along the world-space direction *_dir.\n"
 526         "       // pairIndex and sepAxis are not used here; logme only selects between two equivalent code paths.\n"
 527         "       b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n"
 528         "       b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n"
 529         "       //dir is in worldspace, move to local space\n"
 530         "       b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n"
 531         "       \n"
 532         "       const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n"
 533         "       \n"
 534         "       //find local support vertex\n"
 535         "       int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n"
 536         "       \n"
 537         "       b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n"
 538         "       __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n"
 539         "       // start from the zero vector so an empty hull (m_numVertices == 0) never leaves pInA uninitialized\n"
 540         "       b3Float4 pInA = b3MakeFloat4(0,0,0,0);\n"
 541         "       if (logme)\n"
 542         "       {\n"
 543         "               // inlined copy of b3LocalGetSupportVertex\n"
 544         "               float maxDot = -B3_LARGE_FLOAT;\n"
 545         "               if( 0 < hull->m_numVertices )\n"
 546         "               {\n"
 547         "                       const b3Float4 scaled = localDir;\n"
 548         "                       int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n"
 549         "                       pInA = cpuVertices[hull->m_vertexOffset+index];\n"
 550         "                       \n"
 551         "               }\n"
 552         "       } else\n"
 553         "       {\n"
 554         "               pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n"
 555         "       }\n"
 556         "       //move vertex to world space\n"
 557         "       *outp = b3TransformPoint(pInA,pos,orn);\n"
 558         "       \n"
 559         "}\n"
 560         "inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
 561         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
 562         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
 563         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
 564         "                                                                                                       __global b3Float4* sepAxis,\n"
 565         "                                                                                                       const b3Float4* _dir, b3MprSupport_t *supp)\n"
 566         "{\n"
 567         "    b3Float4 dir;\n"
 568         "       dir = *_dir;\n"
 569         "       b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n"
 570         "    dir = *_dir*-1.f;\n"
 571         "       b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n"
 572         "    supp->v = supp->v1 - supp->v2;\n"
 573         "}\n"
 574         "inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n"
 575         "{\n"
 576         "    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n"
 577         "       center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n"
 578         "    center->v = center->v1 - center->v2;\n"
 579         "}\n"
 580         "inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n"
 581         "{\n"
 582         "       (*v).x = x;\n"
 583         "       (*v).y = y;\n"
 584         "       (*v).z = z;\n"
 585         "       (*v).w = 0.f;\n"
 586         "}\n"
 587         "inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n"
 588         "{\n"
 589         "    (*v).x += (*w).x;\n"
 590         "    (*v).y += (*w).y;\n"
 591         "    (*v).z += (*w).z;\n"
 592         "}\n"
 593         "inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n"
 594         "{\n"
 595         "    *v = *w;\n"
 596         "}\n"
 597         "inline void b3MprVec3Scale(b3Float4 *d, float k)\n"
 598         "{\n"
 599         "    *d *= k;\n"
 600         "}\n"
 601         "inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n"
 602         "{\n"
 603         "    float dot;\n"
 604         "       dot = b3Dot3F4(*a,*b);\n"
 605         "    return dot;\n"
 606         "}\n"
 607         "inline float b3MprVec3Len2(const b3Float4 *v)\n"
 608         "{\n"
 609         "    return b3MprVec3Dot(v, v);\n"
 610         "}\n"
 611         "inline void b3MprVec3Normalize(b3Float4 *d)\n"
 612         "{\n"
 613         "    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n"
 614         "    b3MprVec3Scale(d, k);\n"
 615         "}\n"
 616         "inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n"
 617         "{\n"
 618         "       *d = b3Cross3(*a,*b);\n"
 619         "       \n"
 620         "}\n"
 621         "inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n"
 622         "{\n"
 623         "       *d = *v - *w;\n"
 624         "}\n"
 625         "inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n"
 626         "{\n"
 627         "    b3Float4 v2v1, v3v1;\n"
 628         "    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n"
 629         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
 630         "    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n"
 631         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
 632         "    b3MprVec3Cross(dir, &v2v1, &v3v1);\n"
 633         "    b3MprVec3Normalize(dir);\n"
 634         "}\n"
 635         "inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n"
 636         "                                       const b3Float4 *dir)\n"
 637         "{\n"
 638         "    float dot;\n"
 639         "    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n"
 640         "    return b3MprIsZero(dot) || dot > 0.f;\n"
 641         "}\n"
 642         "inline int portalReachTolerance(const b3MprSimplex_t *portal,\n"
 643         "                                     const b3MprSupport_t *v4,\n"
 644         "                                     const b3Float4 *dir)\n"
 645         "{\n"
 646         "    float dv1, dv2, dv3, dv4;\n"
 647         "    float dot1, dot2, dot3;\n"
 648         "    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n"
 649         "    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n"
 650         "    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n"
 651         "    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n"
 652         "    dv4 = b3MprVec3Dot(&v4->v, dir);\n"
 653         "    dot1 = dv4 - dv1;\n"
 654         "    dot2 = dv4 - dv2;\n"
 655         "    dot3 = dv4 - dv3;\n"
 656         "    dot1 = B3_MPR_FMIN(dot1, dot2);\n"
 657         "    dot1 = B3_MPR_FMIN(dot1, dot3);\n"
 658         "    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n"
 659         "}\n"
 660         "inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n"
 661         "                                         const b3MprSupport_t *v4,\n"
 662         "                                         const b3Float4 *dir)\n"
 663         "{\n"
 664         "    float dot;\n"
 665         "    dot = b3MprVec3Dot(&v4->v, dir);\n"
 666         "    return b3MprIsZero(dot) || dot > 0.f;\n"
 667         "}\n"
 668         "inline void b3ExpandPortal(b3MprSimplex_t *portal,\n"
 669         "                              const b3MprSupport_t *v4)\n"
             // Embedded OpenCL helper. Overwrites exactly one of portal points 1..3
             // with v4. The slot is picked from the signs of dot(vK, v4v0), where
             // v4v0 is the b3MprVec3Cross output for the inputs (v4, v0):
             //   dot(v1) >  0 and dot(v2) >  0  -> slot 1
             //   dot(v1) >  0 and dot(v2) <= 0  -> slot 3
             //   dot(v1) <= 0 and dot(v3) >  0  -> slot 2
             //   dot(v1) <= 0 and dot(v3) <= 0  -> slot 1
 670         "{\n"
 671         "    float dot;\n"
 672         "    b3Float4 v4v0;\n"
 673         "    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n"
 674         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n"
 675         "    if (dot > 0.f){\n"
 676         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n"
 677         "        if (dot > 0.f){\n"
 678         "            b3MprSimplexSet(portal, 1, v4);\n"
 679         "        }else{\n"
 680         "            b3MprSimplexSet(portal, 3, v4);\n"
 681         "        }\n"
 682         "    }else{\n"
 683         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n"
 684         "        if (dot > 0.f){\n"
 685         "            b3MprSimplexSet(portal, 2, v4);\n"
 686         "        }else{\n"
 687         "            b3MprSimplexSet(portal, 1, v4);\n"
 688         "        }\n"
 689         "    }\n"
 690         "}\n"
 691         "B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
 692         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
 693         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
 694         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
 695         "                                                                                                       __global b3Float4* sepAxis,\n"
 696         "                                                                                                       __global int*   hasSepAxis,\n"
 697         "                                                                                                       b3MprSimplex_t *portal)\n"
             // Embedded OpenCL routine: fills portal points 0..3 (portal discovery).
             // Return values produced by this body:
             //    0 - four portal points were stored and the size was set to 4
             //    1 - origin lies on v1
             //    2 - origin lies on the v0-v1 segment
             //   -1 - a support point had a zero or negative dot product with the
             //        current search direction
             // hasSepAxis is accepted but never referenced in this body.
 698         "{\n"
 699         "    b3Float4 dir, va, vb;\n"
 700         "    float dot;\n"
 701         "    int cont;\n"
 702         "       \n"
 703         "       \n"
 704         "    // vertex 0 is center of portal\n"
 705         "    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n"
 706         "    // vertex 0 is center of portal\n"
 707         "    b3MprSimplexSetSize(portal, 1);\n"
 708         "       \n"
             // A local zero vector stands in for the origin constant used below.
 709         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
 710         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
 711         "    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n"
 712         "        // Portal's center lies on origin (0,0,0) => we know that objects\n"
 713         "        // intersect but we would need to know penetration info.\n"
 714         "        // So move center little bit...\n"
 715         "        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n"
 716         "        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n"
 717         "    }\n"
 718         "    // vertex 1 = support in direction of origin\n"
 719         "    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n"
 720         "    b3MprVec3Scale(&dir, -1.f);\n"
 721         "    b3MprVec3Normalize(&dir);\n"
 722         "    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n"
 723         "    b3MprSimplexSetSize(portal, 2);\n"
 724         "    // test if origin isn't outside of v1\n"
 725         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n"
 726         "       \n"
 727         "    if (b3MprIsZero(dot) || dot < 0.f)\n"
 728         "        return -1;\n"
 729         "    // vertex 2\n"
 730         "    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n"
 731         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
             // A zero-length cross product means v0 and v1 are collinear with the
             // origin; the two early returns below report which case applies.
 732         "    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n"
 733         "        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n"
 734         "            // origin lies on v1\n"
 735         "            return 1;\n"
 736         "        }else{\n"
 737         "            // origin lies on v0-v1 segment\n"
 738         "            return 2;\n"
 739         "        }\n"
 740         "    }\n"
 741         "    b3MprVec3Normalize(&dir);\n"
 742         "        b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n"
 743         "    \n"
 744         "    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n"
 745         "    if (b3MprIsZero(dot) || dot < 0.f)\n"
 746         "        return -1;\n"
 747         "    b3MprSimplexSetSize(portal, 3);\n"
 748         "    // vertex 3 direction\n"
 749         "    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
 750         "                     &b3MprSimplexPoint(portal, 0)->v);\n"
 751         "    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n"
 752         "                     &b3MprSimplexPoint(portal, 0)->v);\n"
 753         "    b3MprVec3Cross(&dir, &va, &vb);\n"
 754         "    b3MprVec3Normalize(&dir);\n"
 755         "    // it is better to form portal faces to be oriented \"outside\" origin\n"
 756         "    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n"
 757         "    if (dot > 0.f){\n"
 758         "        b3MprSimplexSwap(portal, 1, 2);\n"
 759         "        b3MprVec3Scale(&dir, -1.f);\n"
 760         "    }\n"
             // NOTE(review): this loop has no explicit iteration cap. It ends only
             // when the size is set to 4 or a support test returns -1.
 761         "    while (b3MprSimplexSize(portal) < 4){\n"
 762         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n"
 763         "        \n"
 764         "        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n"
 765         "        if (b3MprIsZero(dot) || dot < 0.f)\n"
 766         "            return -1;\n"
             // cont records whether v3 replaced v1 or v2 in this pass; if so the
             // direction is recomputed and the loop runs again.
 767         "        cont = 0;\n"
 768         "        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n"
 769         "        // continue\n"
 770         "        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
 771         "                          &b3MprSimplexPoint(portal, 3)->v);\n"
 772         "        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n"
 773         "        if (dot < 0.f && !b3MprIsZero(dot)){\n"
 774         "            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n"
 775         "            cont = 1;\n"
 776         "        }\n"
 777         "        if (!cont){\n"
 778         "            // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n"
 779         "            // continue\n"
 780         "            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n"
 781         "                              &b3MprSimplexPoint(portal, 2)->v);\n"
 782         "            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n"
 783         "            if (dot < 0.f && !b3MprIsZero(dot)){\n"
 784         "                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n"
 785         "                cont = 1;\n"
 786         "            }\n"
 787         "        }\n"
 788         "        if (cont){\n"
 789         "            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n"
 790         "                             &b3MprSimplexPoint(portal, 0)->v);\n"
 791         "            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n"
 792         "                             &b3MprSimplexPoint(portal, 0)->v);\n"
 793         "            b3MprVec3Cross(&dir, &va, &vb);\n"
 794         "            b3MprVec3Normalize(&dir);\n"
 795         "        }else{\n"
 796         "            b3MprSimplexSetSize(portal, 4);\n"
 797         "        }\n"
 798         "    }\n"
 799         "    return 0;\n"
 800         "}\n"
 801         "B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
 802         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
 803         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
 804         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
 805         "                                                                                                       __global b3Float4* sepAxis,\n"
 806         "                                                                                                       b3MprSimplex_t *portal)\n"
             // Embedded OpenCL routine: portal refinement, limited to
             // B3_MPR_MAX_ITERATIONS passes.
             // Returns 0 as soon as portalEncapsulesOrigin() accepts the current
             // portal direction. Returns -1 when the new support point fails
             // portalCanEncapsuleOrigin(), when portalReachTolerance() fires, or
             // when the loop finishes without either outcome.
 807         "{\n"
 808         "    b3Float4 dir;\n"
 809         "    b3MprSupport_t v4;\n"
             // The bounded for loop replaces the unbounded while (1) kept in the
             // embedded comment on the next fragment.
 810         "       for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n"
 811         "    //while (1)\n"
 812         "       {\n"
 813         "        // compute direction outside the portal (from v0 throught v1,v2,v3\n"
 814         "        // face)\n"
 815         "        b3PortalDir(portal, &dir);\n"
 816         "        // test if origin is inside the portal\n"
 817         "        if (portalEncapsulesOrigin(portal, &dir))\n"
 818         "            return 0;\n"
 819         "        // get next support point\n"
 820         "        \n"
 821         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n"
 822         "        // test if v4 can expand portal to contain origin and if portal\n"
 823         "        // expanding doesn't reach given tolerance\n"
 824         "        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n"
 825         "                || portalReachTolerance(portal, &v4, &dir))\n"
 826         "               {\n"
 827         "            return -1;\n"
 828         "        }\n"
 829         "        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n"
 830         "        // difference (direction from v0).\n"
 831         "        b3ExpandPortal(portal, &v4);\n"
 832         "    }\n"
             // Reached only when the iteration budget is exhausted.
 833         "    return -1;\n"
 834         "}\n"
 835         "B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n"
             // Embedded OpenCL routine: writes a position estimate to *pos.
             // Four weights b[0..3] are computed from the portal points; p1 and p2
             // are the weighted sums of the ->v1 and ->v2 members of portal points
             // 0..3, each scaled by 1 / sum. *pos is set to 0.5 * (p1 + p2).
 836         "{\n"
 837         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
 838         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
 839         "    b3Float4 dir;\n"
 840         "    size_t i;\n"
 841         "    float b[4], sum, inv;\n"
 842         "    b3Float4 vec, p1, p2;\n"
 843         "    b3PortalDir(portal, &dir);\n"
 844         "    // use barycentric coordinates of tetrahedron to find origin\n"
 845         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n"
 846         "                       &b3MprSimplexPoint(portal, 2)->v);\n"
 847         "    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n"
 848         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n"
 849         "                       &b3MprSimplexPoint(portal, 2)->v);\n"
 850         "    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n"
 851         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n"
 852         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
 853         "    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n"
 854         "    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n"
 855         "                       &b3MprSimplexPoint(portal, 1)->v);\n"
 856         "    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n"
 857         "       sum = b[0] + b[1] + b[2] + b[3];\n"
             // Fallback when the weight total is zero or negative: b[0] is dropped
             // and b[1..3] are recomputed against the portal direction dir.
 858         "    if (b3MprIsZero(sum) || sum < 0.f){\n"
 859         "               b[0] = 0.f;\n"
 860         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n"
 861         "                           &b3MprSimplexPoint(portal, 3)->v);\n"
 862         "        b[1] = b3MprVec3Dot(&vec, &dir);\n"
 863         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n"
 864         "                           &b3MprSimplexPoint(portal, 1)->v);\n"
 865         "        b[2] = b3MprVec3Dot(&vec, &dir);\n"
 866         "        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n"
 867         "                           &b3MprSimplexPoint(portal, 2)->v);\n"
 868         "        b[3] = b3MprVec3Dot(&vec, &dir);\n"
 869         "               sum = b[1] + b[2] + b[3];\n"
 870         "       }\n"
             // NOTE(review): sum is not re-checked after the fallback, so the
             // reciprocal below is taken of whatever value the fallback produced.
 871         "       inv = 1.f / sum;\n"
 872         "    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n"
 873         "    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n"
 874         "    for (i = 0; i < 4; i++){\n"
 875         "        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n"
 876         "        b3MprVec3Scale(&vec, b[i]);\n"
 877         "        b3MprVec3Add(&p1, &vec);\n"
 878         "        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n"
 879         "        b3MprVec3Scale(&vec, b[i]);\n"
 880         "        b3MprVec3Add(&p2, &vec);\n"
 881         "    }\n"
 882         "    b3MprVec3Scale(&p1, inv);\n"
 883         "    b3MprVec3Scale(&p2, inv);\n"
 884         "    b3MprVec3Copy(pos, &p1);\n"
 885         "    b3MprVec3Add(pos, &p2);\n"
             // NOTE(review): the 0.5 literal below carries no f suffix.
 886         "    b3MprVec3Scale(pos, 0.5);\n"
 887         "}\n"
 888         "inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n"
             // Embedded OpenCL helper: builds ab with b3MprVec3Sub2(&ab, a, b) and
             // returns b3MprVec3Len2(&ab). Presumably this is the squared distance
             // between a and b - confirm against b3MprVec3Sub2 / b3MprVec3Len2.
 889         "{\n"
 890         "    b3Float4 ab;\n"
 891         "    b3MprVec3Sub2(&ab, a, b);\n"
 892         "    return b3MprVec3Len2(&ab);\n"
 893         "}\n"
 894         "inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n"
 895         "                                                  const b3Float4 *x0,\n"
 896         "                                                  const b3Float4 *b,\n"
 897         "                                                  b3Float4 *witness)\n"
             // Embedded OpenCL helper: distance value (as produced by
             // b3MprVec3Dist2 / b3MprVec3Len2) from point P to the segment x0-b.
             // witness may be null. When non-null it receives the closest point:
             // x0 when t <= 0, b when t >= 1, otherwise x0 + t*d.
 898         "{\n"
 899         "    // The computation comes from solving equation of segment:\n"
 900         "    //      S(t) = x0 + t.d\n"
 901         "    //          where - x0 is initial point of segment\n"
 902         "    //                - d is direction of segment from x0 (|d| > 0)\n"
 903         "    //                - t belongs to <0, 1> interval\n"
 904         "    // \n"
 905         "    // Than, distance from a segment to some point P can be expressed:\n"
 906         "    //      D(t) = |x0 + t.d - P|^2\n"
 907         "    //          which is distance from any point on segment. Minimization\n"
 908         "    //          of this function brings distance from P to segment.\n"
 909         "    // Minimization of D(t) leads to simple quadratic equation that's\n"
 910         "    // solving is straightforward.\n"
 911         "    //\n"
 912         "    // Bonus of this method is witness point for free.\n"
 913         "    float dist, t;\n"
 914         "    b3Float4 d, a;\n"
 915         "    // direction of segment\n"
 916         "    b3MprVec3Sub2(&d, b, x0);\n"
 917         "    // precompute vector from P to x0\n"
 918         "    b3MprVec3Sub2(&a, x0, P);\n"
 919         "    t  = -1.f * b3MprVec3Dot(&a, &d);\n"
             // NOTE(review): the divisor is not checked, so a zero-length segment
             // (x0 equal to b) divides by zero here.
 920         "    t /= b3MprVec3Len2(&d);\n"
 921         "    if (t < 0.f || b3MprIsZero(t)){\n"
 922         "        dist = b3MprVec3Dist2(x0, P);\n"
 923         "        if (witness)\n"
 924         "            b3MprVec3Copy(witness, x0);\n"
 925         "    }else if (t > 1.f || b3MprEq(t, 1.f)){\n"
 926         "        dist = b3MprVec3Dist2(b, P);\n"
 927         "        if (witness)\n"
 928         "            b3MprVec3Copy(witness, b);\n"
 929         "    }else{\n"
 930         "        if (witness){\n"
 931         "            b3MprVec3Copy(witness, &d);\n"
 932         "            b3MprVec3Scale(witness, t);\n"
 933         "            b3MprVec3Add(witness, x0);\n"
 934         "            dist = b3MprVec3Dist2(witness, P);\n"
 935         "        }else{\n"
             // Without a witness, d is overwritten with t*d + a and measured
             // directly instead of materialising the closest point.
 936         "            // recycling variables\n"
 937         "            b3MprVec3Scale(&d, t);\n"
 938         "            b3MprVec3Add(&d, &a);\n"
 939         "            dist = b3MprVec3Len2(&d);\n"
 940         "        }\n"
 941         "    }\n"
 942         "    return dist;\n"
 943         "}\n"
 944         "inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n"
 945         "                                const b3Float4 *x0, const b3Float4 *B,\n"
 946         "                                const b3Float4 *C,\n"
 947         "                                b3Float4 *witness)\n"
             // Embedded OpenCL helper: distance value from point P to the triangle
             // (x0, B, C). The interior solution (s, t) is used when it passes the
             // range tests below; otherwise the smallest of the three edge
             // distances from _b3MprVec3PointSegmentDist2 is returned.
             // witness may be null; when non-null it receives the closest point.
 948         "{\n"
 949         "    // Computation comes from analytic expression for triangle (x0, B, C)\n"
 950         "    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n"
 951         "    // Then equation for distance is:\n"
 952         "    //      D(s, t) = | T(s, t) - P |^2\n"
 953         "    // This leads to minimization of quadratic function of two variables.\n"
 954         "    // The solution from is taken only if s is between 0 and 1, t is\n"
 955         "    // between 0 and 1 and t + s < 1, otherwise distance from segment is\n"
 956         "    // computed.\n"
 957         "    b3Float4 d1, d2, a;\n"
 958         "    float u, v, w, p, q, r;\n"
 959         "    float s, t, dist, dist2;\n"
 960         "    b3Float4 witness2;\n"
 961         "    b3MprVec3Sub2(&d1, B, x0);\n"
 962         "    b3MprVec3Sub2(&d2, C, x0);\n"
 963         "    b3MprVec3Sub2(&a, x0, P);\n"
 964         "    u = b3MprVec3Dot(&a, &a);\n"
 965         "    v = b3MprVec3Dot(&d1, &d1);\n"
 966         "    w = b3MprVec3Dot(&d2, &d2);\n"
 967         "    p = b3MprVec3Dot(&a, &d1);\n"
 968         "    q = b3MprVec3Dot(&a, &d2);\n"
 969         "    r = b3MprVec3Dot(&d1, &d2);\n"
             // NOTE(review): neither divisor (w*v - r*r, then w) is checked for zero.
 970         "    s = (q * r - w * p) / (w * v - r * r);\n"
 971         "    t = (-s * r - q) / w;\n"
 972         "    if ((b3MprIsZero(s) || s > 0.f)\n"
 973         "            && (b3MprEq(s, 1.f) || s < 1.f)\n"
 974         "            && (b3MprIsZero(t) || t > 0.f)\n"
 975         "            && (b3MprEq(t, 1.f) || t < 1.f)\n"
 976         "            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n"
 977         "        if (witness){\n"
 978         "            b3MprVec3Scale(&d1, s);\n"
 979         "            b3MprVec3Scale(&d2, t);\n"
 980         "            b3MprVec3Copy(witness, x0);\n"
 981         "            b3MprVec3Add(witness, &d1);\n"
 982         "            b3MprVec3Add(witness, &d2);\n"
 983         "            dist = b3MprVec3Dist2(witness, P);\n"
 984         "        }else{\n"
             // Without a witness the quadratic form is evaluated directly from
             // the precomputed dot products u, v, w, p, q, r.
 985         "            dist  = s * s * v;\n"
 986         "            dist += t * t * w;\n"
 987         "            dist += 2.f * s * t * r;\n"
 988         "            dist += 2.f * s * p;\n"
 989         "            dist += 2.f * t * q;\n"
 990         "            dist += u;\n"
 991         "        }\n"
 992         "    }else{\n"
             // Edge fallback: the first call writes straight into witness (which
             // may be null); later edges use witness2 and copy only on improvement.
 993         "        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n"
 994         "        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n"
 995         "        if (dist2 < dist){\n"
 996         "            dist = dist2;\n"
 997         "            if (witness)\n"
 998         "                b3MprVec3Copy(witness, &witness2);\n"
 999         "        }\n"
1000         "        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n"
1001         "        if (dist2 < dist){\n"
1002         "            dist = dist2;\n"
1003         "            if (witness)\n"
1004         "                b3MprVec3Copy(witness, &witness2);\n"
1005         "        }\n"
1006         "    }\n"
1007         "    return dist;\n"
1008         "}\n"
1009         "B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n"
1010         "                                                                                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
1011         "                                                                                                       b3ConstArray(b3Collidable_t)                            cpuCollidables,\n"
1012         "                                                                                                       b3ConstArray(b3Float4)                                  cpuVertices,\n"
1013         "                                                                                                       __global b3Float4* sepAxis,\n"
1014         "                       b3MprSimplex_t *portal,\n"
1015         "                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n"
             // Embedded OpenCL routine: keeps expanding the portal until
             // portalReachTolerance() fires or the iterations counter equals
             // B3_MPR_MAX_ITERATIONS, then writes *depth, *pdir and *pos and returns.
             //   *depth - B3_MPR_SQRT of the b3MprVec3PointTriDist2 result for the
             //            origin against the triangle of portal points 1..3
             //   *pdir  - witness point from that call, normalized; replaced by the
             //            portal direction when its x, y and z are all reported zero
             //   *pos   - filled by b3FindPos()
1016         "{\n"
1017         "    b3Float4 dir;\n"
1018         "    b3MprSupport_t v4;\n"
1019         "    unsigned long iterations;\n"
1020         "       b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
1021         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
             // iterations starts at 1 and is incremented once per pass, so it
             // always equals i + 1 inside the loop below.
1022         "    iterations = 1UL;\n"
1023         "       for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n"
1024         "    //while (1)\n"
1025         "       {\n"
1026         "        // compute portal direction and obtain next support point\n"
1027         "        b3PortalDir(portal, &dir);\n"
1028         "        \n"
1029         "                b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n"
1030         "        // reached tolerance -> find penetration info\n"
1031         "        if (portalReachTolerance(portal, &v4, &dir)\n"
1032         "                || iterations ==B3_MPR_MAX_ITERATIONS)\n"
1033         "               {\n"
1034         "            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n"
1035         "            *depth = B3_MPR_SQRT(*depth);\n"
1036         "                       \n"
             // A near-zero witness cannot be normalized meaningfully, so the
             // portal direction is substituted before normalizing.
1037         "                       if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n"
1038         "                       {\n"
1039         "                               \n"
1040         "                               *pdir = dir;\n"
1041         "                       } \n"
1042         "                       b3MprVec3Normalize(pdir);\n"
1043         "                       \n"
1044         "            // barycentric coordinates:\n"
1045         "            b3FindPos(portal, pos);\n"
1046         "            return;\n"
1047         "        }\n"
1048         "        b3ExpandPortal(portal, &v4);\n"
1049         "        iterations++;\n"
1050         "    }\n"
1051         "}\n"
1052         "B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n"
             // Embedded OpenCL routine for the touching-contact case.
             // Writes *depth = 0, copies a zero vector into *dir, and sets *pos to
             // 0.5 * (v1 + v2) taken from portal point 1.
             // NOTE(review): the 0.5 literal in the last statement has no f suffix.
1053         "{\n"
1054         "    // Touching contact on portal's v1 - so depth is zero and direction\n"
1055         "    // is unimportant and pos can be guessed\n"
1056         "    *depth = 0.f;\n"
1057         "    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n"
1058         "       b3Float4* b3mpr_vec3_origin = &zero;\n"
1059         "       b3MprVec3Copy(dir, b3mpr_vec3_origin);\n"
1060         "    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n"
1061         "    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n"
1062         "    b3MprVec3Scale(pos, 0.5);\n"
1063         "}\n"
1064         "B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n"
1065         "                              float *depth, b3Float4 *dir, b3Float4 *pos)\n"
             // Embedded OpenCL routine for the case where the origin lies on the
             // v0-v1 segment.
             //   *pos   - 0.5f * (v1 + v2) taken from portal point 1
             //   *dir   - portal point 1's ->v member, normalized
             //   *depth - B3_MPR_SQRT of b3MprVec3Len2 of that vector, taken before
             //            it is normalized
1066         "{\n"
1067         "    \n"
1068         "    // Origin lies on v0-v1 segment.\n"
1069         "    // Depth is distance to v1, direction also and position must be\n"
1070         "    // computed\n"
1071         "    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n"
1072         "    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n"
1073         "    b3MprVec3Scale(pos, 0.5f);\n"
1074         "    \n"
1075         "    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n"
1076         "    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n"
1077         "    b3MprVec3Normalize(dir);\n"
1078         "}\n"
1079         "inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n"
1080         "                                       b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n"
1081         "                                       b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n"
1082         "                                       b3ConstArray(b3Collidable_t)    cpuCollidables,\n"
1083         "                                       b3ConstArray(b3Float4)  cpuVertices,\n"
1084         "                                       __global b3Float4* sepAxis,\n"
1085         "                                       __global int*   hasSepAxis,\n"
1086         "                                       float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n"
             // Embedded OpenCL entry point for one pair: runs portal discovery,
             // then dispatches on its result.
             // Returns 0 when depthOut / dirOut / posOut were filled, and -1 when
             // discovery returned anything other than 0, 1 or 2 or when refinement
             // returned a negative value.
             // Side effects on the global buffers: hasSepAxis[pairIndex] is cleared
             // on entry; on the full three-phase path it is set to 1 and
             // sepAxis[pairIndex] is set to the negated *dirOut.
1087         "{\n"
1088         "       \n"
1089         "        b3MprSimplex_t portal;\n"
1090         "        \n"
1091         "//     if (!hasSepAxis[pairIndex])\n"
1092         "       //      return -1;\n"
1093         "       \n"
1094         "       hasSepAxis[pairIndex] = 0;\n"
1095         "        int res;\n"
1096         "    // Phase 1: Portal discovery\n"
1097         "    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n"
1098         "       \n"
1099         "         \n"
1100         "       //sepAxis[pairIndex] = *pdir;//or -dir?\n"
1101         "       switch (res)\n"
1102         "       {\n"
1103         "       case 0:\n"
1104         "               {\n"
1105         "                       // Phase 2: Portal refinement\n"
1106         "               \n"
1107         "                       res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n"
1108         "                       if (res < 0)\n"
1109         "                               return -1;\n"
1110         "                       // Phase 3. Penetration info\n"
1111         "                       b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n"
1112         "                       hasSepAxis[pairIndex] = 1;\n"
1113         "                       sepAxis[pairIndex] = -*dirOut;\n"
1114         "                       break;\n"
1115         "               }\n"
1116         "       case 1:\n"
1117         "               {\n"
1118         "                        // Touching contact on portal's v1.\n"
1119         "                       b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n"
1120         "                       break;\n"
1121         "               }\n"
1122         "       case 2:\n"
1123         "               {\n"
1124         "                       \n"
1125         "                       b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n"
1126         "                       break;\n"
1127         "               }\n"
1128         "       default:\n"
1129         "               {\n"
             // hasSepAxis[pairIndex] was already cleared on entry; this store
             // repeats that value before reporting no collision.
1130         "                       hasSepAxis[pairIndex]=0;\n"
1131         "                       //if (res < 0)\n"
1132         "                       //{\n"
1133         "                               // Origin isn't inside portal - no collision.\n"
1134         "                               return -1;\n"
1135         "                       //}\n"
1136         "               }\n"
1137         "       };\n"
1138         "       \n"
1139         "       return 0;\n"
1140         "};\n"
1141         "#endif //B3_MPR_PENETRATION_H\n"
1142         "#ifndef B3_CONTACT4DATA_H\n"
1143         "#define B3_CONTACT4DATA_H\n"
1144         "#ifndef B3_FLOAT4_H\n"
1145         "#ifdef __cplusplus\n"
1146         "#else\n"
1147         "#endif \n"
1148         "#endif //B3_FLOAT4_H\n"
1149         "typedef  struct b3Contact4Data b3Contact4Data_t;\n"
             // Embedded OpenCL contact record with up to four world-space points
             // (m_worldPosB[4]). The w component of m_worldNormalOnB carries the
             // point count, per the embedded comment on that field.
             // NOTE(review): the meaning of the two "Cmp" coefficients and of the
             // "AndSignBit" body fields is not visible here - confirm with the
             // host-side definition before relying on them.
1150         "struct b3Contact4Data\n"
1151         "{\n"
1152         "       b3Float4        m_worldPosB[4];\n"
1153         "//     b3Float4        m_localPosA[4];\n"
1154         "//     b3Float4        m_localPosB[4];\n"
1155         "       b3Float4        m_worldNormalOnB;       //      w: m_nPoints\n"
1156         "       unsigned short  m_restituitionCoeffCmp;\n"
1157         "       unsigned short  m_frictionCoeffCmp;\n"
1158         "       int m_batchIdx;\n"
1159         "       int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n"
1160         "       int m_bodyBPtrAndSignBit;\n"
1161         "       int     m_childIndexA;\n"
1162         "       int     m_childIndexB;\n"
1163         "       int m_unused1;\n"
1164         "       int m_unused2;\n"
1165         "};\n"
1166         "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
             // Embedded OpenCL accessor: returns m_worldNormalOnB.w cast to int.
1167         "{\n"
1168         "       return (int)contact->m_worldNormalOnB.w;\n"
1169         "};\n"
1170         "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n"
             // Embedded OpenCL mutator: stores numPoints, cast to float, in
             // m_worldNormalOnB.w.
1171         "{\n"
1172         "       contact->m_worldNormalOnB.w = (float)numPoints;\n"
1173         "};\n"
1174         "#endif //B3_CONTACT4DATA_H\n"
1175         "#define AppendInc(x, out) out = atomic_inc(x)\n"
             // AppendInc(x, out): assigns the atomic_inc(x) result to out.
             // GET_NPOINTS(x): expands to the w component of x.m_worldNormalOnB.
1176         "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n"
             // When cl_ext_atomic_counters_32 is defined the extension is enabled;
             // otherwise counter32_t is defined as a volatile __global int pointer.
1177         "#ifdef cl_ext_atomic_counters_32\n"
1178         "       #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
1179         "#else\n"
1180         "       #define counter32_t volatile __global int*\n"
1181         "#endif\n"
// Embedded OpenCL source: mprPenetrationKernel.
// Each work-item reads i = get_global_id(0) and, when i < numPairs, processes pairs[i]
// (.x and .y are the two body indices). The work-item returns early when both bodies
// have m_invMass == 0 or when either collidable is not SHAPE_CONVEX_HULL.
// Otherwise b3MprPenetration is called; if it returns 0, a contact slot index is obtained
// through AppendInc on nGlobalContactsOut and, when that index is below contactCapacity,
// a single-point contact is written to globalContactsOut and the index is stored in pairs[i].z.
1182         "__kernel void   mprPenetrationKernel( __global int4* pairs,\n"
1183         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1184         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1185         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1186         "                                                                                                                                                                       __global const float4* vertices,\n"
1187         "                                                                                                                                                                       __global float4* separatingNormals,\n"
1188         "                                                                                                                                                                       __global int* hasSeparatingAxis,\n"
1189         "                                                                                                                                                                       __global struct b3Contact4Data* restrict globalContactsOut,\n"
1190         "                                                                                                                                                                       counter32_t nGlobalContactsOut,\n"
1191         "                                                                                                                                                                       int contactCapacity,\n"
1192         "                                                                                                                                                                       int numPairs)\n"
1193         "{\n"
1194         "       int i = get_global_id(0);\n"
1195         "       int pairIndex = i;\n"
1196         "       if (i<numPairs)\n"
1197         "       {\n"
1198         "               int bodyIndexA = pairs[i].x;\n"
1199         "               int bodyIndexB = pairs[i].y;\n"
1200         "               int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1201         "               int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1202         "       \n"
// shapeIndexA and shapeIndexB are computed here but are not read again in this kernel.
1203         "               int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1204         "               int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1205         "               \n"
1206         "               \n"
1207         "               //once the broadphase avoids static-static pairs, we can remove this test\n"
1208         "               if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n"
1209         "               {\n"
1210         "                       return;\n"
1211         "               }\n"
1212         "               \n"
1213         "               if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n"
1214         "               {\n"
1215         "                       return;\n"
1216         "               }\n"
1217         "               float depthOut;\n"
1218         "               b3Float4 dirOut;\n"
1219         "               b3Float4 posOut;\n"
// b3MprPenetration is defined earlier in the embedded source (outside this excerpt);
// the code below treats a return value of 0 as the case in which a contact is emitted.
1220         "               int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n"
1221         "               \n"
1222         "               \n"
1223         "               \n"
1224         "               \n"
1225         "               if (res==0)\n"
1226         "               {\n"
1227         "                       //add a contact\n"
1228         "                       int dstIdx;\n"
// AppendInc is a macro defined outside this excerpt; presumably it atomically increments
// the counter and yields the previous value in dstIdx - TODO confirm against its definition.
1229         "                       AppendInc( nGlobalContactsOut, dstIdx );\n"
// No contact is written when dstIdx is not below contactCapacity.
1230         "                       if (dstIdx<contactCapacity)\n"
1231         "                       {\n"
1232         "                               pairs[pairIndex].z = dstIdx;\n"
1233         "                               __global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n"
// The stored normal is the negated direction reported by b3MprPenetration.
1234         "                               c->m_worldNormalOnB = -dirOut;//normal;\n"
// Restitution and friction are hard-coded here (0 and 0.7, each scaled by 0xffff).
1235         "                               c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
1236         "                               c->m_batchIdx = pairIndex;\n"
// bodyA and bodyB re-read the same pairs[].x and pairs[].y values loaded above.
1237         "                               int bodyA = pairs[pairIndex].x;\n"
1238         "                               int bodyB = pairs[pairIndex].y;\n"
// A body with zero inverse mass is stored with its index negated.
// NOTE(review): index 0 negates to 0, so the sign cannot mark body 0 - confirm this is acceptable.
1239         "                               c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n"
1240         "                               c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n"
1241         "                               c->m_childIndexA = -1;\n"
1242         "                               c->m_childIndexB = -1;\n"
1243         "                               //for (int i=0;i<nContacts;i++)\n"
// The negated penetration depth is carried in the w component of the contact position.
1244         "                               posOut.w = -depthOut;\n"
1245         "                               c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n"
// Exactly one contact point is recorded per pair.
1246         "                               GET_NPOINTS(*c) = 1;//nContacts;\n"
1247         "                       }\n"
1248         "               }\n"
1249         "       }\n"
1250         "}\n"
// Embedded OpenCL source: Quaternion is an alias for float4. The helpers that follow
// (qtMul, qtInvert) use xyz as the vector part and w as the scalar part.
// make_float4 expands to a (float4) cast, so make_float4(a, b) becomes the
// OpenCL vector literal (float4)(a, b).
1251         "typedef float4 Quaternion;\n"
1252         "#define make_float4 (float4)\n"
// Embedded OpenCL source: dot3F4 returns the dot product of the xyz components of a and b.
// Both inputs are copied with w forced to 0 before the 4-component dot is taken,
// so the w components of the arguments never contribute.
1253         "__inline\n"
1254         "float dot3F4(float4 a, float4 b)\n"
1255         "{\n"
1256         "       float4 a1 = make_float4(a.xyz,0.f);\n"
1257         "       float4 b1 = make_float4(b.xyz,0.f);\n"
1258         "       return dot(a1, b1);\n"
1259         "}\n"
// Embedded OpenCL source: cross3 is a thin wrapper that forwards both float4
// arguments to the built-in cross and returns its result unchanged.
1260         "__inline\n"
1261         "float4 cross3(float4 a, float4 b)\n"
1262         "{\n"
1263         "       return cross(a,b);\n"
1264         "}\n"
// Embedded OpenCL source: qtMul multiplies two quaternions (xyz vector part, w scalar part).
// Vector part: cross(a, b) + a.w*b + b.w*a. Scalar part: a.w*b.w - dot3F4(a, b).
1265         "__inline\n"
1266         "Quaternion qtMul(Quaternion a, Quaternion b)\n"
1267         "{\n"
1268         "       Quaternion ans;\n"
1269         "       ans = cross3( a, b );\n"
// This addition also changes ans.w, but ans.w is overwritten two lines below.
1270         "       ans += a.w*b+b.w*a;\n"
1271         "//     ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
1272         "       ans.w = a.w*b.w - dot3F4(a, b);\n"
1273         "       return ans;\n"
1274         "}\n"
// Embedded OpenCL source: qtInvert returns the conjugate (-q.xyz, q.w).
// No normalization is performed, so the result is the true inverse only when q has unit length.
1275         "__inline\n"
1276         "Quaternion qtInvert(Quaternion q)\n"
1277         "{\n"
1278         "       return (Quaternion)(-q.xyz, q.w);\n"
1279         "}\n"
// Embedded OpenCL source: qtRotate rotates vec by q, computed as q * (vec.xyz, 0) * qtInvert(q).
// The w component of vec is ignored because the working copy has w set to 0.
1280         "__inline\n"
1281         "float4 qtRotate(Quaternion q, float4 vec)\n"
1282         "{\n"
1283         "       Quaternion qInv = qtInvert( q );\n"
1284         "       float4 vcpy = vec;\n"
1285         "       vcpy.w = 0.f;\n"
1286         "       float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
1287         "       return out;\n"
1288         "}\n"
// Embedded OpenCL source: transform rotates *p by *orientation and then adds *translation.
// The addition is a full float4 add, so the w component of *translation is carried into the result.
1289         "__inline\n"
1290         "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
1291         "{\n"
1292         "       return qtRotate( *orientation, *p ) + (*translation);\n"
1293         "}\n"
// Embedded OpenCL source: qtInvRotate rotates vec by the conjugate of q,
// i.e. it calls qtRotate with qtInvert(q).
1294         "__inline\n"
1295         "float4 qtInvRotate(const Quaternion q, float4 vec)\n"
1296         "{\n"
1297         "       return qtRotate( qtInvert( q ), vec );\n"
1298         "}\n"
// Embedded OpenCL source: project computes the interval [min[0], max[0]] covered by a hull along *dir.
// The direction is rotated by the inverse of orn (localDir) and each vertex at
// vertices[hull->m_vertexOffset + i] is dotted with localDir. After the loop,
// offset = dot(pos, *dir) is added to both ends of the interval.
// The built-in 4-component dot is used here, so w components contribute unless they are zero.
1299         "inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n"
1300         "const float4* dir, __global const float4* vertices, float* min, float* max)\n"
1301         "{\n"
1302         "       min[0] = FLT_MAX;\n"
1303         "       max[0] = -FLT_MAX;\n"
1304         "       int numVerts = hull->m_numVertices;\n"
1305         "       const float4 localDir = qtInvRotate(orn,*dir);\n"
1306         "       float offset = dot(pos,*dir);\n"
1307         "       for(int i=0;i<numVerts;i++)\n"
1308         "       {\n"
1309         "               float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n"
1310         "               if(dp < min[0]) \n"
1311         "                       min[0] = dp;\n"
1312         "               if(dp > max[0]) \n"
1313         "                       max[0] = dp;\n"
1314         "       }\n"
// If the loop body never ran (numVerts <= 0), the initial values still satisfy
// min > max and are swapped here.
1315         "       if(min[0]>max[0])\n"
1316         "       {\n"
1317         "               float tmp = min[0];\n"
1318         "               min[0] = max[0];\n"
1319         "               max[0] = tmp;\n"
1320         "       }\n"
1321         "       min[0] += offset;\n"
1322         "       max[0] += offset;\n"
1323         "}\n"
// Embedded OpenCL source: findSeparatingAxisUnitSphere tests the candidate axes in
// unitSphereDirections[0 .. numUnitSphereDirections) against both hulls.
// Returns false as soon as the projected intervals of the two hulls do not overlap on some axis.
// Returns true when they overlap on every tested axis; in that case *dmin and *sep hold the
// smallest overlap found and its axis, but only if that overlap was below the incoming *dmin.
// Both are in/out parameters: the caller supplies the starting values.
1324         "bool findSeparatingAxisUnitSphere(     __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n"
1325         "       const float4 posA1,\n"
1326         "       const float4 ornA,\n"
1327         "       const float4 posB1,\n"
1328         "       const float4 ornB,\n"
1329         "       const float4 DeltaC2,\n"
1330         "       __global const float4* vertices,\n"
1331         "       __global const float4* unitSphereDirections,\n"
1332         "       int numUnitSphereDirections,\n"
1333         "       float4* sep,\n"
1334         "       float* dmin)\n"
1335         "{\n"
1336         "       \n"
// The positions are copied with w cleared, so the 4-component dot inside project
// sees no w contribution from them.
1337         "       float4 posA = posA1;\n"
1338         "       posA.w = 0.f;\n"
1339         "       float4 posB = posB1;\n"
1340         "       posB.w = 0.f;\n"
// curPlaneTests and curEdgeEdge are never read in this function.
1341         "       int curPlaneTests=0;\n"
1342         "       int curEdgeEdge = 0;\n"
1343         "       // Test unit sphere directions\n"
1344         "       for (int i=0;i<numUnitSphereDirections;i++)\n"
1345         "       {\n"
1346         "               float4 crossje;\n"
1347         "               crossje = unitSphereDirections[i];      \n"
// The if below guards only the sign flip; the brace block that follows always runs.
1348         "               if (dot3F4(DeltaC2,crossje)>0)\n"
1349         "                       crossje *= -1.f;\n"
1350         "               {\n"
1351         "                       float dist;\n"
// result is assigned but never read.
1352         "                       bool result = true;\n"
1353         "                       float Min0,Max0;\n"
1354         "                       float Min1,Max1;\n"
1355         "                       project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n"
1356         "                       project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n"
1357         "               \n"
// Disjoint intervals on this axis: report false immediately.
1358         "                       if(Max0<Min1 || Max1<Min0)\n"
1359         "                               return false;\n"
1360         "               \n"
// dist is the smaller of the two overlap amounts along this axis.
1361         "                       float d0 = Max0 - Min1;\n"
1362         "                       float d1 = Max1 - Min0;\n"
1363         "                       dist = d0<d1 ? d0:d1;\n"
1364         "                       result = true;\n"
1365         "       \n"
1366         "                       if(dist<*dmin)\n"
1367         "                       {\n"
1368         "                               *dmin = dist;\n"
1369         "                               *sep = crossje;\n"
1370         "                       }\n"
1371         "               }\n"
1372         "       }\n"
1373         "       \n"
// Final orientation step: flip *sep when its xyz part has a negative dot with DeltaC2.
1374         "       if((dot3F4(-DeltaC2,*sep))>0.0f)\n"
1375         "       {\n"
1376         "               *sep = -(*sep);\n"
1377         "       }\n"
1378         "       return true;\n"
1379         "}\n"
// Embedded OpenCL source: findSeparatingAxisUnitSphereKernel.
// Each work-item reads i = get_global_id(0) and acts only when i < numPairs and
// hasSeparatingAxis[i] is nonzero. DeltaC2 is the difference of the two hulls
// m_localCenter values after each is transformed by its body position and orientation.
// The unit-sphere test runs only when the product of the two hulls m_numUniqueEdges
// exceeds numUnitSphereDirections. On a false result hasSeparatingAxis[i] is set to 0;
// on a true result it is set to 1 and the possibly updated normal is stored in separatingNormals[i].
// This line ends the mprKernelsCL string literal, so the trailing semicolon must stay.
1380         "__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n"
1381         "                                                                                                                                                                       __global const b3RigidBodyData_t* rigidBodies, \n"
1382         "                                                                                                                                                                       __global const b3Collidable_t* collidables,\n"
1383         "                                                                                                                                                                       __global const b3ConvexPolyhedronData_t* convexShapes, \n"
1384         "                                                                                                                                                                       __global const float4* vertices,\n"
1385         "                                                                                                                                                                       __global const float4* unitSphereDirections,\n"
1386         "                                                                                                                                                                       __global  float4* separatingNormals,\n"
1387         "                                                                                                                                                                       __global  int* hasSeparatingAxis,\n"
1388         "                                                                                                                                                                       __global  float* dmins,\n"
1389         "                                                                                                                                                                       int numUnitSphereDirections,\n"
1390         "                                                                                                                                                                       int numPairs\n"
1391         "                                                                                                                                                                       )\n"
1392         "{\n"
1393         "       int i = get_global_id(0);\n"
1394         "       \n"
1395         "       if (i<numPairs)\n"
1396         "       {\n"
1397         "               if (hasSeparatingAxis[i])\n"
1398         "               {\n"
1399         "       \n"
1400         "                       int bodyIndexA = pairs[i].x;\n"
1401         "                       int bodyIndexB = pairs[i].y;\n"
1402         "       \n"
1403         "                       int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
1404         "                       int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
1405         "               \n"
1406         "                       int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
1407         "                       int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
1408         "                       \n"
1409         "                       \n"
// numFacesA is never read in this kernel.
1410         "                       int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n"
1411         "       \n"
// dmin starts from dmins[i]; the local copy may be lowered by the test below,
// but it is not written back to dmins in this kernel.
1412         "                       float dmin = dmins[i];\n"
1413         "       \n"
1414         "                       float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
1415         "                       posA.w = 0.f;\n"
1416         "                       float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
1417         "                       posB.w = 0.f;\n"
1418         "                       float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n"
1419         "                       float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
1420         "                       float4 c0 = transform(&c0local, &posA, &ornA);\n"
1421         "                       float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n"
1422         "                       float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
1423         "                       float4 c1 = transform(&c1local,&posB,&ornB);\n"
1424         "                       const float4 DeltaC2 = c0 - c1;\n"
1425         "                       float4 sepNormal = separatingNormals[i];\n"
1426         "                       \n"
1427         "                       int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n"
1428         "                       if (numEdgeEdgeDirections>numUnitSphereDirections)\n"
1429         "                       {\n"
1430         "                               bool sepEE = findSeparatingAxisUnitSphere(      &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n"
1431         "                                                                                                                                                                                                               posB,ornB,\n"
1432         "                                                                                                                                                                                                               DeltaC2,\n"
1433         "                                                                                                                                                                                                               vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n"
// A false result means some tested axis separated the hulls, so the flag for this pair is cleared.
1434         "                               if (!sepEE)\n"
1435         "                               {\n"
1436         "                                       hasSeparatingAxis[i] = 0;\n"
1437         "                               } else\n"
1438         "                               {\n"
1439         "                                       hasSeparatingAxis[i] = 1;\n"
1440         "                                       separatingNormals[i] = sepNormal;\n"
1441         "                               }\n"
1442         "                       }\n"
1443         "               }               //if (hasSeparatingAxis[i])\n"
1444         "       }//(i<numPairs)\n"
1445         "}\n";