[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / b3ConvexHullContact.cpp
1 /*
2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose, 
8 including commercial applications, and to alter it and redistribute it freely, 
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15
16 bool findSeparatingAxisOnGpu = true;
17 bool splitSearchSepAxisConcave = false;
18 bool splitSearchSepAxisConvex = true;
19 bool useMprGpu = true;  //use mpr for edge-edge  (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling...
20 bool bvhTraversalKernelGPU = true;
21 bool findConcaveSeparatingAxisKernelGPU = true;
22 bool clipConcaveFacesAndFindContactsCPU = false;  //false;//true;
23 bool clipConvexFacesAndFindContactsCPU = false;   //false;//true;
24 bool reduceConcaveContactsOnGPU = true;           //false;
25 bool reduceConvexContactsOnGPU = true;            //false;
26 bool findConvexClippingFacesGPU = true;
27 bool useGjk = false;          ///option for CPU/host testing, when findSeparatingAxisOnGpu = false
28 bool useGjkContacts = false;  //////option for CPU/host testing when findSeparatingAxisOnGpu = false
29
30 static int myframecount = 0;  ///for testing
31
32 ///This file was written by Erwin Coumans
33 ///Separating axis rest based on work from Pierre Terdiman, see
34 ///And contact clipping based on work from Simon Hobbs
35
36 //#define B3_DEBUG_SAT_FACE
37
38 //#define CHECK_ON_HOST
39
40 #ifdef CHECK_ON_HOST
41 //#define PERSISTENT_CONTACTS_HOST
42 #endif
43
44 int b3g_actualSATPairTests = 0;
45
46 #include "b3ConvexHullContact.h"
47 #include <string.h>  //memcpy
48 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
49 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h"
50
51 #include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h"
52 #include "Bullet3Geometry/b3AabbUtil.h"
53
54 typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
55
56 #include <float.h>  //for FLT_MAX
57 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
58 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
59 //#include "AdlQuaternion.h"
60
61 #include "kernels/satKernels.h"
62 #include "kernels/mprKernels.h"
63
64 #include "kernels/satConcaveKernels.h"
65
66 #include "kernels/satClipHullContacts.h"
67 #include "kernels/bvhTraversal.h"
68 #include "kernels/primitiveContacts.h"
69
70 #include "Bullet3Geometry/b3AabbUtil.h"
71
72 #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl"
73 #define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl"
74
75 #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl"
76
77 #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl"
78 #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl"
79 #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl"
80
81 #ifndef __global
82 #define __global
83 #endif
84
85 #ifndef __kernel
86 #define __kernel
87 #endif
88
89 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"
90 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"
91 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"
92 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h"
93
94 #define dot3F4 b3Dot
95
96 GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q)
97         : m_context(ctx),
98           m_device(device),
99           m_queue(q),
100
101           m_findSeparatingAxisKernel(0),
102           m_findSeparatingAxisVertexFaceKernel(0),
103           m_findSeparatingAxisEdgeEdgeKernel(0),
104           m_unitSphereDirections(m_context, m_queue),
105
106           m_totalContactsOut(m_context, m_queue),
107           m_sepNormals(m_context, m_queue),
108           m_dmins(m_context, m_queue),
109
110           m_hasSeparatingNormals(m_context, m_queue),
111           m_concaveSepNormals(m_context, m_queue),
112           m_concaveHasSeparatingNormals(m_context, m_queue),
113           m_numConcavePairsOut(m_context, m_queue),
114
115           m_gpuCompoundPairs(m_context, m_queue),
116
117           m_gpuCompoundSepNormals(m_context, m_queue),
118           m_gpuHasCompoundSepNormals(m_context, m_queue),
119
120           m_numCompoundPairsOut(m_context, m_queue)
121 {
122         m_totalContactsOut.push_back(0);
123
124         cl_int errNum = 0;
125
126         if (1)
127         {
128                 const char* mprSrc = mprKernelsCL;
129
130                 const char* srcConcave = satConcaveKernelsCL;
131                 char flags[1024] = {0};
132                 //#ifdef CL_PLATFORM_INTEL
133                 //              sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
134                 //#endif
135                 m_mprPenetrationKernel = 0;
136                 m_findSeparatingAxisUnitSphereKernel = 0;
137
138                 if (useMprGpu)
139                 {
140                         cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH);
141                         b3Assert(errNum == CL_SUCCESS);
142
143                         m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg);
144                         b3Assert(m_mprPenetrationKernel);
145                         b3Assert(errNum == CL_SUCCESS);
146
147                         m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg);
148                         b3Assert(m_findSeparatingAxisUnitSphereKernel);
149                         b3Assert(errNum == CL_SUCCESS);
150
151                         int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
152                         m_unitSphereDirections.resize(numDirections);
153                         m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true);
154                 }
155
156                 cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH);
157                 b3Assert(errNum == CL_SUCCESS);
158
159                 cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH);
160                 b3Assert(errNum == CL_SUCCESS);
161
162                 m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg);
163                 b3Assert(m_findSeparatingAxisKernel);
164                 b3Assert(errNum == CL_SUCCESS);
165
166                 m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg);
167                 b3Assert(m_findSeparatingAxisVertexFaceKernel);
168
169                 m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg);
170                 b3Assert(m_findSeparatingAxisVertexFaceKernel);
171
172                 m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg);
173                 b3Assert(m_findConcaveSeparatingAxisKernel);
174                 b3Assert(errNum == CL_SUCCESS);
175
176                 m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg);
177                 b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel);
178                 b3Assert(errNum == CL_SUCCESS);
179
180                 m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg);
181                 b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel);
182                 b3Assert(errNum == CL_SUCCESS);
183
184                 m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg);
185                 b3Assert(m_findCompoundPairsKernel);
186                 b3Assert(errNum == CL_SUCCESS);
187                 m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg);
188                 b3Assert(m_processCompoundPairsKernel);
189                 b3Assert(errNum == CL_SUCCESS);
190         }
191
192         if (1)
193         {
194                 const char* srcClip = satClipKernelsCL;
195
196                 char flags[1024] = {0};
197                 //#ifdef CL_PLATFORM_INTEL
198                 //              sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
199                 //#endif
200
201                 cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH);
202                 b3Assert(errNum == CL_SUCCESS);
203
204                 m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg);
205                 b3Assert(errNum == CL_SUCCESS);
206
207                 m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg);
208                 b3Assert(errNum == CL_SUCCESS);
209
210                 m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg);
211                 b3Assert(errNum == CL_SUCCESS);
212
213                 m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg);
214                 b3Assert(errNum == CL_SUCCESS);
215
216                 m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg);
217                 b3Assert(errNum == CL_SUCCESS);
218
219                 //              m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
220                 //      b3Assert(errNum==CL_SUCCESS);
221
222                 m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip,
223                                                                                                                                                            "newContactReductionKernel", &errNum, satClipContactsProg);
224                 b3Assert(errNum == CL_SUCCESS);
225         }
226         else
227         {
228                 m_clipHullHullKernel = 0;
229                 m_clipCompoundsHullHullKernel = 0;
230                 m_findClippingFacesKernel = 0;
231                 m_newContactReductionKernel = 0;
232                 m_clipFacesAndFindContacts = 0;
233                 m_clipHullHullConcaveConvexKernel = 0;
234                 //              m_extractManifoldAndAddContactKernel = 0;
235         }
236
237         if (1)
238         {
239                 const char* srcBvh = bvhTraversalKernelCL;
240                 cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH);
241                 b3Assert(errNum == CL_SUCCESS);
242
243                 m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, "");
244                 b3Assert(errNum == CL_SUCCESS);
245         }
246
247         {
248                 const char* primitiveContactsSrc = primitiveContactsKernelsCL;
249                 cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH);
250                 b3Assert(errNum == CL_SUCCESS);
251
252                 m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, "");
253                 b3Assert(errNum == CL_SUCCESS);
254
255                 m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg);
256                 b3Assert(errNum == CL_SUCCESS);
257                 b3Assert(m_findConcaveSphereContactsKernel);
258
259                 m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, "");
260                 b3Assert(errNum == CL_SUCCESS);
261                 b3Assert(m_processCompoundPairsPrimitivesKernel);
262         }
263 }
264
265 GpuSatCollision::~GpuSatCollision()
266 {
267         if (m_findSeparatingAxisVertexFaceKernel)
268                 clReleaseKernel(m_findSeparatingAxisVertexFaceKernel);
269
270         if (m_findSeparatingAxisEdgeEdgeKernel)
271                 clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel);
272
273         if (m_findSeparatingAxisUnitSphereKernel)
274                 clReleaseKernel(m_findSeparatingAxisUnitSphereKernel);
275
276         if (m_mprPenetrationKernel)
277                 clReleaseKernel(m_mprPenetrationKernel);
278
279         if (m_findSeparatingAxisKernel)
280                 clReleaseKernel(m_findSeparatingAxisKernel);
281
282         if (m_findConcaveSeparatingAxisVertexFaceKernel)
283                 clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel);
284
285         if (m_findConcaveSeparatingAxisEdgeEdgeKernel)
286                 clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel);
287
288         if (m_findConcaveSeparatingAxisKernel)
289                 clReleaseKernel(m_findConcaveSeparatingAxisKernel);
290
291         if (m_findCompoundPairsKernel)
292                 clReleaseKernel(m_findCompoundPairsKernel);
293
294         if (m_processCompoundPairsKernel)
295                 clReleaseKernel(m_processCompoundPairsKernel);
296
297         if (m_findClippingFacesKernel)
298                 clReleaseKernel(m_findClippingFacesKernel);
299
300         if (m_clipFacesAndFindContacts)
301                 clReleaseKernel(m_clipFacesAndFindContacts);
302         if (m_newContactReductionKernel)
303                 clReleaseKernel(m_newContactReductionKernel);
304         if (m_primitiveContactsKernel)
305                 clReleaseKernel(m_primitiveContactsKernel);
306
307         if (m_findConcaveSphereContactsKernel)
308                 clReleaseKernel(m_findConcaveSphereContactsKernel);
309
310         if (m_processCompoundPairsPrimitivesKernel)
311                 clReleaseKernel(m_processCompoundPairsPrimitivesKernel);
312
313         if (m_clipHullHullKernel)
314                 clReleaseKernel(m_clipHullHullKernel);
315         if (m_clipCompoundsHullHullKernel)
316                 clReleaseKernel(m_clipCompoundsHullHullKernel);
317
318         if (m_clipHullHullConcaveConvexKernel)
319                 clReleaseKernel(m_clipHullHullConcaveConvexKernel);
320         //      if (m_extractManifoldAndAddContactKernel)
321         //      clReleaseKernel(m_extractManifoldAndAddContactKernel);
322
323         if (m_bvhTraversalKernel)
324                 clReleaseKernel(m_bvhTraversalKernel);
325 }
326
327 struct MyTriangleCallback : public b3NodeOverlapCallback
328 {
329         int m_bodyIndexA;
330         int m_bodyIndexB;
331
332         virtual void processNode(int subPart, int triangleIndex)
333         {
334                 printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB);
335                 printf("triangleIndex %d\n", triangleIndex);
336         }
337 };
338
339 #define float4 b3Vector3
340 #define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w)
341
342 float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)
343 {
344         float4 n = planeEqn;
345         n[3] = 0.f;
346         float dist = dot3F4(n, point) + planeEqn[3];
347         *closestPointOnFace = point - dist * n;
348         return dist;
349 }
350
351 #define cross3(a, b) (a.cross(b))
352 b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn)
353 {
354         b3Transform tr;
355         tr.setIdentity();
356         tr.setOrigin(*pos);
357         tr.setRotation(*orn);
358         b3Vector3 res = tr(*v);
359         return res;
360 }
361
362 inline bool IsPointInPolygon(const float4& p,
363                                                          const b3GpuFace* face,
364                                                          const float4* baseVertex,
365                                                          const int* convexIndices,
366                                                          float4* out)
367 {
368         float4 a;
369         float4 b;
370         float4 ab;
371         float4 ap;
372         float4 v;
373
374         float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f);
375
376         if (face->m_numIndices < 2)
377                 return false;
378
379         float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]];
380         b = v0;
381
382         for (unsigned i = 0; i != face->m_numIndices; ++i)
383         {
384                 a = b;
385                 float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];
386                 b = vi;
387                 ab = b - a;
388                 ap = p - a;
389                 v = cross3(ab, plane);
390
391                 if (b3Dot(ap, v) > 0.f)
392                 {
393                         float ab_m2 = b3Dot(ab, ab);
394                         float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f;
395                         if (rt <= 0.f)
396                         {
397                                 *out = a;
398                         }
399                         else if (rt >= 1.f)
400                         {
401                                 *out = b;
402                         }
403                         else
404                         {
405                                 float s = 1.f - rt;
406                                 out[0].x = s * a.x + rt * b.x;
407                                 out[0].y = s * a.y + rt * b.y;
408                                 out[0].z = s * a.z + rt * b.z;
409                         }
410                         return false;
411                 }
412         }
413         return true;
414 }
415
416 #define normalize3(a) (a.normalize())
417
418 int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
419 {
420         if (nPoints == 0)
421                 return 0;
422
423         if (nPoints <= 4)
424                 return nPoints;
425
426         if (nPoints > 64)
427                 nPoints = 64;
428
429         float4 center = b3MakeVector3(0, 0, 0, 0);
430         {
431                 for (int i = 0; i < nPoints; i++)
432                         center += p[i];
433                 center /= (float)nPoints;
434         }
435
436         //      sample 4 directions
437
438         float4 aVector = p[0] - center;
439         float4 u = cross3(nearNormal, aVector);
440         float4 v = cross3(nearNormal, u);
441         u = normalize3(u);
442         v = normalize3(v);
443
444         //keep point with deepest penetration
445         float minW = FLT_MAX;
446
447         int minIndex = -1;
448
449         float4 maxDots;
450         maxDots.x = FLT_MIN;
451         maxDots.y = FLT_MIN;
452         maxDots.z = FLT_MIN;
453         maxDots.w = FLT_MIN;
454
455         //      idx, distance
456         for (int ie = 0; ie < nPoints; ie++)
457         {
458                 if (p[ie].w < minW)
459                 {
460                         minW = p[ie].w;
461                         minIndex = ie;
462                 }
463                 float f;
464                 float4 r = p[ie] - center;
465                 f = dot3F4(u, r);
466                 if (f < maxDots.x)
467                 {
468                         maxDots.x = f;
469                         contactIdx[0].x = ie;
470                 }
471
472                 f = dot3F4(-u, r);
473                 if (f < maxDots.y)
474                 {
475                         maxDots.y = f;
476                         contactIdx[0].y = ie;
477                 }
478
479                 f = dot3F4(v, r);
480                 if (f < maxDots.z)
481                 {
482                         maxDots.z = f;
483                         contactIdx[0].z = ie;
484                 }
485
486                 f = dot3F4(-v, r);
487                 if (f < maxDots.w)
488                 {
489                         maxDots.w = f;
490                         contactIdx[0].w = ie;
491                 }
492         }
493
494         if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
495         {
496                 //replace the first contact with minimum (todo: replace contact with least penetration)
497                 contactIdx[0].x = minIndex;
498         }
499
500         return 4;
501 }
502
503 #define MAX_VERTS 1024
504
505 inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
506 {
507         min = FLT_MAX;
508         max = -FLT_MAX;
509         int numVerts = hull.m_numVertices;
510
511         const float4 localDir = b3QuatRotate(orn.inverse(), dir);
512
513         b3Scalar offset = dot3F4(pos, dir);
514
515         for (int i = 0; i < numVerts; i++)
516         {
517                 //b3Vector3 pt = trans * vertices[m_vertexOffset+i];
518                 //b3Scalar dp = pt.dot(dir);
519                 //b3Vector3 vertex = vertices[hull.m_vertexOffset+i];
520                 b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir);
521                 //b3Assert(dp==dpL);
522                 if (dp < min) min = dp;
523                 if (dp > max) max = dp;
524         }
525         if (min > max)
526         {
527                 b3Scalar tmp = min;
528                 min = max;
529                 max = tmp;
530         }
531         min += offset;
532         max += offset;
533 }
534
535 static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
536                                                 const float4& posA, const b3Quaternion& ornA,
537                                                 const float4& posB, const b3Quaternion& ornB,
538                                                 const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)
539 {
540         b3Scalar Min0, Max0;
541         b3Scalar Min1, Max1;
542         project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0);
543         project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1);
544
545         if (Max0 < Min1 || Max1 < Min0)
546                 return false;
547
548         b3Scalar d0 = Max0 - Min1;
549         assert(d0 >= 0.0f);
550         b3Scalar d1 = Max1 - Min0;
551         assert(d1 >= 0.0f);
552         depth = d0 < d1 ? d0 : d1;
553         return true;
554 }
555
556 inline bool IsAlmostZero(const b3Vector3& v)
557 {
558         if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false;
559         return true;
560 }
561
562 static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
563                                                            const float4& posA1,
564                                                            const b3Quaternion& ornA,
565                                                            const float4& posB1,
566                                                            const b3Quaternion& ornB,
567                                                            const b3AlignedObjectArray<b3Vector3>& verticesA,
568                                                            const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
569                                                            const b3AlignedObjectArray<b3GpuFace>& facesA,
570                                                            const b3AlignedObjectArray<int>& indicesA,
571                                                            const b3AlignedObjectArray<b3Vector3>& verticesB,
572                                                            const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
573                                                            const b3AlignedObjectArray<b3GpuFace>& facesB,
574                                                            const b3AlignedObjectArray<int>& indicesB,
575
576                                                            b3Vector3& sep)
577 {
578         B3_PROFILE("findSeparatingAxis");
579
580         b3g_actualSATPairTests++;
581         float4 posA = posA1;
582         posA.w = 0.f;
583         float4 posB = posB1;
584         posB.w = 0.f;
585         //#ifdef TEST_INTERNAL_OBJECTS
586         float4 c0local = (float4&)hullA.m_localCenter;
587         float4 c0 = transform(&c0local, &posA, &ornA);
588         float4 c1local = (float4&)hullB.m_localCenter;
589         float4 c1 = transform(&c1local, &posB, &ornB);
590         const float4 deltaC2 = c0 - c1;
591         //#endif
592
593         b3Scalar dmin = FLT_MAX;
594         int curPlaneTests = 0;
595
596         int numFacesA = hullA.m_numFaces;
597         // Test normals from hullA
598         for (int i = 0; i < numFacesA; i++)
599         {
600                 const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane;
601                 float4 faceANormalWS = b3QuatRotate(ornA, normal);
602
603                 if (dot3F4(deltaC2, faceANormalWS) < 0)
604                         faceANormalWS *= -1.f;
605
606                 curPlaneTests++;
607 #ifdef TEST_INTERNAL_OBJECTS
608                 gExpectedNbTests++;
609                 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
610                         continue;
611                 gActualNbTests++;
612 #endif
613
614                 b3Scalar d;
615                 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))
616                         return false;
617
618                 if (d < dmin)
619                 {
620                         dmin = d;
621                         sep = (b3Vector3&)faceANormalWS;
622                 }
623         }
624
625         int numFacesB = hullB.m_numFaces;
626         // Test normals from hullB
627         for (int i = 0; i < numFacesB; i++)
628         {
629                 float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane;
630                 float4 WorldNormal = b3QuatRotate(ornB, normal);
631
632                 if (dot3F4(deltaC2, WorldNormal) < 0)
633                 {
634                         WorldNormal *= -1.f;
635                 }
636                 curPlaneTests++;
637 #ifdef TEST_INTERNAL_OBJECTS
638                 gExpectedNbTests++;
639                 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))
640                         continue;
641                 gActualNbTests++;
642 #endif
643
644                 b3Scalar d;
645                 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))
646                         return false;
647
648                 if (d < dmin)
649                 {
650                         dmin = d;
651                         sep = (b3Vector3&)WorldNormal;
652                 }
653         }
654
655         int curEdgeEdge = 0;
656         // Test edges
657         for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
658         {
659                 const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0];
660                 float4 edge0World = b3QuatRotate(ornA, (float4&)edge0);
661
662                 for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
663                 {
664                         const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1];
665                         float4 edge1World = b3QuatRotate(ornB, (float4&)edge1);
666
667                         float4 crossje = cross3(edge0World, edge1World);
668
669                         curEdgeEdge++;
670                         if (!IsAlmostZero((b3Vector3&)crossje))
671                         {
672                                 crossje = normalize3(crossje);
673                                 if (dot3F4(deltaC2, crossje) < 0)
674                                         crossje *= -1.f;
675
676 #ifdef TEST_INTERNAL_OBJECTS
677                                 gExpectedNbTests++;
678                                 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))
679                                         continue;
680                                 gActualNbTests++;
681 #endif
682
683                                 b3Scalar dist;
684                                 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))
685                                         return false;
686
687                                 if (dist < dmin)
688                                 {
689                                         dmin = dist;
690                                         sep = (b3Vector3&)crossje;
691                                 }
692                         }
693                 }
694         }
695
696         if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f)
697                 sep = -sep;
698
699         return true;
700 }
701
702 bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
703                                                                 const b3Float4& posA1,
704                                                                 const b3Quat& ornA,
705                                                                 const b3Float4& posB1,
706                                                                 const b3Quat& ornB,
707                                                                 const b3Float4& DeltaC2,
708                                                                 __global const b3AlignedObjectArray<float4>& vertices,
709                                                                 __global const b3AlignedObjectArray<float4>& uniqueEdges,
710                                                                 __global const b3AlignedObjectArray<b3GpuFace>& faces,
711                                                                 __global const b3AlignedObjectArray<int>& indices,
712                                                                 float4* sep,
713                                                                 float* dmin)
714 {
715         //      int i = get_global_id(0);
716
717         float4 posA = posA1;
718         posA.w = 0.f;
719         float4 posB = posB1;
720         posB.w = 0.f;
721
722         //int curPlaneTests=0;
723
724         int curEdgeEdge = 0;
725         // Test edges
726         for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
727         {
728                 const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0];
729                 float4 edge0World = b3QuatRotate(ornA, edge0);
730
731                 for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
732                 {
733                         const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1];
734                         float4 edge1World = b3QuatRotate(ornB, edge1);
735
736                         float4 crossje = cross3(edge0World, edge1World);
737
738                         curEdgeEdge++;
739                         if (!IsAlmostZero(crossje))
740                         {
741                                 crossje = normalize3(crossje);
742                                 if (dot3F4(DeltaC2, crossje) < 0)
743                                         crossje *= -1.f;
744
745                                 float dist;
746                                 bool result = true;
747                                 {
748                                         float Min0, Max0;
749                                         float Min1, Max1;
750                                         project(*hullA, posA, ornA, crossje, vertices, Min0, Max0);
751                                         project(*hullB, posB, ornB, crossje, vertices, Min1, Max1);
752
753                                         if (Max0 < Min1 || Max1 < Min0)
754                                                 result = false;
755
756                                         float d0 = Max0 - Min1;
757                                         float d1 = Max1 - Min0;
758                                         dist = d0 < d1 ? d0 : d1;
759                                         result = true;
760                                 }
761
762                                 if (dist < *dmin)
763                                 {
764                                         *dmin = dist;
765                                         *sep = crossje;
766                                 }
767                         }
768                 }
769         }
770
771         if ((dot3F4(-DeltaC2, *sep)) > 0.0f)
772         {
773                 *sep = -(*sep);
774         }
775         return true;
776 }
777
778 __inline float4 lerp3(const float4& a, const float4& b, float t)
779 {
780         return b3MakeVector3(a.x + (b.x - a.x) * t,
781                                                  a.y + (b.y - a.y) * t,
782                                                  a.z + (b.z - a.z) * t,
783                                                  0.f);
784 }
785
786 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
787 int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut)
788 {
789         int ve;
790         float ds, de;
791         int numVertsOut = 0;
792         if (numVertsIn < 2)
793                 return 0;
794
795         float4 firstVertex = pVtxIn[numVertsIn - 1];
796         float4 endVertex = pVtxIn[0];
797
798         ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS;
799
800         for (ve = 0; ve < numVertsIn; ve++)
801         {
802                 endVertex = pVtxIn[ve];
803
804                 de = dot3F4(planeNormalWS, endVertex) + planeEqWS;
805
806                 if (ds < 0)
807                 {
808                         if (de < 0)
809                         {
810                                 // Start < 0, end < 0, so output endVertex
811                                 ppVtxOut[numVertsOut++] = endVertex;
812                         }
813                         else
814                         {
815                                 // Start < 0, end >= 0, so output intersection
816                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
817                         }
818                 }
819                 else
820                 {
821                         if (de < 0)
822                         {
823                                 // Start >= 0, end < 0 so output intersection and end
824                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
825                                 ppVtxOut[numVertsOut++] = endVertex;
826                         }
827                 }
828                 firstVertex = endVertex;
829                 ds = de;
830         }
831         return numVertsOut;
832 }
833
834 int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
835                                                 const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1,
836                                                 float4* worldVertsB2, int capacityWorldVertsB2,
837                                                 const float minDist, float maxDist,
838                                                 const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
839                                                 //const float4* verticesB,      const b3GpuFace* facesB,        const int* indicesB,
840                                                 float4* contactsOut,
841                                                 int contactCapacity)
842 {
843         int numContactsOut = 0;
844
845         float4* pVtxIn = worldVertsB1;
846         float4* pVtxOut = worldVertsB2;
847
848         int numVertsIn = numWorldVertsB1;
849         int numVertsOut = 0;
850
851         int closestFaceA = -1;
852         {
853                 float dmin = FLT_MAX;
854                 for (int face = 0; face < hullA->m_numFaces; face++)
855                 {
856                         const float4 Normal = b3MakeVector3(
857                                 facesA[hullA->m_faceOffset + face].m_plane.x,
858                                 facesA[hullA->m_faceOffset + face].m_plane.y,
859                                 facesA[hullA->m_faceOffset + face].m_plane.z, 0.f);
860                         const float4 faceANormalWS = b3QuatRotate(ornA, Normal);
861
862                         float d = dot3F4(faceANormalWS, separatingNormal);
863                         if (d < dmin)
864                         {
865                                 dmin = d;
866                                 closestFaceA = face;
867                         }
868                 }
869         }
870         if (closestFaceA < 0)
871                 return numContactsOut;
872
873         b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];
874
875         // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
876         //      int numContacts = numWorldVertsB1;
877         int numVerticesA = polyA.m_numIndices;
878         for (int e0 = 0; e0 < numVerticesA; e0++)
879         {
880                 const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]];
881                 const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];
882                 const float4 edge0 = a - b;
883                 const float4 WorldEdge0 = b3QuatRotate(ornA, edge0);
884                 float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
885                 float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA);
886
887                 float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1);
888                 float4 worldA1 = transform(&a, &posA, &ornA);
889                 float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1);
890
891                 float4 planeNormalWS = planeNormalWS1;
892                 float planeEqWS = planeEqWS1;
893
894                 //clip face
895                 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
896                 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);
897
898                 //btSwap(pVtxIn,pVtxOut);
899                 float4* tmp = pVtxOut;
900                 pVtxOut = pVtxIn;
901                 pVtxIn = tmp;
902                 numVertsIn = numVertsOut;
903                 numVertsOut = 0;
904         }
905
906         // only keep points that are behind the witness face
907         {
908                 float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
909                 float localPlaneEq = polyA.m_plane.w;
910                 float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal);
911                 float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA);
912                 for (int i = 0; i < numVertsIn; i++)
913                 {
914                         float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS;
915                         if (depth <= minDist)
916                         {
917                                 depth = minDist;
918                         }
919                         if (numContactsOut < contactCapacity)
920                         {
921                                 if (depth <= maxDist)
922                                 {
923                                         float4 pointInWorld = pVtxIn[i];
924                                         //resultOut.addContactPoint(separatingNormal,point,depth);
925                                         contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);
926                                         //printf("depth=%f\n",depth);
927                                 }
928                         }
929                         else
930                         {
931                                 b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity);
932                         }
933                 }
934         }
935
936         return numContactsOut;
937 }
938
939 static int clipHullAgainstHull(const float4& separatingNormal,
940                                                            const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
941                                                            const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB,
942                                                            float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
943                                                            const float minDist, float maxDist,
944                                                            const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
945                                                            const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,
946
947                                                            float4* contactsOut,
948                                                            int contactCapacity)
949 {
950         int numContactsOut = 0;
951         int numWorldVertsB1 = 0;
952
953         B3_PROFILE("clipHullAgainstHull");
954
955         //      float curMaxDist=maxDist;
956         int closestFaceB = -1;
957         float dmax = -FLT_MAX;
958
959         {
960                 //B3_PROFILE("closestFaceB");
961                 if (hullB.m_numFaces != 1)
962                 {
963                         //printf("wtf\n");
964                 }
965                 static bool once = true;
966                 //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
967
968                 for (int face = 0; face < hullB.m_numFaces; face++)
969                 {
970 #ifdef BT_DEBUG_SAT_FACE
971                         if (once)
972                                 printf("face %d\n", face);
973                         const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];
974                         if (once)
975                         {
976                                 for (int i = 0; i < faceB->m_numIndices; i++)
977                                 {
978                                         float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]];
979                                         printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);
980                                 }
981                         }
982 #endif  //BT_DEBUG_SAT_FACE \
983         //if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
984                         {
985                                 const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x,
986                                                                                                         facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);
987                                 const float4 WorldNormal = b3QuatRotate(ornB, Normal);
988 #ifdef BT_DEBUG_SAT_FACE
989                                 if (once)
990                                         printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);
991 #endif
992                                 float d = dot3F4(WorldNormal, separatingNormal);
993                                 if (d > dmax)
994                                 {
995                                         dmax = d;
996                                         closestFaceB = face;
997                                 }
998                         }
999                 }
1000                 once = false;
1001         }
1002
1003         b3Assert(closestFaceB >= 0);
1004         {
1005                 //B3_PROFILE("worldVertsB1");
1006                 const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];
1007                 const int numVertices = polyB.m_numIndices;
1008                 for (int e0 = 0; e0 < numVertices; e0++)
1009                 {
1010                         const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
1011                         worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB);
1012                 }
1013         }
1014
1015         if (closestFaceB >= 0)
1016         {
1017                 //B3_PROFILE("clipFaceAgainstHull");
1018                 numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA,
1019                                                                                          posA, ornA,
1020                                                                                          worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist,
1021                                                                                          verticesA, facesA, indicesA,
1022                                                                                          contactsOut, contactCapacity);
1023         }
1024
1025         return numContactsOut;
1026 }
1027
1028 #define PARALLEL_SUM(v, n) \
1029         for (int j = 1; j < n; j++) v[0] += v[j];
1030 #define PARALLEL_DO(execution, n)  \
1031         for (int ie = 0; ie < n; ie++) \
1032         {                              \
1033                 execution;                 \
1034         }
1035 #define REDUCE_MAX(v, n)                                                                                     \
1036         {                                                                                                        \
1037                 int i = 0;                                                                                           \
1038                 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \
1039         }
1040 #define REDUCE_MIN(v, n)                                                                                     \
1041         {                                                                                                        \
1042                 int i = 0;                                                                                           \
1043                 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \
1044         }
1045
1046 int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
1047 {
1048         if (nPoints == 0)
1049                 return 0;
1050
1051         if (nPoints <= 4)
1052                 return nPoints;
1053
1054         if (nPoints > 64)
1055                 nPoints = 64;
1056
1057         float4 center = make_float4(0, 0, 0, 0);
1058         {
1059                 for (int i = 0; i < nPoints; i++)
1060                         center += p[i];
1061                 center /= (float)nPoints;
1062         }
1063
1064         //      sample 4 directions
1065
1066         float4 aVector = p[0] - center;
1067         float4 u = cross3(nearNormal, aVector);
1068         float4 v = cross3(nearNormal, u);
1069         u = normalize3(u);
1070         v = normalize3(v);
1071
1072         //keep point with deepest penetration
1073         float minW = FLT_MAX;
1074
1075         int minIndex = -1;
1076
1077         float4 maxDots;
1078         maxDots.x = FLT_MIN;
1079         maxDots.y = FLT_MIN;
1080         maxDots.z = FLT_MIN;
1081         maxDots.w = FLT_MIN;
1082
1083         //      idx, distance
1084         for (int ie = 0; ie < nPoints; ie++)
1085         {
1086                 if (p[ie].w < minW)
1087                 {
1088                         minW = p[ie].w;
1089                         minIndex = ie;
1090                 }
1091                 float f;
1092                 float4 r = p[ie] - center;
1093                 f = dot3F4(u, r);
1094                 if (f < maxDots.x)
1095                 {
1096                         maxDots.x = f;
1097                         contactIdx[0].x = ie;
1098                 }
1099
1100                 f = dot3F4(-u, r);
1101                 if (f < maxDots.y)
1102                 {
1103                         maxDots.y = f;
1104                         contactIdx[0].y = ie;
1105                 }
1106
1107                 f = dot3F4(v, r);
1108                 if (f < maxDots.z)
1109                 {
1110                         maxDots.z = f;
1111                         contactIdx[0].z = ie;
1112                 }
1113
1114                 f = dot3F4(-v, r);
1115                 if (f < maxDots.w)
1116                 {
1117                         maxDots.w = f;
1118                         contactIdx[0].w = ie;
1119                 }
1120         }
1121
1122         if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
1123         {
1124                 //replace the first contact with minimum (todo: replace contact with least penetration)
1125                 contactIdx[0].x = minIndex;
1126         }
1127
1128         return 4;
1129 }
1130
1131 int clipHullHullSingle(
1132         int bodyIndexA, int bodyIndexB,
1133         const float4& posA,
1134         const b3Quaternion& ornA,
1135         const float4& posB,
1136         const b3Quaternion& ornB,
1137
1138         int collidableIndexA, int collidableIndexB,
1139
1140         const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
1141         b3AlignedObjectArray<b3Contact4>* globalContactOut,
1142         int& nContacts,
1143
1144         const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
1145         const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
1146
1147         const b3AlignedObjectArray<b3Vector3>& verticesA,
1148         const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
1149         const b3AlignedObjectArray<b3GpuFace>& facesA,
1150         const b3AlignedObjectArray<int>& indicesA,
1151
1152         const b3AlignedObjectArray<b3Vector3>& verticesB,
1153         const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
1154         const b3AlignedObjectArray<b3GpuFace>& facesB,
1155         const b3AlignedObjectArray<int>& indicesB,
1156
1157         const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
1158         const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
1159         const b3Vector3& sepNormalWorldSpace,
1160         int maxContactCapacity)
1161 {
1162         int contactIndex = -1;
1163         b3ConvexPolyhedronData hullA, hullB;
1164
1165         b3Collidable colA = hostCollidablesA[collidableIndexA];
1166         hullA = hostConvexDataA[colA.m_shapeIndex];
1167         //printf("numvertsA = %d\n",hullA.m_numVertices);
1168
1169         b3Collidable colB = hostCollidablesB[collidableIndexB];
1170         hullB = hostConvexDataB[colB.m_shapeIndex];
1171         //printf("numvertsB = %d\n",hullB.m_numVertices);
1172
1173         float4 contactsOut[MAX_VERTS];
1174         int localContactCapacity = MAX_VERTS;
1175
1176 #ifdef _WIN32
1177         b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
1178         b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
1179 #endif
1180
1181         {
1182                 float4 worldVertsB1[MAX_VERTS];
1183                 float4 worldVertsB2[MAX_VERTS];
1184                 int capacityWorldVerts = MAX_VERTS;
1185
1186                 float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);
1187                 int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
1188                 int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
1189
1190                 b3Scalar minDist = -1;
1191                 b3Scalar maxDist = 0.;
1192
1193                 b3Transform trA, trB;
1194                 {
1195                         //B3_PROFILE("transform computation");
1196                         //trA.setIdentity();
1197                         trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z));
1198                         trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w));
1199
1200                         //trB.setIdentity();
1201                         trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z));
1202                         trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));
1203                 }
1204
1205                 b3Quaternion trAorn = trA.getRotation();
1206                 b3Quaternion trBorn = trB.getRotation();
1207
1208                 int numContactsOut = clipHullAgainstHull(hostNormal,
1209                                                                                                  hostConvexDataA.at(shapeA),
1210                                                                                                  hostConvexDataB.at(shapeB),
1211                                                                                                  (float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
1212                                                                                                  (float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
1213                                                                                                  worldVertsB1, worldVertsB2, capacityWorldVerts,
1214                                                                                                  minDist, maxDist,
1215                                                                                                  verticesA, facesA, indicesA,
1216                                                                                                  verticesB, facesB, indicesB,
1217
1218                                                                                                  contactsOut, localContactCapacity);
1219
1220                 if (numContactsOut > 0)
1221                 {
1222                         B3_PROFILE("overlap");
1223
1224                         float4 normalOnSurfaceB = (float4&)hostNormal;
1225
1226                         b3Int4 contactIdx;
1227                         contactIdx.x = 0;
1228                         contactIdx.y = 1;
1229                         contactIdx.z = 2;
1230                         contactIdx.w = 3;
1231
1232                         int numPoints = 0;
1233
1234                         {
1235                                 //      B3_PROFILE("extractManifold");
1236                                 numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
1237                         }
1238
1239                         b3Assert(numPoints);
1240
1241                         if (nContacts < maxContactCapacity)
1242                         {
1243                                 contactIndex = nContacts;
1244                                 globalContactOut->expand();
1245                                 b3Contact4& contact = globalContactOut->at(nContacts);
1246                                 contact.m_batchIdx = 0;  //i;
1247                                 contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
1248                                 contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
1249
1250                                 contact.m_frictionCoeffCmp = 45874;
1251                                 contact.m_restituitionCoeffCmp = 0;
1252
1253                                 //                      float distance = 0.f;
1254                                 for (int p = 0; p < numPoints; p++)
1255                                 {
1256                                         contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];  //check if it is actually on B
1257                                         contact.m_worldNormalOnB = normalOnSurfaceB;
1258                                 }
1259                                 //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
1260                                 contact.m_worldNormalOnB.w = (b3Scalar)numPoints;
1261                                 nContacts++;
1262                         }
1263                         else
1264                         {
1265                                 b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
1266                         }
1267                 }
1268         }
1269         return contactIndex;
1270 }
1271
1272 void computeContactPlaneConvex(int pairIndex,
1273                                                            int bodyIndexA, int bodyIndexB,
1274                                                            int collidableIndexA, int collidableIndexB,
1275                                                            const b3RigidBodyData* rigidBodies,
1276                                                            const b3Collidable* collidables,
1277                                                            const b3ConvexPolyhedronData* convexShapes,
1278                                                            const b3Vector3* convexVertices,
1279                                                            const int* convexIndices,
1280                                                            const b3GpuFace* faces,
1281                                                            b3Contact4* globalContactsOut,
1282                                                            int& nGlobalContactsOut,
1283                                                            int maxContactCapacity)
1284 {
1285         int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
1286         const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex];
1287
1288         b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
1289         b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
1290         b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
1291         b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
1292
1293         //      int numContactsOut = 0;
1294         //      int numWorldVertsB1= 0;
1295
1296         b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
1297         b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
1298         b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
1299         float planeConstant = planeEq.w;
1300         b3Transform convexWorldTransform;
1301         convexWorldTransform.setIdentity();
1302         convexWorldTransform.setOrigin(posB);
1303         convexWorldTransform.setRotation(ornB);
1304         b3Transform planeTransform;
1305         planeTransform.setIdentity();
1306         planeTransform.setOrigin(posA);
1307         planeTransform.setRotation(ornA);
1308
1309         b3Transform planeInConvex;
1310         planeInConvex = convexWorldTransform.inverse() * planeTransform;
1311         b3Transform convexInPlane;
1312         convexInPlane = planeTransform.inverse() * convexWorldTransform;
1313
1314         b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
1315         float maxDot = -1e30;
1316         int hitVertex = -1;
1317         b3Vector3 hitVtx;
1318
1319 #define MAX_PLANE_CONVEX_POINTS 64
1320
1321         b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
1322         int numPoints = 0;
1323
1324         b3Int4 contactIdx;
1325         contactIdx.s[0] = 0;
1326         contactIdx.s[1] = 1;
1327         contactIdx.s[2] = 2;
1328         contactIdx.s[3] = 3;
1329
1330         for (int i = 0; i < hullB->m_numVertices; i++)
1331         {
1332                 b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
1333                 float curDot = vtx.dot(planeNormalInConvex);
1334
1335                 if (curDot > maxDot)
1336                 {
1337                         hitVertex = i;
1338                         maxDot = curDot;
1339                         hitVtx = vtx;
1340                         //make sure the deepest points is always included
1341                         if (numPoints == MAX_PLANE_CONVEX_POINTS)
1342                                 numPoints--;
1343                 }
1344
1345                 if (numPoints < MAX_PLANE_CONVEX_POINTS)
1346                 {
1347                         b3Vector3 vtxWorld = convexWorldTransform * vtx;
1348                         b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
1349                         float dist = planeNormal.dot(vtxInPlane) - planeConstant;
1350                         if (dist < 0.f)
1351                         {
1352                                 vtxWorld.w = dist;
1353                                 contactPoints[numPoints] = vtxWorld;
1354                                 numPoints++;
1355                         }
1356                 }
1357         }
1358
1359         int numReducedPoints = 0;
1360
1361         numReducedPoints = numPoints;
1362
1363         if (numPoints > 4)
1364         {
1365                 numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
1366         }
1367         int dstIdx;
1368         //    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
1369
1370         if (numReducedPoints > 0)
1371         {
1372                 if (nGlobalContactsOut < maxContactCapacity)
1373                 {
1374                         dstIdx = nGlobalContactsOut;
1375                         nGlobalContactsOut++;
1376
1377                         b3Contact4* c = &globalContactsOut[dstIdx];
1378                         c->m_worldNormalOnB = -planeNormalWorld;
1379                         c->setFrictionCoeff(0.7);
1380                         c->setRestituitionCoeff(0.f);
1381
1382                         c->m_batchIdx = pairIndex;
1383                         c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
1384                         c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
1385                         for (int i = 0; i < numReducedPoints; i++)
1386                         {
1387                                 b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
1388                                 c->m_worldPosB[i] = pOnB1;
1389                         }
1390                         c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
1391                 }  //if (dstIdx < numPairs)
1392         }
1393
1394         //      printf("computeContactPlaneConvex\n");
1395 }
1396
1397 B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
1398 {
1399         b3Vector3 vecOut;
1400         vecOut.setValue(
1401                 (b3Scalar)(vecIn[0]) / (quantization.x),
1402                 (b3Scalar)(vecIn[1]) / (quantization.y),
1403                 (b3Scalar)(vecIn[2]) / (quantization.z));
1404         vecOut += bvhAabbMin;
1405         return vecOut;
1406 }
1407
1408 void traverseTreeTree()
1409 {
1410 }
1411
1412 #include "Bullet3Common/shared/b3Mat3x3.h"
1413
1414 int numAabbChecks = 0;
1415 int maxNumAabbChecks = 0;
1416 int maxDepth = 0;
1417
1418 // work-in-progress
1419 __kernel void findCompoundPairsKernel(
1420         int pairIndex,
1421         int bodyIndexA,
1422         int bodyIndexB,
1423         int collidableIndexA,
1424         int collidableIndexB,
1425         __global const b3RigidBodyData* rigidBodies,
1426         __global const b3Collidable* collidables,
1427         __global const b3ConvexPolyhedronData* convexShapes,
1428         __global const b3AlignedObjectArray<b3Float4>& vertices,
1429         __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace,
1430         __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace,
1431         __global const b3GpuChildShape* gpuChildShapes,
1432         __global b3Int4* gpuCompoundPairsOut,
1433         __global int* numCompoundPairsOut,
1434         int maxNumCompoundPairsCapacity,
1435         b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
1436         b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
1437         b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
1438 {
1439         numAabbChecks = 0;
1440         maxNumAabbChecks = 0;
1441         //      int i = pairIndex;
1442         {
1443                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1444                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1445
1446                 //once the broadphase avoids static-static pairs, we can remove this test
1447                 if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
1448                 {
1449                         return;
1450                 }
1451
1452                 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1453                 {
1454                         int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
1455                         int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
1456                         int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
1457                         int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
1458                         int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
1459
1460                         int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;
1461
1462                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1463                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1464
1465                         b3Transform transA;
1466                         transA.setIdentity();
1467                         transA.setOrigin(posA);
1468                         transA.setRotation(ornA);
1469
1470                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1471                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1472
1473                         b3Transform transB;
1474                         transB.setIdentity();
1475                         transB.setOrigin(posB);
1476                         transB.setRotation(ornB);
1477
1478                         for (int p = 0; p < numSubTreesA; p++)
1479                         {
1480                                 b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p];
1481                                 //bvhInfoCPU[bvhA].m_quantization
1482                                 b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1483                                 b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1484
1485                                 b3Vector3 aabbAMinOut, aabbAMaxOut;
1486                                 float margin = 0.f;
1487                                 b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1488
1489                                 for (int q = 0; q < numSubTreesB; q++)
1490                                 {
1491                                         b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q];
1492
1493                                         b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1494                                         b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1495
1496                                         b3Vector3 aabbBMinOut, aabbBMaxOut;
1497                                         float margin = 0.f;
1498                                         b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1499
1500                                         numAabbChecks = 0;
1501                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1502                                         if (aabbOverlap)
1503                                         {
1504                                                 int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset;
1505                                                 //                              int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1506
1507                                                 int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset;
1508                                                 //                              int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1509
1510                                                 b3AlignedObjectArray<b3Int2> nodeStack;
1511                                                 b3Int2 node0;
1512                                                 node0.x = startNodeIndexA;
1513                                                 node0.y = startNodeIndexB;
1514
1515                                                 int maxStackDepth = 1024;
1516                                                 nodeStack.resize(maxStackDepth);
1517                                                 int depth = 0;
1518                                                 nodeStack[depth++] = node0;
1519
1520                                                 do
1521                                                 {
1522                                                         if (depth > maxDepth)
1523                                                         {
1524                                                                 maxDepth = depth;
1525                                                                 printf("maxDepth=%d\n", maxDepth);
1526                                                         }
1527                                                         b3Int2 node = nodeStack[--depth];
1528
1529                                                         b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1530                                                         b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1531
1532                                                         b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1533                                                         b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1534
1535                                                         float margin = 0.f;
1536                                                         b3Vector3 aabbAMinOut, aabbAMaxOut;
1537                                                         b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1538
1539                                                         b3Vector3 aabbBMinOut, aabbBMaxOut;
1540                                                         b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1541
1542                                                         numAabbChecks++;
1543                                                         bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1544                                                         if (nodeOverlap)
1545                                                         {
1546                                                                 bool isLeafA = treeNodesCPU[node.x].isLeafNode();
1547                                                                 bool isLeafB = treeNodesCPU[node.y].isLeafNode();
1548                                                                 bool isInternalA = !isLeafA;
1549                                                                 bool isInternalB = !isLeafB;
1550
1551                                                                 //fail, even though it might hit two leaf nodes
1552                                                                 if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB))
1553                                                                 {
1554                                                                         b3Error("Error: traversal exceeded maxStackDepth\n");
1555                                                                         continue;
1556                                                                 }
1557
1558                                                                 if (isInternalA)
1559                                                                 {
1560                                                                         int nodeAleftChild = node.x + 1;
1561                                                                         bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode();
1562                                                                         int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex();
1563
1564                                                                         if (isInternalB)
1565                                                                         {
1566                                                                                 int nodeBleftChild = node.y + 1;
1567                                                                                 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1568                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1569
1570                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1571                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1572                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1573                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1574                                                                         }
1575                                                                         else
1576                                                                         {
1577                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y);
1578                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y);
1579                                                                         }
1580                                                                 }
1581                                                                 else
1582                                                                 {
1583                                                                         if (isInternalB)
1584                                                                         {
1585                                                                                 int nodeBleftChild = node.y + 1;
1586                                                                                 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1587                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1588                                                                                 nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild);
1589                                                                                 nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild);
1590                                                                         }
1591                                                                         else
1592                                                                         {
1593                                                                                 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1594                                                                                 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1595                                                                                 {
1596                                                                                         int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
1597                                                                                         int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
1598                                                                                         gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1599                                                                                 }
1600                                                                         }
1601                                                                 }
1602                                                         }
1603                                                 } while (depth);
1604                                                 maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks);
1605                                         }
1606                                 }
1607                         }
1608
1609                         return;
1610                 }
1611
1612                 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1613                 {
1614                         if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1615                         {
1616                                 int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1617                                 for (int c = 0; c < numChildrenA; c++)
1618                                 {
1619                                         int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c;
1620                                         int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1621
1622                                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1623                                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1624                                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1625                                         b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1626                                         float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1627                                         b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1628
1629                                         b3Aabb aabbA = aabbsLocalSpace[childColIndexA];
1630
1631                                         b3Transform transA;
1632                                         transA.setIdentity();
1633                                         transA.setOrigin(newPosA);
1634                                         transA.setRotation(newOrnA);
1635                                         b3Scalar margin = 0.0f;
1636
1637                                         b3Vector3 aabbAMinOut, aabbAMaxOut;
1638
1639                                         b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1640
1641                                         if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1642                                         {
1643                                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1644                                                 for (int b = 0; b < numChildrenB; b++)
1645                                                 {
1646                                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1647                                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1648                                                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1649                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1650                                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1651                                                         b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1652                                                         float4 newPosB = transform(&childPosB, &posB, &ornB);
1653                                                         b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1654
1655                                                         b3Aabb aabbB = aabbsLocalSpace[childColIndexB];
1656
1657                                                         b3Transform transB;
1658                                                         transB.setIdentity();
1659                                                         transB.setOrigin(newPosB);
1660                                                         transB.setRotation(newOrnB);
1661
1662                                                         b3Vector3 aabbBMinOut, aabbBMaxOut;
1663                                                         b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1664
1665                                                         numAabbChecks++;
1666                                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1667                                                         if (aabbOverlap)
1668                                                         {
1669                                                                 /*
1670                                                                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1671                                                                 float dmin = FLT_MAX;
1672                                                                 float4 posA = newPosA;
1673                                                                 posA.w = 0.f;
1674                                                                 float4 posB = newPosB;
1675                                                                 posB.w = 0.f;
1676                                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1677                                                                 b3Quat ornA = newOrnA;
1678                                                                 float4 c0 = transform(&c0local, &posA, &ornA);
1679                                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1680                                                                 b3Quat ornB =newOrnB;
1681                                                                 float4 c1 = transform(&c1local,&posB,&ornB);
1682                                                                 const float4 DeltaC2 = c0 - c1;
1683                                                                 */
1684                                                                 {  //
1685                                                                         int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1686                                                                         if (compoundPairIdx < maxNumCompoundPairsCapacity)
1687                                                                         {
1688                                                                                 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1689                                                                         }
1690                                                                 }  //
1691                                                         }      //fi(1)
1692                                                 }          //for (int b=0
1693                                         }              //if (collidables[collidableIndexB].
1694                                         else           //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1695                                         {
1696                                                 if (1)
1697                                                 {
1698                                                         //      int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1699                                                         //      float dmin = FLT_MAX;
1700                                                         float4 posA = newPosA;
1701                                                         posA.w = 0.f;
1702                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1703                                                         posB.w = 0.f;
1704                                                         float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1705                                                         b3Quat ornA = newOrnA;
1706                                                         float4 c0;
1707                                                         c0 = transform(&c0local, &posA, &ornA);
1708                                                         float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1709                                                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1710                                                         float4 c1;
1711                                                         c1 = transform(&c1local, &posB, &ornB);
1712                                                         //      const float4 DeltaC2 = c0 - c1;
1713
1714                                                         {
1715                                                                 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1716                                                                 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1717                                                                 {
1718                                                                         gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1);
1719                                                                 }  //if (compoundPairIdx<maxNumCompoundPairsCapacity)
1720                                                         }      //
1721                                                 }          //fi (1)
1722                                         }              //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1723                                 }                  //for (int b=0;b<numChildrenB;b++)
1724                                 return;
1725                         }  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1726                         if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1727                         {
1728                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1729                                 for (int b = 0; b < numChildrenB; b++)
1730                                 {
1731                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1732                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1733                                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1734                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1735                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1736                                         b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1737                                         float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1738                                         b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1739
1740                                         int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1741
1742                                         //////////////////////////////////////
1743
1744                                         if (1)
1745                                         {
1746                                                 //      int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1747                                                 //      float dmin = FLT_MAX;
1748                                                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1749                                                 posA.w = 0.f;
1750                                                 float4 posB = newPosB;
1751                                                 posB.w = 0.f;
1752                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1753                                                 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1754                                                 float4 c0;
1755                                                 c0 = transform(&c0local, &posA, &ornA);
1756                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1757                                                 b3Quat ornB = newOrnB;
1758                                                 float4 c1;
1759                                                 c1 = transform(&c1local, &posB, &ornB);
1760                                                 //      const float4 DeltaC2 = c0 - c1;
1761                                                 {  //
1762                                                         int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1763                                                         if (compoundPairIdx < maxNumCompoundPairsCapacity)
1764                                                         {
1765                                                                 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB);
1766                                                         }  //fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1767                                                 }      //
1768                                         }          //fi (1)
1769                                 }              //for (int b=0;b<numChildrenB;b++)
1770                                 return;
1771                         }  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1772                         return;
1773                 }  //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1774         }      //i<numPairs
1775 }
1776
1777 __kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs,
1778                                                                                  __global const b3RigidBodyData* rigidBodies,
1779                                                                                  __global const b3Collidable* collidables,
1780                                                                                  __global const b3ConvexPolyhedronData* convexShapes,
1781                                                                                  __global const b3AlignedObjectArray<b3Float4>& vertices,
1782                                                                                  __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1783                                                                                  __global const b3AlignedObjectArray<b3GpuFace>& faces,
1784                                                                                  __global const b3AlignedObjectArray<int>& indices,
1785                                                                                  __global b3Aabb* aabbs,
1786                                                                                  __global const b3GpuChildShape* gpuChildShapes,
1787                                                                                  __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1788                                                                                  __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1789                                                                                  int numCompoundPairs,
1790                                                                                  int i)
1791 {
1792         //      int i = get_global_id(0);
1793         if (i < numCompoundPairs)
1794         {
1795                 int bodyIndexA = gpuCompoundPairs[i].x;
1796                 int bodyIndexB = gpuCompoundPairs[i].y;
1797
1798                 int childShapeIndexA = gpuCompoundPairs[i].z;
1799                 int childShapeIndexB = gpuCompoundPairs[i].w;
1800
1801                 int collidableIndexA = -1;
1802                 int collidableIndexB = -1;
1803
1804                 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1805                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1806
1807                 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1808                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1809
1810                 if (childShapeIndexA >= 0)
1811                 {
1812                         collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1813                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1814                         b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1815                         float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1816                         b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1817                         posA = newPosA;
1818                         ornA = newOrnA;
1819                 }
1820                 else
1821                 {
1822                         collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1823                 }
1824
1825                 if (childShapeIndexB >= 0)
1826                 {
1827                         collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1828                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1829                         b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1830                         float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1831                         b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1832                         posB = newPosB;
1833                         ornB = newOrnB;
1834                 }
1835                 else
1836                 {
1837                         collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1838                 }
1839
1840                 gpuHasCompoundSepNormalsOut[i] = 0;
1841
1842                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1843                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1844
1845                 int shapeTypeA = collidables[collidableIndexA].m_shapeType;
1846                 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1847
1848                 if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
1849                 {
1850                         return;
1851                 }
1852
1853                 int hasSeparatingAxis = 5;
1854
1855                 //      int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1856                 float dmin = FLT_MAX;
1857                 posA.w = 0.f;
1858                 posB.w = 0.f;
1859                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1860                 float4 c0 = transform(&c0local, &posA, &ornA);
1861                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1862                 float4 c1 = transform(&c1local, &posB, &ornB);
1863                 const float4 DeltaC2 = c0 - c1;
1864                 float4 sepNormal = make_float4(1, 0, 0, 0);
1865                 //              bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
1866                 bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin);
1867
1868                 hasSeparatingAxis = 4;
1869                 if (!sepA)
1870                 {
1871                         hasSeparatingAxis = 0;
1872                 }
1873                 else
1874                 {
1875                         bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin);
1876
1877                         if (!sepB)
1878                         {
1879                                 hasSeparatingAxis = 0;
1880                         }
1881                         else  //(!sepB)
1882                         {
1883                                 bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin);
1884                                 if (sepEE)
1885                                 {
1886                                         gpuCompoundSepNormalsOut[i] = sepNormal;  //fastNormalize4(sepNormal);
1887                                         gpuHasCompoundSepNormalsOut[i] = 1;
1888                                 }  //sepEE
1889                         }      //(!sepB)
1890                 }          //(!sepA)
1891         }
1892 }
1893
1894 __kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs,
1895                                                                                   __global const b3RigidBodyData* rigidBodies,
1896                                                                                   __global const b3Collidable* collidables,
1897                                                                                   __global const b3ConvexPolyhedronData* convexShapes,
1898                                                                                   __global const b3AlignedObjectArray<b3Float4>& vertices,
1899                                                                                   __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1900                                                                                   __global const b3AlignedObjectArray<b3GpuFace>& faces,
1901                                                                                   __global const b3AlignedObjectArray<int>& indices,
1902                                                                                   __global const b3GpuChildShape* gpuChildShapes,
1903                                                                                   __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1904                                                                                   __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1905                                                                                   __global struct b3Contact4Data* globalContactsOut,
1906                                                                                   int* nGlobalContactsOut,
1907                                                                                   int numCompoundPairs, int maxContactCapacity, int i)
1908 {
1909         //      int i = get_global_id(0);
1910         int pairIndex = i;
1911
1912         float4 worldVertsB1[64];
1913         float4 worldVertsB2[64];
1914         int capacityWorldVerts = 64;
1915
1916         float4 localContactsOut[64];
1917         int localContactCapacity = 64;
1918
1919         float minDist = -1e30f;
1920         float maxDist = 0.0f;
1921
1922         if (i < numCompoundPairs)
1923         {
1924                 if (gpuHasCompoundSepNormalsOut[i])
1925                 {
1926                         int bodyIndexA = gpuCompoundPairs[i].x;
1927                         int bodyIndexB = gpuCompoundPairs[i].y;
1928
1929                         int childShapeIndexA = gpuCompoundPairs[i].z;
1930                         int childShapeIndexB = gpuCompoundPairs[i].w;
1931
1932                         int collidableIndexA = -1;
1933                         int collidableIndexB = -1;
1934
1935                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1936                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1937
1938                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1939                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1940
1941                         if (childShapeIndexA >= 0)
1942                         {
1943                                 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1944                                 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1945                                 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1946                                 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1947                                 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1948                                 posA = newPosA;
1949                                 ornA = newOrnA;
1950                         }
1951                         else
1952                         {
1953                                 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1954                         }
1955
1956                         if (childShapeIndexB >= 0)
1957                         {
1958                                 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1959                                 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1960                                 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1961                                 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1962                                 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1963                                 posB = newPosB;
1964                                 ornB = newOrnB;
1965                         }
1966                         else
1967                         {
1968                                 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1969                         }
1970
1971                         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1972                         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1973
1974                         int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
1975                                                                                                                   convexShapes[shapeIndexA], convexShapes[shapeIndexB],
1976                                                                                                                   posA, ornA,
1977                                                                                                                   posB, ornB,
1978                                                                                                                   worldVertsB1, worldVertsB2, capacityWorldVerts,
1979                                                                                                                   minDist, maxDist,
1980                                                                                                                   vertices, faces, indices,
1981                                                                                                                   vertices, faces, indices,
1982                                                                                                                   localContactsOut, localContactCapacity);
1983
1984                         if (numLocalContactsOut > 0)
1985                         {
1986                                 float4 normal = -gpuCompoundSepNormalsOut[i];
1987                                 int nPoints = numLocalContactsOut;
1988                                 float4* pointsIn = localContactsOut;
1989                                 b3Int4 contactIdx;  // = {-1,-1,-1,-1};
1990
1991                                 contactIdx.s[0] = 0;
1992                                 contactIdx.s[1] = 1;
1993                                 contactIdx.s[2] = 2;
1994                                 contactIdx.s[3] = 3;
1995
1996                                 int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
1997
1998                                 int dstIdx;
1999                                 dstIdx = b3AtomicInc(nGlobalContactsOut);
2000                                 if ((dstIdx + nReducedContacts) < maxContactCapacity)
2001                                 {
2002                                         __global struct b3Contact4Data* c = globalContactsOut + dstIdx;
2003                                         c->m_worldNormalOnB = -normal;
2004                                         c->m_restituitionCoeffCmp = (0.f * 0xffff);
2005                                         c->m_frictionCoeffCmp = (0.7f * 0xffff);
2006                                         c->m_batchIdx = pairIndex;
2007                                         int bodyA = gpuCompoundPairs[pairIndex].x;
2008                                         int bodyB = gpuCompoundPairs[pairIndex].y;
2009                                         c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA;
2010                                         c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;
2011                                         c->m_childIndexA = childShapeIndexA;
2012                                         c->m_childIndexB = childShapeIndexB;
2013                                         for (int i = 0; i < nReducedContacts; i++)
2014                                         {
2015                                                 c->m_worldPosB[i] = pointsIn[contactIdx.s[i]];
2016                                         }
2017                                         b3Contact4Data_setNumPoints(c, nReducedContacts);
2018                                 }
2019
2020                         }  //           if (numContactsOut>0)
2021                 }      //               if (gpuHasCompoundSepNormalsOut[i])
2022         }          //   if (i<numCompoundPairs)
2023 }
2024
2025 void computeContactCompoundCompound(int pairIndex,
2026                                                                         int bodyIndexA, int bodyIndexB,
2027                                                                         int collidableIndexA, int collidableIndexB,
2028                                                                         const b3RigidBodyData* rigidBodies,
2029                                                                         const b3Collidable* collidables,
2030                                                                         const b3ConvexPolyhedronData* convexShapes,
2031                                                                         const b3GpuChildShape* cpuChildShapes,
2032                                                                         const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace,
2033                                                                         const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace,
2034
2035                                                                         const b3AlignedObjectArray<b3Vector3>& convexVertices,
2036                                                                         const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges,
2037                                                                         const b3AlignedObjectArray<int>& convexIndices,
2038                                                                         const b3AlignedObjectArray<b3GpuFace>& faces,
2039
2040                                                                         b3Contact4* globalContactsOut,
2041                                                                         int& nGlobalContactsOut,
2042                                                                         int maxContactCapacity,
2043                                                                         b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
2044                                                                         b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
2045                                                                         b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
2046 {
2047         int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2048         b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2049
2050         b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
2051         int numCompoundPairsOut = 0;
2052         int maxNumCompoundPairsCapacity = 8192;  //1024;
2053         cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);
2054
2055         // work-in-progress
2056         findCompoundPairsKernel(
2057                 pairIndex,
2058                 bodyIndexA, bodyIndexB,
2059                 collidableIndexA, collidableIndexB,
2060                 rigidBodies,
2061                 collidables,
2062                 convexShapes,
2063                 convexVertices,
2064                 hostAabbsWorldSpace,
2065                 hostAabbsLocalSpace,
2066                 cpuChildShapes,
2067                 &cpuCompoundPairsOut[0],
2068                 &numCompoundPairsOut,
2069                 maxNumCompoundPairsCapacity,
2070                 treeNodesCPU,
2071                 subTreesCPU,
2072                 bvhInfoCPU);
2073
2074         printf("maxNumAabbChecks=%d\n", maxNumAabbChecks);
2075         if (numCompoundPairsOut > maxNumCompoundPairsCapacity)
2076         {
2077                 b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity);
2078                 numCompoundPairsOut = maxNumCompoundPairsCapacity;
2079         }
2080         b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
2081         b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
2082         cpuCompoundSepNormalsOut.resize(numCompoundPairsOut);
2083         cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut);
2084
2085         for (int i = 0; i < numCompoundPairsOut; i++)
2086         {
2087                 processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes,
2088                                                                    cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i);
2089         }
2090
2091         for (int i = 0; i < numCompoundPairsOut; i++)
2092         {
2093                 clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes,
2094                                                                         cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i);
2095         }
2096         /*
2097                 int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
2098
2099                                         float4 posA = rigidBodies[bodyIndexA].m_pos;
2100                                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
2101                                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
2102                                         b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
2103                                         float4 newPosA = b3QuatRotate(ornA,childPosA)+posA;
2104                                         b3Quat newOrnA = b3QuatMul(ornA,childOrnA);
2105
2106                                         int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
2107
2108
2109                         bool foundSepAxis = findSeparatingAxis(hullA,hullB,
2110                                                         posA,
2111                                                         ornA,
2112                                                         posB,
2113                                                         ornB,
2114
2115                                                         convexVertices,uniqueEdges,faces,convexIndices,
2116                                                         convexVertices,uniqueEdges,faces,convexIndices,
2117                                                         
2118                                                         sepNormalWorldSpace
2119                                                         );
2120                                                         */
2121
2122         /*
2123         if (foundSepAxis)
2124         {
2125                 
2126                 
2127                 contactIndex = clipHullHullSingle(
2128                         bodyIndexA, bodyIndexB,
2129                                                    posA,ornA,
2130                                                    posB,ornB,
2131                         collidableIndexA, collidableIndexB,
2132                         &rigidBodies, 
2133                         &globalContactsOut,
2134                         nGlobalContactsOut,
2135                         
2136                         convexShapes,
2137                         convexShapes,
2138         
2139                         convexVertices, 
2140                         uniqueEdges, 
2141                         faces,
2142                         convexIndices,
2143         
2144                         convexVertices,
2145                         uniqueEdges,
2146                         faces,
2147                         convexIndices,
2148
2149                         collidables,
2150                         collidables,
2151                         sepNormalWorldSpace,
2152                         maxContactCapacity);
2153                         
2154         }
2155         */
2156
2157         //      return contactIndex;
2158
2159         /*
2160
2161         int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2162         for (int c=0;c<numChildrenB;c++)
2163         {
2164                 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c;
2165                 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2166
2167                 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2168                 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2169                 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2170                 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2171                 float4  posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB;
2172                 b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB);
2173
2174                 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2175
2176                 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2177
2178         }
2179         */
2180 }
2181
2182 void computeContactPlaneCompound(int pairIndex,
2183                                                                  int bodyIndexA, int bodyIndexB,
2184                                                                  int collidableIndexA, int collidableIndexB,
2185                                                                  const b3RigidBodyData* rigidBodies,
2186                                                                  const b3Collidable* collidables,
2187                                                                  const b3ConvexPolyhedronData* convexShapes,
2188                                                                  const b3GpuChildShape* cpuChildShapes,
2189                                                                  const b3Vector3* convexVertices,
2190                                                                  const int* convexIndices,
2191                                                                  const b3GpuFace* faces,
2192
2193                                                                  b3Contact4* globalContactsOut,
2194                                                                  int& nGlobalContactsOut,
2195                                                                  int maxContactCapacity)
2196 {
2197         int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2198         b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2199
2200         int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2201         for (int c = 0; c < numChildrenB; c++)
2202         {
2203                 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c;
2204                 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2205
2206                 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2207                 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2208                 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2209                 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2210                 float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB;
2211                 b3Quaternion ornB = rootOrnB * childOrnB;  //b3QuatMul(ornB,childOrnB);
2212
2213                 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2214
2215                 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2216
2217                 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2218                 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2219
2220                 //      int numContactsOut = 0;
2221                 //      int numWorldVertsB1= 0;
2222
2223                 b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
2224                 b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
2225                 b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
2226                 float planeConstant = planeEq.w;
2227                 b3Transform convexWorldTransform;
2228                 convexWorldTransform.setIdentity();
2229                 convexWorldTransform.setOrigin(posB);
2230                 convexWorldTransform.setRotation(ornB);
2231                 b3Transform planeTransform;
2232                 planeTransform.setIdentity();
2233                 planeTransform.setOrigin(posA);
2234                 planeTransform.setRotation(ornA);
2235
2236                 b3Transform planeInConvex;
2237                 planeInConvex = convexWorldTransform.inverse() * planeTransform;
2238                 b3Transform convexInPlane;
2239                 convexInPlane = planeTransform.inverse() * convexWorldTransform;
2240
2241                 b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
2242                 float maxDot = -1e30;
2243                 int hitVertex = -1;
2244                 b3Vector3 hitVtx;
2245
2246 #define MAX_PLANE_CONVEX_POINTS 64
2247
2248                 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
2249                 int numPoints = 0;
2250
2251                 b3Int4 contactIdx;
2252                 contactIdx.s[0] = 0;
2253                 contactIdx.s[1] = 1;
2254                 contactIdx.s[2] = 2;
2255                 contactIdx.s[3] = 3;
2256
2257                 for (int i = 0; i < hullB->m_numVertices; i++)
2258                 {
2259                         b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
2260                         float curDot = vtx.dot(planeNormalInConvex);
2261
2262                         if (curDot > maxDot)
2263                         {
2264                                 hitVertex = i;
2265                                 maxDot = curDot;
2266                                 hitVtx = vtx;
2267                                 //make sure the deepest points is always included
2268                                 if (numPoints == MAX_PLANE_CONVEX_POINTS)
2269                                         numPoints--;
2270                         }
2271
2272                         if (numPoints < MAX_PLANE_CONVEX_POINTS)
2273                         {
2274                                 b3Vector3 vtxWorld = convexWorldTransform * vtx;
2275                                 b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
2276                                 float dist = planeNormal.dot(vtxInPlane) - planeConstant;
2277                                 if (dist < 0.f)
2278                                 {
2279                                         vtxWorld.w = dist;
2280                                         contactPoints[numPoints] = vtxWorld;
2281                                         numPoints++;
2282                                 }
2283                         }
2284                 }
2285
2286                 int numReducedPoints = 0;
2287
2288                 numReducedPoints = numPoints;
2289
2290                 if (numPoints > 4)
2291                 {
2292                         numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
2293                 }
2294                 int dstIdx;
2295                 //    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2296
2297                 if (numReducedPoints > 0)
2298                 {
2299                         if (nGlobalContactsOut < maxContactCapacity)
2300                         {
2301                                 dstIdx = nGlobalContactsOut;
2302                                 nGlobalContactsOut++;
2303
2304                                 b3Contact4* c = &globalContactsOut[dstIdx];
2305                                 c->m_worldNormalOnB = -planeNormalWorld;
2306                                 c->setFrictionCoeff(0.7);
2307                                 c->setRestituitionCoeff(0.f);
2308
2309                                 c->m_batchIdx = pairIndex;
2310                                 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2311                                 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2312                                 for (int i = 0; i < numReducedPoints; i++)
2313                                 {
2314                                         b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
2315                                         c->m_worldPosB[i] = pOnB1;
2316                                 }
2317                                 c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
2318                         }  //if (dstIdx < numPairs)
2319                 }
2320         }
2321 }
2322
2323 void computeContactSphereConvex(int pairIndex,
2324                                                                 int bodyIndexA, int bodyIndexB,
2325                                                                 int collidableIndexA, int collidableIndexB,
2326                                                                 const b3RigidBodyData* rigidBodies,
2327                                                                 const b3Collidable* collidables,
2328                                                                 const b3ConvexPolyhedronData* convexShapes,
2329                                                                 const b3Vector3* convexVertices,
2330                                                                 const int* convexIndices,
2331                                                                 const b3GpuFace* faces,
2332                                                                 b3Contact4* globalContactsOut,
2333                                                                 int& nGlobalContactsOut,
2334                                                                 int maxContactCapacity)
2335 {
2336         float radius = collidables[collidableIndexA].m_radius;
2337         float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
2338         b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;
2339
2340         float4 pos = rigidBodies[bodyIndexB].m_pos;
2341
2342         b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
2343
2344         b3Transform tr;
2345         tr.setIdentity();
2346         tr.setOrigin(pos);
2347         tr.setRotation(quat);
2348         b3Transform trInv = tr.inverse();
2349
2350         float4 spherePos = trInv(spherePos1);
2351
2352         int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
2353         int shapeIndex = collidables[collidableIndex].m_shapeIndex;
2354         int numFaces = convexShapes[shapeIndex].m_numFaces;
2355         float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
2356         //      float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
2357         float minDist = -1000000.f;  // TODO: What is the largest/smallest float?
2358         bool bCollide = true;
2359         int region = -1;
2360         float4 localHitNormal;
2361         for (int f = 0; f < numFaces; f++)
2362         {
2363                 b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];
2364                 float4 planeEqn;
2365                 float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
2366                 float4 n1 = localPlaneNormal;  //quatRotate(quat,localPlaneNormal);
2367                 planeEqn = n1;
2368                 planeEqn[3] = face.m_plane.w;
2369
2370                 float4 pntReturn;
2371                 float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
2372
2373                 if (dist > radius)
2374                 {
2375                         bCollide = false;
2376                         break;
2377                 }
2378
2379                 if (dist > 0)
2380                 {
2381                         //might hit an edge or vertex
2382                         b3Vector3 out;
2383
2384                         bool isInPoly = IsPointInPolygon(spherePos,
2385                                                                                          &face,
2386                                                                                          &convexVertices[convexShapes[shapeIndex].m_vertexOffset],
2387                                                                                          convexIndices,
2388                                                                                          &out);
2389                         if (isInPoly)
2390                         {
2391                                 if (dist > minDist)
2392                                 {
2393                                         minDist = dist;
2394                                         closestPnt = pntReturn;
2395                                         localHitNormal = planeEqn;
2396                                         region = 1;
2397                                 }
2398                         }
2399                         else
2400                         {
2401                                 b3Vector3 tmp = spherePos - out;
2402                                 b3Scalar l2 = tmp.length2();
2403                                 if (l2 < radius * radius)
2404                                 {
2405                                         dist = b3Sqrt(l2);
2406                                         if (dist > minDist)
2407                                         {
2408                                                 minDist = dist;
2409                                                 closestPnt = out;
2410                                                 localHitNormal = tmp / dist;
2411                                                 region = 2;
2412                                         }
2413                                 }
2414                                 else
2415                                 {
2416                                         bCollide = false;
2417                                         break;
2418                                 }
2419                         }
2420                 }
2421                 else
2422                 {
2423                         if (dist > minDist)
2424                         {
2425                                 minDist = dist;
2426                                 closestPnt = pntReturn;
2427                                 localHitNormal = planeEqn;
2428                                 region = 3;
2429                         }
2430                 }
2431         }
2432         static int numChecks = 0;
2433         numChecks++;
2434
2435         if (bCollide && minDist > -10000)
2436         {
2437                 float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal;  //-hitNormalWorld;
2438                 float4 pOnB1 = tr(closestPnt);
2439                 //printf("dist ,%f,",minDist);
2440                 float actualDepth = minDist - radius;
2441                 if (actualDepth < 0)
2442                 {
2443                         //printf("actualDepth = ,%f,", actualDepth);
2444                         //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
2445                         //printf("region=,%d,\n", region);
2446                         pOnB1[3] = actualDepth;
2447
2448                         int dstIdx;
2449                         //    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2450
2451                         if (nGlobalContactsOut < maxContactCapacity)
2452                         {
2453                                 dstIdx = nGlobalContactsOut;
2454                                 nGlobalContactsOut++;
2455
2456                                 b3Contact4* c = &globalContactsOut[dstIdx];
2457                                 c->m_worldNormalOnB = normalOnSurfaceB1;
2458                                 c->setFrictionCoeff(0.7);
2459                                 c->setRestituitionCoeff(0.f);
2460
2461                                 c->m_batchIdx = pairIndex;
2462                                 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2463                                 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2464                                 c->m_worldPosB[0] = pOnB1;
2465                                 int numPoints = 1;
2466                                 c->m_worldNormalOnB.w = (b3Scalar)numPoints;
2467                         }  //if (dstIdx < numPairs)
2468                 }
2469         }  //if (hasCollision)
2470 }
2471
2472 int computeContactConvexConvex2(
2473         int pairIndex,
2474         int bodyIndexA, int bodyIndexB,
2475         int collidableIndexA, int collidableIndexB,
2476         const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
2477         const b3AlignedObjectArray<b3Collidable>& collidables,
2478         const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
2479         const b3AlignedObjectArray<b3Vector3>& convexVertices,
2480         const b3AlignedObjectArray<b3Vector3>& uniqueEdges,
2481         const b3AlignedObjectArray<int>& convexIndices,
2482         const b3AlignedObjectArray<b3GpuFace>& faces,
2483         b3AlignedObjectArray<b3Contact4>& globalContactsOut,
2484         int& nGlobalContactsOut,
2485         int maxContactCapacity,
2486         const b3AlignedObjectArray<b3Contact4>& oldContacts)
2487 {
2488         int contactIndex = -1;
2489         b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2490         b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2491         b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
2492         b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
2493
2494         b3ConvexPolyhedronData hullA, hullB;
2495
2496         b3Vector3 sepNormalWorldSpace;
2497
2498         b3Collidable colA = collidables[collidableIndexA];
2499         hullA = convexShapes[colA.m_shapeIndex];
2500         //printf("numvertsA = %d\n",hullA.m_numVertices);
2501
2502         b3Collidable colB = collidables[collidableIndexB];
2503         hullB = convexShapes[colB.m_shapeIndex];
2504         //printf("numvertsB = %d\n",hullB.m_numVertices);
2505
2506         //      int contactCapacity = MAX_VERTS;
2507         //int numContactsOut=0;
2508
2509 #ifdef _WIN32
2510         b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
2511         b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
2512 #endif
2513
2514         bool foundSepAxis = findSeparatingAxis(hullA, hullB,
2515                                                                                    posA,
2516                                                                                    ornA,
2517                                                                                    posB,
2518                                                                                    ornB,
2519
2520                                                                                    convexVertices, uniqueEdges, faces, convexIndices,
2521                                                                                    convexVertices, uniqueEdges, faces, convexIndices,
2522
2523                                                                                    sepNormalWorldSpace);
2524
2525         if (foundSepAxis)
2526         {
2527                 contactIndex = clipHullHullSingle(
2528                         bodyIndexA, bodyIndexB,
2529                         posA, ornA,
2530                         posB, ornB,
2531                         collidableIndexA, collidableIndexB,
2532                         &rigidBodies,
2533                         &globalContactsOut,
2534                         nGlobalContactsOut,
2535
2536                         convexShapes,
2537                         convexShapes,
2538
2539                         convexVertices,
2540                         uniqueEdges,
2541                         faces,
2542                         convexIndices,
2543
2544                         convexVertices,
2545                         uniqueEdges,
2546                         faces,
2547                         convexIndices,
2548
2549                         collidables,
2550                         collidables,
2551                         sepNormalWorldSpace,
2552                         maxContactCapacity);
2553         }
2554
2555         return contactIndex;
2556 }
2557
2558 void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs,
2559                                                                                                                 const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
2560                                                                                                                 b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
2561                                                                                                                 const b3OpenCLArray<b3Contact4>* oldContacts,
2562                                                                                                                 int maxContactCapacity,
2563                                                                                                                 int compoundPairCapacity,
2564                                                                                                                 const b3OpenCLArray<b3ConvexPolyhedronData>& convexData,
2565                                                                                                                 const b3OpenCLArray<b3Vector3>& gpuVertices,
2566                                                                                                                 const b3OpenCLArray<b3Vector3>& gpuUniqueEdges,
2567                                                                                                                 const b3OpenCLArray<b3GpuFace>& gpuFaces,
2568                                                                                                                 const b3OpenCLArray<int>& gpuIndices,
2569                                                                                                                 const b3OpenCLArray<b3Collidable>& gpuCollidables,
2570                                                                                                                 const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
2571
2572                                                                                                                 const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace,
2573                                                                                                                 const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace,
2574
2575                                                                                                                 b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
2576                                                                                                                 b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
2577                                                                                                                 b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
2578                                                                                                                 b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
2579                                                                                                                 b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
2580                                                                                                                 b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
2581                                                                                                                 b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
2582                                                                                                                 b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
2583                                                                                                                 b3OpenCLArray<b3BvhInfo>* bvhInfo,
2584
2585                                                                                                                 int numObjects,
2586                                                                                                                 int maxTriConvexPairCapacity,
2587                                                                                                                 b3OpenCLArray<b3Int4>& triangleConvexPairsOut,
2588                                                                                                                 int& numTriConvexPairsOut)
2589 {
2590         myframecount++;
2591
2592         if (!nPairs)
2593                 return;
2594
2595 #ifdef CHECK_ON_HOST
2596
2597         b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
2598         treeNodesGPU->copyToHost(treeNodesCPU);
2599
2600         b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
2601         subTreesGPU->copyToHost(subTreesCPU);
2602
2603         b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
2604         bvhInfo->copyToHost(bvhInfoCPU);
2605
2606         b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
2607         clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
2608
2609         b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
2610         clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
2611
2612         b3AlignedObjectArray<b3Int4> hostPairs;
2613         pairs->copyToHost(hostPairs);
2614
2615         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2616         bodyBuf->copyToHost(hostBodyBuf);
2617
2618         b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
2619         convexData.copyToHost(hostConvexData);
2620
2621         b3AlignedObjectArray<b3Vector3> hostVertices;
2622         gpuVertices.copyToHost(hostVertices);
2623
2624         b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2625         gpuUniqueEdges.copyToHost(hostUniqueEdges);
2626         b3AlignedObjectArray<b3GpuFace> hostFaces;
2627         gpuFaces.copyToHost(hostFaces);
2628         b3AlignedObjectArray<int> hostIndices;
2629         gpuIndices.copyToHost(hostIndices);
2630         b3AlignedObjectArray<b3Collidable> hostCollidables;
2631         gpuCollidables.copyToHost(hostCollidables);
2632
2633         b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2634         gpuChildShapes.copyToHost(cpuChildShapes);
2635
2636         b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs;
2637
2638         b3AlignedObjectArray<b3Contact4> hostContacts;
2639         if (nContacts)
2640         {
2641                 contactOut->copyToHost(hostContacts);
2642         }
2643
2644         b3AlignedObjectArray<b3Contact4> oldHostContacts;
2645
2646         if (oldContacts->size())
2647         {
2648                 oldContacts->copyToHost(oldHostContacts);
2649         }
2650
2651         hostContacts.resize(maxContactCapacity);
2652
2653         for (int i = 0; i < nPairs; i++)
2654         {
2655                 int bodyIndexA = hostPairs[i].x;
2656                 int bodyIndexB = hostPairs[i].y;
2657                 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
2658                 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
2659
2660                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
2661                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2662                 {
2663                         computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2664                                                                            &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2665                 }
2666
2667                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2668                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)
2669                 {
2670                         computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2671                                                                            &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2672                         //printf("convex-sphere\n");
2673                 }
2674
2675                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2676                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2677                 {
2678                         computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2679                                                                           &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2680                         //                      printf("convex-plane\n");
2681                 }
2682
2683                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2684                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2685                 {
2686                         computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2687                                                                           &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2688                         //                      printf("plane-convex\n");
2689                 }
2690
2691                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2692                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2693                 {
2694                         computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2695                                                                                    &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0],
2696                                                                                    nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU);
2697                         //                      printf("convex-plane\n");
2698                 }
2699
2700                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2701                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2702                 {
2703                         computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2704                                                                                 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2705                         //                      printf("convex-plane\n");
2706                 }
2707
2708                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2709                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2710                 {
2711                         computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2712                                                                                 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2713                         //                      printf("plane-convex\n");
2714                 }
2715
2716                 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2717                         hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2718                 {
2719                         //printf("hostPairs[i].z=%d\n",hostPairs[i].z);
2720                         int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts);
2721                         //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
2722
2723                         if (contactIndex >= 0)
2724                         {
2725                                 //                              printf("convex convex contactIndex = %d\n",contactIndex);
2726                                 hostPairs[i].z = contactIndex;
2727                         }
2728                         //                      printf("plane-convex\n");
2729                 }
2730         }
2731
2732         if (hostPairs.size())
2733         {
2734                 pairs->copyFromHost(hostPairs);
2735         }
2736
2737         hostContacts.resize(nContacts);
2738         if (nContacts)
2739         {
2740                 contactOut->copyFromHost(hostContacts);
2741         }
2742         else
2743         {
2744                 contactOut->resize(0);
2745         }
2746
2747         m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2748         //printf("(HOST) nContacts = %d\n",nContacts);
2749
2750 #else
2751
2752         {
2753                 if (nPairs)
2754                 {
2755                         m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2756
2757                         B3_PROFILE("primitiveContactsKernel");
2758                         b3BufferInfoCL bInfo[] = {
2759                                 b3BufferInfoCL(pairs->getBufferCL(), true),
2760                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2761                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2762                                 b3BufferInfoCL(convexData.getBufferCL(), true),
2763                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2764                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2765                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2766                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2767                                 b3BufferInfoCL(contactOut->getBufferCL()),
2768                                 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2769
2770                         b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel");
2771                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2772                         launcher.setConst(nPairs);
2773                         launcher.setConst(maxContactCapacity);
2774                         int num = nPairs;
2775                         launcher.launch1D(num);
2776                         clFinish(m_queue);
2777
2778                         nContacts = m_totalContactsOut.at(0);
2779                         contactOut->resize(nContacts);
2780                 }
2781         }
2782
2783 #endif  //CHECK_ON_HOST
2784
2785         B3_PROFILE("computeConvexConvexContactsGPUSAT");
2786         // printf("nContacts = %d\n",nContacts);
2787
2788         m_sepNormals.resize(nPairs);
2789         m_hasSeparatingNormals.resize(nPairs);
2790
2791         int concaveCapacity = maxTriConvexPairCapacity;
2792         m_concaveSepNormals.resize(concaveCapacity);
2793         m_concaveHasSeparatingNormals.resize(concaveCapacity);
2794         m_numConcavePairsOut.resize(0);
2795         m_numConcavePairsOut.push_back(0);
2796
2797         m_gpuCompoundPairs.resize(compoundPairCapacity);
2798
2799         m_gpuCompoundSepNormals.resize(compoundPairCapacity);
2800
2801         m_gpuHasCompoundSepNormals.resize(compoundPairCapacity);
2802
2803         m_numCompoundPairsOut.resize(0);
2804         m_numCompoundPairsOut.push_back(0);
2805
2806         int numCompoundPairs = 0;
2807
2808         int numConcavePairs = 0;
2809
2810         {
2811                 clFinish(m_queue);
2812                 if (findSeparatingAxisOnGpu)
2813                 {
2814                         m_dmins.resize(nPairs);
2815                         if (splitSearchSepAxisConvex)
2816                         {
2817                                 if (useMprGpu)
2818                                 {
2819                                         nContacts = m_totalContactsOut.at(0);
2820                                         {
2821                                                 B3_PROFILE("mprPenetrationKernel");
2822                                                 b3BufferInfoCL bInfo[] = {
2823                                                         b3BufferInfoCL(pairs->getBufferCL(), true),
2824                                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2825                                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2826                                                         b3BufferInfoCL(convexData.getBufferCL(), true),
2827                                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2828                                                         b3BufferInfoCL(m_sepNormals.getBufferCL()),
2829                                                         b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2830                                                         b3BufferInfoCL(contactOut->getBufferCL()),
2831                                                         b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2832
2833                                                 b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel");
2834                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2835
2836                                                 launcher.setConst(maxContactCapacity);
2837                                                 launcher.setConst(nPairs);
2838
2839                                                 int num = nPairs;
2840                                                 launcher.launch1D(num);
2841                                                 clFinish(m_queue);
2842                                                 /*
2843                                                 b3AlignedObjectArray<int>hostHasSepAxis;
2844                                                 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
2845                                                 b3AlignedObjectArray<b3Vector3>hostSepAxis;
2846                                                 m_sepNormals.copyToHost(hostSepAxis);
2847                                                 */
2848                                                 nContacts = m_totalContactsOut.at(0);
2849                                                 contactOut->resize(nContacts);
2850                                                 //      printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts);
2851                                                 if (nContacts > maxContactCapacity)
2852                                                 {
2853                                                         b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
2854                                                         nContacts = maxContactCapacity;
2855                                                 }
2856                                         }
2857                                 }
2858
2859                                 if (1)
2860                                 {
2861                                         if (1)
2862                                         {
2863                                                 {
2864                                                         B3_PROFILE("findSeparatingAxisVertexFaceKernel");
2865                                                         b3BufferInfoCL bInfo[] = {
2866                                                                 b3BufferInfoCL(pairs->getBufferCL(), true),
2867                                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2868                                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2869                                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
2870                                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2871                                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2872                                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2873                                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2874                                                                 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2875                                                                 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2876                                                                 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2877                                                                 b3BufferInfoCL(m_dmins.getBufferCL())};
2878
2879                                                         b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel");
2880                                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2881                                                         launcher.setConst(nPairs);
2882
2883                                                         int num = nPairs;
2884                                                         launcher.launch1D(num);
2885                                                         clFinish(m_queue);
2886                                                 }
2887
2888                                                 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2889
2890                                                 {
2891                                                         B3_PROFILE("findSeparatingAxisEdgeEdgeKernel");
2892                                                         b3BufferInfoCL bInfo[] = {
2893                                                                 b3BufferInfoCL(pairs->getBufferCL(), true),
2894                                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2895                                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2896                                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
2897                                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2898                                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2899                                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2900                                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2901                                                                 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2902                                                                 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2903                                                                 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2904                                                                 b3BufferInfoCL(m_dmins.getBufferCL()),
2905                                                                 b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true)
2906
2907                                                         };
2908
2909                                                         b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel");
2910                                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2911                                                         launcher.setConst(numDirections);
2912                                                         launcher.setConst(nPairs);
2913                                                         int num = nPairs;
2914                                                         launcher.launch1D(num);
2915                                                         clFinish(m_queue);
2916                                                 }
2917                                         }
2918                                         if (useMprGpu)
2919                                         {
2920                                                 B3_PROFILE("findSeparatingAxisUnitSphereKernel");
2921                                                 b3BufferInfoCL bInfo[] = {
2922                                                         b3BufferInfoCL(pairs->getBufferCL(), true),
2923                                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2924                                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2925                                                         b3BufferInfoCL(convexData.getBufferCL(), true),
2926                                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2927                                                         b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true),
2928                                                         b3BufferInfoCL(m_sepNormals.getBufferCL()),
2929                                                         b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2930                                                         b3BufferInfoCL(m_dmins.getBufferCL())};
2931
2932                                                 b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel");
2933                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2934                                                 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2935                                                 launcher.setConst(numDirections);
2936
2937                                                 launcher.setConst(nPairs);
2938
2939                                                 int num = nPairs;
2940                                                 launcher.launch1D(num);
2941                                                 clFinish(m_queue);
2942                                         }
2943                                 }
2944                         }
2945                         else
2946                         {
2947                                 B3_PROFILE("findSeparatingAxisKernel");
2948                                 b3BufferInfoCL bInfo[] = {
2949                                         b3BufferInfoCL(pairs->getBufferCL(), true),
2950                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2951                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2952                                         b3BufferInfoCL(convexData.getBufferCL(), true),
2953                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2954                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2955                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2956                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2957                                         b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2958                                         b3BufferInfoCL(m_sepNormals.getBufferCL()),
2959                                         b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())};
2960
2961                                 b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel");
2962                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2963                                 launcher.setConst(nPairs);
2964
2965                                 int num = nPairs;
2966                                 launcher.launch1D(num);
2967                                 clFinish(m_queue);
2968                         }
2969                 }
2970                 else
2971                 {
2972                         B3_PROFILE("findSeparatingAxisKernel CPU");
2973
2974                         b3AlignedObjectArray<b3Int4> hostPairs;
2975                         pairs->copyToHost(hostPairs);
2976                         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2977                         bodyBuf->copyToHost(hostBodyBuf);
2978
2979                         b3AlignedObjectArray<b3Collidable> hostCollidables;
2980                         gpuCollidables.copyToHost(hostCollidables);
2981
2982                         b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2983                         gpuChildShapes.copyToHost(cpuChildShapes);
2984
2985                         b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData;
2986                         convexData.copyToHost(hostConvexShapeData);
2987
2988                         b3AlignedObjectArray<b3Vector3> hostVertices;
2989                         gpuVertices.copyToHost(hostVertices);
2990
2991                         b3AlignedObjectArray<int> hostHasSepAxis;
2992                         hostHasSepAxis.resize(nPairs);
2993                         b3AlignedObjectArray<b3Vector3> hostSepAxis;
2994                         hostSepAxis.resize(nPairs);
2995
2996                         b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2997                         gpuUniqueEdges.copyToHost(hostUniqueEdges);
2998                         b3AlignedObjectArray<b3GpuFace> hostFaces;
2999                         gpuFaces.copyToHost(hostFaces);
3000
3001                         b3AlignedObjectArray<int> hostIndices;
3002                         gpuIndices.copyToHost(hostIndices);
3003
3004                         b3AlignedObjectArray<b3Contact4> hostContacts;
3005                         if (nContacts)
3006                         {
3007                                 contactOut->copyToHost(hostContacts);
3008                         }
3009                         hostContacts.resize(maxContactCapacity);
3010                         int nGlobalContactsOut = nContacts;
3011
3012                         for (int i = 0; i < nPairs; i++)
3013                         {
3014                                 int bodyIndexA = hostPairs[i].x;
3015                                 int bodyIndexB = hostPairs[i].y;
3016                                 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3017                                 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3018
3019                                 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
3020                                 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
3021
3022                                 hostHasSepAxis[i] = 0;
3023
3024                                 //once the broadphase avoids static-static pairs, we can remove this test
3025                                 if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0))
3026                                 {
3027                                         continue;
3028                                 }
3029
3030                                 if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL))
3031                                 {
3032                                         continue;
3033                                 }
3034
3035                                 float dmin = FLT_MAX;
3036
3037                                 b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA];
3038                                 b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB];
3039                                 b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos;
3040                                 b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos;
3041                                 b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat;
3042                                 b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat;
3043
3044                                 if (useGjk)
3045                                 {
3046                                         //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR
3047                                         {
3048                                                 b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3049                                                 b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3050                                                 b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3051                                                 b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3052                                                 b3Vector3 DeltaC2 = c0 - c1;
3053
3054                                                 b3Vector3 sepAxis;
3055
3056                                                 bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3057                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3058                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3059                                                                                                                                 &sepAxis, &dmin);
3060
3061                                                 if (hasSepAxisA)
3062                                                 {
3063                                                         bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3064                                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3065                                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3066                                                                                                                                         &sepAxis, &dmin);
3067                                                         if (hasSepAxisB)
3068                                                         {
3069                                                                 bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3070                                                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3071                                                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3072                                                                                                                                                                 &sepAxis, &dmin, false);
3073
3074                                                                 if (hasEdgeEdge)
3075                                                                 {
3076                                                                         hostHasSepAxis[i] = 1;
3077                                                                         hostSepAxis[i] = sepAxis;
3078                                                                         hostSepAxis[i].w = dmin;
3079                                                                 }
3080                                                         }
3081                                                 }
3082                                         }
3083
3084                                         if (hostHasSepAxis[i])
3085                                         {
3086                                                 int pairIndex = i;
3087
3088                                                 bool useMpr = true;
3089                                                 if (useMpr)
3090                                                 {
3091                                                         int res = 0;
3092                                                         float depth = 0.f;
3093                                                         b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0);
3094                                                         b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0);
3095
3096                                                         float depthOut;
3097                                                         b3Vector3 dirOut;
3098                                                         b3Vector3 posOut;
3099
3100                                                         //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut);
3101                                                         res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut);
3102                                                         depth = depthOut;
3103                                                         sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z);
3104                                                         resultPointOnBWorld = posOut;
3105                                                         //hostHasSepAxis[i] = 0;
3106
3107                                                         if (res == 0)
3108                                                         {
3109                                                                 //add point?
3110                                                                 //printf("depth = %f\n",depth);
3111                                                                 //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]);
3112                                                                 //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]);
3113
3114                                                                 float dist = 0.f;
3115
3116                                                                 const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex];
3117                                                                 const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex];
3118
3119                                                                 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3120                                                                 {
3121                                                                         if (depth > dist)
3122                                                                         {
3123                                                                                 float diff = depth - dist;
3124
3125                                                                                 static float maxdiff = 0.f;
3126                                                                                 if (maxdiff < diff)
3127                                                                                 {
3128                                                                                         maxdiff = diff;
3129                                                                                         printf("maxdiff = %20.10f\n", maxdiff);
3130                                                                                 }
3131                                                                         }
3132                                                                 }
3133                                                                 if (depth > dmin)
3134                                                                 {
3135                                                                         b3Vector3 oldAxis = hostSepAxis[i];
3136                                                                         depth = dmin;
3137                                                                         sepAxis2 = oldAxis;
3138                                                                 }
3139
3140                                                                 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3141                                                                 {
3142                                                                         if (depth > dist)
3143                                                                         {
3144                                                                                 float diff = depth - dist;
3145                                                                                 //printf("?diff  = %f\n",diff );
3146                                                                                 static float maxdiff = 0.f;
3147                                                                                 if (maxdiff < diff)
3148                                                                                 {
3149                                                                                         maxdiff = diff;
3150                                                                                         printf("maxdiff = %20.10f\n", maxdiff);
3151                                                                                 }
3152                                                                         }
3153                                                                         //this is used for SAT
3154                                                                         //hostHasSepAxis[i] = 1;
3155                                                                         //hostSepAxis[i] = sepAxis2;
3156
3157                                                                         //add contact point
3158
3159                                                                         //int contactIndex = nGlobalContactsOut;
3160                                                                         b3Contact4& newContact = hostContacts.at(nGlobalContactsOut);
3161                                                                         nGlobalContactsOut++;
3162                                                                         newContact.m_batchIdx = 0;  //i;
3163                                                                         newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
3164                                                                         newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
3165
3166                                                                         newContact.m_frictionCoeffCmp = 45874;
3167                                                                         newContact.m_restituitionCoeffCmp = 0;
3168
3169                                                                         static float maxDepth = 0.f;
3170
3171                                                                         if (depth > maxDepth)
3172                                                                         {
3173                                                                                 maxDepth = depth;
3174                                                                                 printf("MPR maxdepth = %f\n", maxDepth);
3175                                                                         }
3176
3177                                                                         resultPointOnBWorld.w = -depth;
3178                                                                         newContact.m_worldPosB[0] = resultPointOnBWorld;
3179                                                                         //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2;
3180                                                                         newContact.m_worldNormalOnB = sepAxis2;
3181                                                                         newContact.m_worldNormalOnB.w = (b3Scalar)1;
3182                                                                 }
3183                                                                 else
3184                                                                 {
3185                                                                         printf("rejected\n");
3186                                                                 }
3187                                                         }
3188                                                 }
3189                                                 else
3190                                                 {
3191                                                         //int contactIndex = computeContactConvexConvex2(           i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
3192                                                         b3AlignedObjectArray<b3Contact4> oldHostContacts;
3193                                                         int result;
3194                                                         result = computeContactConvexConvex2(  //hostPairs,
3195                                                                 pairIndex,
3196                                                                 bodyIndexA, bodyIndexB,
3197                                                                 collidableIndexA, collidableIndexB,
3198                                                                 hostBodyBuf,
3199                                                                 hostCollidables,
3200                                                                 hostConvexShapeData,
3201                                                                 hostVertices,
3202                                                                 hostUniqueEdges,
3203                                                                 hostIndices,
3204                                                                 hostFaces,
3205                                                                 hostContacts,
3206                                                                 nGlobalContactsOut,
3207                                                                 maxContactCapacity,
3208                                                                 oldHostContacts
3209                                                                 //hostHasSepAxis,
3210                                                                 //hostSepAxis
3211
3212                                                         );
3213                                                 }  //mpr
3214                                         }      //hostHasSepAxis[i] = 1;
3215                                 }
3216                                 else
3217                                 {
3218                                         b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3219                                         b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3220                                         b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3221                                         b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3222                                         b3Vector3 DeltaC2 = c0 - c1;
3223
3224                                         b3Vector3 sepAxis;
3225
3226                                         bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3227                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3228                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3229                                                                                                                         &sepAxis, &dmin);
3230
3231                                         if (hasSepAxisA)
3232                                         {
3233                                                 bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3234                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3235                                                                                                                                 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3236                                                                                                                                 &sepAxis, &dmin);
3237                                                 if (hasSepAxisB)
3238                                                 {
3239                                                         bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3240                                                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3241                                                                                                                                                         &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3242                                                                                                                                                         &sepAxis, &dmin, true);
3243
3244                                                         if (hasEdgeEdge)
3245                                                         {
3246                                                                 hostHasSepAxis[i] = 1;
3247                                                                 hostSepAxis[i] = sepAxis;
3248                                                         }
3249                                                 }
3250                                         }
3251                                 }
3252                         }
3253
3254                         if (useGjkContacts)  //nGlobalContactsOut>0)
3255                         {
3256                                 //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut);
3257                                 nContacts = nGlobalContactsOut;
3258                                 contactOut->copyFromHost(hostContacts);
3259
3260                                 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3261                         }
3262
3263                         m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3264                         m_sepNormals.copyFromHost(hostSepAxis);
3265
3266                         /*
3267              //double-check results from GPU (comment-out the 'else' so both paths are executed
3268             b3AlignedObjectArray<int> checkHasSepAxis;
3269             m_hasSeparatingNormals.copyToHost(checkHasSepAxis);
3270             static int frameCount = 0;
3271             frameCount++;
3272             for (int i=0;i<nPairs;i++)
3273             {
3274                 if (hostHasSepAxis[i] != checkHasSepAxis[i])
3275                 {
3276                     printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n",
3277                            frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]);
3278                 }
3279             }
3280             //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3281             //    m_sepNormals.copyFromHost(hostSepAxis);
3282             */
3283                 }
3284
3285                 numCompoundPairs = m_numCompoundPairsOut.at(0);
3286                 bool useGpuFindCompoundPairs = true;
3287                 if (useGpuFindCompoundPairs)
3288                 {
3289                         B3_PROFILE("findCompoundPairsKernel");
3290                         b3BufferInfoCL bInfo[] =
3291                                 {
3292                                         b3BufferInfoCL(pairs->getBufferCL(), true),
3293                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3294                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3295                                         b3BufferInfoCL(convexData.getBufferCL(), true),
3296                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3297                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3298                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3299                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3300                                         b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true),
3301                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3302                                         b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()),
3303                                         b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()),
3304                                         b3BufferInfoCL(subTreesGPU->getBufferCL()),
3305                                         b3BufferInfoCL(treeNodesGPU->getBufferCL()),
3306                                         b3BufferInfoCL(bvhInfo->getBufferCL())};
3307
3308                         b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel");
3309                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3310                         launcher.setConst(nPairs);
3311                         launcher.setConst(compoundPairCapacity);
3312
3313                         int num = nPairs;
3314                         launcher.launch1D(num);
3315                         clFinish(m_queue);
3316
3317                         numCompoundPairs = m_numCompoundPairsOut.at(0);
3318                         //printf("numCompoundPairs =%d\n",numCompoundPairs );
3319                         if (numCompoundPairs)
3320                         {
3321                                 //printf("numCompoundPairs=%d\n",numCompoundPairs);
3322                         }
3323                 }
3324                 else
3325                 {
3326                         b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3327                         treeNodesGPU->copyToHost(treeNodesCPU);
3328
3329                         b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3330                         subTreesGPU->copyToHost(subTreesCPU);
3331
3332                         b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3333                         bvhInfo->copyToHost(bvhInfoCPU);
3334
3335                         b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3336                         clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3337
3338                         b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
3339                         clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
3340
3341                         b3AlignedObjectArray<b3Int4> hostPairs;
3342                         pairs->copyToHost(hostPairs);
3343
3344                         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3345                         bodyBuf->copyToHost(hostBodyBuf);
3346
3347                         b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
3348                         cpuCompoundPairsOut.resize(compoundPairCapacity);
3349
3350                         b3AlignedObjectArray<b3Collidable> hostCollidables;
3351                         gpuCollidables.copyToHost(hostCollidables);
3352
3353                         b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3354                         gpuChildShapes.copyToHost(cpuChildShapes);
3355
3356                         b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3357                         convexData.copyToHost(hostConvexData);
3358
3359                         b3AlignedObjectArray<b3Vector3> hostVertices;
3360                         gpuVertices.copyToHost(hostVertices);
3361
3362                         for (int pairIndex = 0; pairIndex < nPairs; pairIndex++)
3363                         {
3364                                 int bodyIndexA = hostPairs[pairIndex].x;
3365                                 int bodyIndexB = hostPairs[pairIndex].y;
3366                                 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3367                                 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3368                                 if (cpuChildShapes.size())
3369                                 {
3370                                         findCompoundPairsKernel(
3371                                                 pairIndex,
3372                                                 bodyIndexA,
3373                                                 bodyIndexB,
3374                                                 collidableIndexA,
3375                                                 collidableIndexB,
3376                                                 &hostBodyBuf[0],
3377                                                 &hostCollidables[0],
3378                                                 &hostConvexData[0],
3379                                                 hostVertices,
3380                                                 hostAabbsWorldSpace,
3381                                                 hostAabbsLocalSpace,
3382                                                 &cpuChildShapes[0],
3383                                                 &cpuCompoundPairsOut[0],
3384                                                 &numCompoundPairs,
3385                                                 compoundPairCapacity,
3386                                                 treeNodesCPU,
3387                                                 subTreesCPU,
3388                                                 bvhInfoCPU);
3389                                 }
3390                         }
3391
3392                         m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true);
3393                         if (numCompoundPairs)
3394                         {
3395                                 b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0];
3396                                 m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true);
3397                         }
3398                         //cpuCompoundPairsOut
3399                 }
3400                 if (numCompoundPairs)
3401                 {
3402                         printf("numCompoundPairs=%d\n", numCompoundPairs);
3403                 }
3404
3405                 if (numCompoundPairs > compoundPairCapacity)
3406                 {
3407                         b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity);
3408                         numCompoundPairs = compoundPairCapacity;
3409                 }
3410
3411                 m_gpuCompoundPairs.resize(numCompoundPairs);
3412                 m_gpuHasCompoundSepNormals.resize(numCompoundPairs);
3413                 m_gpuCompoundSepNormals.resize(numCompoundPairs);
3414
3415                 if (numCompoundPairs)
3416                 {
3417                         B3_PROFILE("processCompoundPairsPrimitivesKernel");
3418                         b3BufferInfoCL bInfo[] =
3419                                 {
3420                                         b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3421                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3422                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3423                                         b3BufferInfoCL(convexData.getBufferCL(), true),
3424                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3425                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3426                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3427                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3428                                         b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3429                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3430                                         b3BufferInfoCL(contactOut->getBufferCL()),
3431                                         b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3432
3433                         b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel");
3434                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3435                         launcher.setConst(numCompoundPairs);
3436                         launcher.setConst(maxContactCapacity);
3437
3438                         int num = numCompoundPairs;
3439                         launcher.launch1D(num);
3440                         clFinish(m_queue);
3441                         nContacts = m_totalContactsOut.at(0);
3442                         //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts);
3443                         if (nContacts > maxContactCapacity)
3444                         {
3445                                 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3446                                 nContacts = maxContactCapacity;
3447                         }
3448                 }
3449
3450                 if (numCompoundPairs)
3451                 {
3452                         B3_PROFILE("processCompoundPairsKernel");
3453                         b3BufferInfoCL bInfo[] =
3454                                 {
3455                                         b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3456                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3457                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3458                                         b3BufferInfoCL(convexData.getBufferCL(), true),
3459                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3460                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3461                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3462                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3463                                         b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3464                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3465                                         b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()),
3466                                         b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())};
3467
3468                         b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel");
3469                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3470                         launcher.setConst(numCompoundPairs);
3471
3472                         int num = numCompoundPairs;
3473                         launcher.launch1D(num);
3474                         clFinish(m_queue);
3475                 }
3476
3477                 //printf("numConcave  = %d\n",numConcave);
3478
3479                 //              printf("hostNormals.size()=%d\n",hostNormals.size());
3480                 //int numPairs = pairCount.at(0);
3481         }
3482         int vertexFaceCapacity = 64;
3483
3484         {
3485                 //now perform the tree query on GPU
3486
3487                 if (treeNodesGPU->size() && treeNodesGPU->size())
3488                 {
3489                         if (bvhTraversalKernelGPU)
3490                         {
3491                                 B3_PROFILE("m_bvhTraversalKernel");
3492
3493                                 numConcavePairs = m_numConcavePairsOut.at(0);
3494
3495                                 b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel");
3496                                 launcher.setBuffer(pairs->getBufferCL());
3497                                 launcher.setBuffer(bodyBuf->getBufferCL());
3498                                 launcher.setBuffer(gpuCollidables.getBufferCL());
3499                                 launcher.setBuffer(clAabbsWorldSpace.getBufferCL());
3500                                 launcher.setBuffer(triangleConvexPairsOut.getBufferCL());
3501                                 launcher.setBuffer(m_numConcavePairsOut.getBufferCL());
3502                                 launcher.setBuffer(subTreesGPU->getBufferCL());
3503                                 launcher.setBuffer(treeNodesGPU->getBufferCL());
3504                                 launcher.setBuffer(bvhInfo->getBufferCL());
3505
3506                                 launcher.setConst(nPairs);
3507                                 launcher.setConst(maxTriConvexPairCapacity);
3508                                 int num = nPairs;
3509                                 launcher.launch1D(num);
3510                                 clFinish(m_queue);
3511                                 numConcavePairs = m_numConcavePairsOut.at(0);
3512                         }
3513                         else
3514                         {
3515                                 b3AlignedObjectArray<b3Int4> hostPairs;
3516                                 pairs->copyToHost(hostPairs);
3517                                 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3518                                 bodyBuf->copyToHost(hostBodyBuf);
3519                                 b3AlignedObjectArray<b3Collidable> hostCollidables;
3520                                 gpuCollidables.copyToHost(hostCollidables);
3521                                 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3522                                 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3523
3524                                 //int maxTriConvexPairCapacity,
3525                                 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3526                                 triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3527
3528                                 //int numTriConvexPairsOutHost=0;
3529                                 numConcavePairs = 0;
3530                                 //m_numConcavePairsOut
3531
3532                                 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3533                                 treeNodesGPU->copyToHost(treeNodesCPU);
3534                                 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3535                                 subTreesGPU->copyToHost(subTreesCPU);
3536                                 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3537                                 bvhInfo->copyToHost(bvhInfoCPU);
3538                                 //compute it...
3539
3540                                 volatile int hostNumConcavePairsOut = 0;
3541
3542                                 //
3543                                 for (int i = 0; i < nPairs; i++)
3544                                 {
3545                                         b3BvhTraversal(&hostPairs.at(0),
3546                                                                    &hostBodyBuf.at(0),
3547                                                                    &hostCollidables.at(0),
3548                                                                    &hostAabbsWorldSpace.at(0),
3549                                                                    &triangleConvexPairsOutHost.at(0),
3550                                                                    &hostNumConcavePairsOut,
3551                                                                    &subTreesCPU.at(0),
3552                                                                    &treeNodesCPU.at(0),
3553                                                                    &bvhInfoCPU.at(0),
3554                                                                    nPairs,
3555                                                                    maxTriConvexPairCapacity,
3556                                                                    i);
3557                                 }
3558                                 numConcavePairs = hostNumConcavePairsOut;
3559
3560                                 if (hostNumConcavePairsOut)
3561                                 {
3562                                         triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
3563                                         triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
3564                                 }
3565                                 //
3566
3567                                 m_numConcavePairsOut.resize(0);
3568                                 m_numConcavePairsOut.push_back(numConcavePairs);
3569                         }
3570
3571                         //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
3572
3573                         if (numConcavePairs > maxTriConvexPairCapacity)
3574                         {
3575                                 static int exceeded_maxTriConvexPairCapacity_count = 0;
3576                                 b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
3577                                                 numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++);
3578                                 numConcavePairs = maxTriConvexPairCapacity;
3579                         }
3580                         triangleConvexPairsOut.resize(numConcavePairs);
3581
3582                         if (numConcavePairs)
3583                         {
3584                                 clippingFacesOutGPU.resize(numConcavePairs);
3585                                 worldNormalsAGPU.resize(numConcavePairs);
3586                                 worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3587                                 worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3588
3589                                 if (findConcaveSeparatingAxisKernelGPU)
3590                                 {
3591                                         /*
3592                                         m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3593                                                 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3594                                                 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3595                                                 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3596                                                 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3597                                         */
3598
3599                                         //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
3600                                         if (splitSearchSepAxisConcave)
3601                                         {
3602                                                 //printf("numConcavePairs = %d\n",numConcavePairs);
3603                                                 m_dmins.resize(numConcavePairs);
3604                                                 {
3605                                                         B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel");
3606                                                         b3BufferInfoCL bInfo[] = {
3607                                                                 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3608                                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3609                                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3610                                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
3611                                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3612                                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3613                                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3614                                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3615                                                                 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3616                                                                 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3617                                                                 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3618                                                                 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3619                                                                 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3620                                                                 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3621                                                                 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3622                                                                 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3623                                                                 b3BufferInfoCL(m_dmins.getBufferCL())};
3624
3625                                                         b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel");
3626                                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3627                                                         launcher.setConst(vertexFaceCapacity);
3628                                                         launcher.setConst(numConcavePairs);
3629
3630                                                         int num = numConcavePairs;
3631                                                         launcher.launch1D(num);
3632                                                         clFinish(m_queue);
3633                                                 }
3634                                                 //                        numConcavePairs = 0;
3635                                                 if (1)
3636                                                 {
3637                                                         B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel");
3638                                                         b3BufferInfoCL bInfo[] = {
3639                                                                 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3640                                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3641                                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3642                                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
3643                                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3644                                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3645                                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3646                                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3647                                                                 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3648                                                                 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3649                                                                 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3650                                                                 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3651                                                                 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3652                                                                 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3653                                                                 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3654                                                                 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3655                                                                 b3BufferInfoCL(m_dmins.getBufferCL())};
3656
3657                                                         b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel");
3658                                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3659                                                         launcher.setConst(vertexFaceCapacity);
3660                                                         launcher.setConst(numConcavePairs);
3661
3662                                                         int num = numConcavePairs;
3663                                                         launcher.launch1D(num);
3664                                                         clFinish(m_queue);
3665                                                 }
3666
3667                                                 // numConcavePairs = 0;
3668                                         }
3669                                         else
3670                                         {
3671                                                 B3_PROFILE("findConcaveSeparatingAxisKernel");
3672                                                 b3BufferInfoCL bInfo[] = {
3673                                                         b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3674                                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3675                                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3676                                                         b3BufferInfoCL(convexData.getBufferCL(), true),
3677                                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3678                                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3679                                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3680                                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3681                                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3682                                                         b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3683                                                         b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3684                                                         b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3685                                                         b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3686                                                         b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3687                                                         b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3688                                                         b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
3689
3690                                                 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel");
3691                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3692                                                 launcher.setConst(vertexFaceCapacity);
3693                                                 launcher.setConst(numConcavePairs);
3694
3695                                                 int num = numConcavePairs;
3696                                                 launcher.launch1D(num);
3697                                                 clFinish(m_queue);
3698                                         }
3699                                 }
3700                                 else
3701                                 {
3702                                         b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3703                                         b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3704                                         b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3705                                         b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3706                                         b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3707
3708                                         b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3709                                         triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3710                                         //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3711                                         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3712                                         bodyBuf->copyToHost(hostBodyBuf);
3713                                         b3AlignedObjectArray<b3Collidable> hostCollidables;
3714                                         gpuCollidables.copyToHost(hostCollidables);
3715                                         b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3716                                         clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3717
3718                                         b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3719                                         convexData.copyToHost(hostConvexData);
3720
3721                                         b3AlignedObjectArray<b3Vector3> hostVertices;
3722                                         gpuVertices.copyToHost(hostVertices);
3723
3724                                         b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
3725                                         gpuUniqueEdges.copyToHost(hostUniqueEdges);
3726                                         b3AlignedObjectArray<b3GpuFace> hostFaces;
3727                                         gpuFaces.copyToHost(hostFaces);
3728                                         b3AlignedObjectArray<int> hostIndices;
3729                                         gpuIndices.copyToHost(hostIndices);
3730                                         b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3731                                         gpuChildShapes.copyToHost(cpuChildShapes);
3732
3733                                         b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3734                                         m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3735                                         concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size());
3736
3737                                         b3GpuChildShape* childShapePointerCPU = 0;
3738                                         if (cpuChildShapes.size())
3739                                                 childShapePointerCPU = &cpuChildShapes.at(0);
3740
3741                                         clippingFacesOutCPU.resize(clippingFacesOutGPU.size());
3742                                         worldVertsA1CPU.resize(worldVertsA1GPU.size());
3743                                         worldNormalsACPU.resize(worldNormalsAGPU.size());
3744                                         worldVertsB1CPU.resize(worldVertsB1GPU.size());
3745
3746                                         for (int i = 0; i < numConcavePairs; i++)
3747                                         {
3748                                                 b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0),
3749                                                                                                                   &hostBodyBuf.at(0),
3750                                                                                                                   &hostCollidables.at(0),
3751                                                                                                                   &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0),
3752                                                                                                                   &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU,
3753                                                                                                                   &hostAabbsWorldSpace.at(0),
3754                                                                                                                   &concaveSepNormalsHost.at(0),
3755                                                                                                                   &clippingFacesOutCPU.at(0),
3756                                                                                                                   &worldVertsA1CPU.at(0),
3757                                                                                                                   &worldNormalsACPU.at(0),
3758                                                                                                                   &worldVertsB1CPU.at(0),
3759                                                                                                                   &concaveHasSeparatingNormalsCPU.at(0),
3760                                                                                                                   vertexFaceCapacity,
3761                                                                                                                   numConcavePairs, i);
3762                                         };
3763
3764                                         m_concaveSepNormals.copyFromHost(concaveSepNormalsHost);
3765                                         m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3766                                         clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3767                                         worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3768                                         worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3769                                         worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3770                                 }
3771                                 //                                                      b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
3772                                 //                                              m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
3773                                 //                                      b3AlignedObjectArray<b3Int4> cpuConcavePairs;
3774                                 //                              triangleConvexPairsOut.copyToHost(cpuConcavePairs);
3775                         }
3776                 }
3777         }
3778
3779         if (numConcavePairs)
3780         {
3781                 if (numConcavePairs)
3782                 {
3783                         B3_PROFILE("findConcaveSphereContactsKernel");
3784                         nContacts = m_totalContactsOut.at(0);
3785                         //                              printf("nContacts1 = %d\n",nContacts);
3786                         b3BufferInfoCL bInfo[] = {
3787                                 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3788                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3789                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3790                                 b3BufferInfoCL(convexData.getBufferCL(), true),
3791                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3792                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3793                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3794                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3795                                 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3796                                 b3BufferInfoCL(contactOut->getBufferCL()),
3797                                 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3798
3799                         b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel");
3800                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3801
3802                         launcher.setConst(numConcavePairs);
3803                         launcher.setConst(maxContactCapacity);
3804
3805                         int num = numConcavePairs;
3806                         launcher.launch1D(num);
3807                         clFinish(m_queue);
3808                         nContacts = m_totalContactsOut.at(0);
3809                         //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts);
3810
3811                         //printf("nContacts2 = %d\n",nContacts);
3812
3813                         if (nContacts >= maxContactCapacity)
3814                         {
3815                                 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3816                                 nContacts = maxContactCapacity;
3817                         }
3818                 }
3819         }
3820
3821 #ifdef __APPLE__
3822         bool contactClippingOnGpu = true;
3823 #else
3824         bool contactClippingOnGpu = true;
3825 #endif
3826
3827         if (contactClippingOnGpu)
3828         {
3829                 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3830                 //              printf("nContacts3 = %d\n",nContacts);
3831
3832                 //B3_PROFILE("clipHullHullKernel");
3833
3834                 bool breakupConcaveConvexKernel = true;
3835
3836 #ifdef __APPLE__
3837                 //actually, some Apple OpenCL platform/device combinations work fine...
3838                 breakupConcaveConvexKernel = true;
3839 #endif
3840                 //concave-convex contact clipping
3841                 if (numConcavePairs)
3842                 {
3843                         //                      printf("numConcavePairs = %d\n", numConcavePairs);
3844                         //              nContacts = m_totalContactsOut.at(0);
3845                         //      printf("nContacts before = %d\n", nContacts);
3846
3847                         if (breakupConcaveConvexKernel)
3848                         {
3849                                 worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs);
3850
3851                                 //clipFacesAndFindContacts
3852
3853                                 if (clipConcaveFacesAndFindContactsCPU)
3854                                 {
3855                                         b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3856                                         b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3857                                         b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3858                                         b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3859
3860                                         clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3861                                         worldVertsA1GPU.copyToHost(worldVertsA1CPU);
3862                                         worldNormalsAGPU.copyToHost(worldNormalsACPU);
3863                                         worldVertsB1GPU.copyToHost(worldVertsB1CPU);
3864
3865                                         b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3866                                         m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3867
3868                                         b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3869                                         m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3870
3871                                         b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3872                                         worldVertsB2CPU.resize(worldVertsB2GPU.size());
3873
3874                                         for (int i = 0; i < numConcavePairs; i++)
3875                                         {
3876                                                 clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0),
3877                                                                                                            &concaveHasSeparatingNormalsCPU.at(0),
3878                                                                                                            &clippingFacesOutCPU.at(0),
3879                                                                                                            &worldVertsA1CPU.at(0),
3880                                                                                                            &worldNormalsACPU.at(0),
3881                                                                                                            &worldVertsB1CPU.at(0),
3882                                                                                                            &worldVertsB2CPU.at(0),
3883                                                                                                            vertexFaceCapacity,
3884                                                                                                            i);
3885                                         }
3886
3887                                         clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3888                                         worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
3889                                 }
3890                                 else
3891                                 {
3892                                         if (1)
3893                                         {
3894                                                 B3_PROFILE("clipFacesAndFindContacts");
3895                                                 //nContacts = m_totalContactsOut.at(0);
3896                                                 //int h = m_hasSeparatingNormals.at(0);
3897                                                 //int4 p = clippingFacesOutGPU.at(0);
3898                                                 b3BufferInfoCL bInfo[] = {
3899                                                         b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3900                                                         b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3901                                                         b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3902                                                         b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3903                                                         b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3904                                                         b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3905                                                         b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
3906                                                 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
3907                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3908                                                 launcher.setConst(vertexFaceCapacity);
3909
3910                                                 launcher.setConst(numConcavePairs);
3911                                                 int debugMode = 0;
3912                                                 launcher.setConst(debugMode);
3913                                                 int num = numConcavePairs;
3914                                                 launcher.launch1D(num);
3915                                                 clFinish(m_queue);
3916                                                 //int bla = m_totalContactsOut.at(0);
3917                                         }
3918                                 }
3919                                 //contactReduction
3920                                 {
3921                                         int newContactCapacity = nContacts + numConcavePairs;
3922                                         contactOut->reserve(newContactCapacity);
3923                                         if (reduceConcaveContactsOnGPU)
3924                                         {
3925                                                 //                                              printf("newReservation = %d\n",newReservation);
3926                                                 {
3927                                                         B3_PROFILE("newContactReductionKernel");
3928                                                         b3BufferInfoCL bInfo[] =
3929                                                                 {
3930                                                                         b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
3931                                                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3932                                                                         b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3933                                                                         b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3934                                                                         b3BufferInfoCL(contactOut->getBufferCL()),
3935                                                                         b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3936                                                                         b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
3937                                                                         b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3938
3939                                                         b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
3940                                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3941                                                         launcher.setConst(vertexFaceCapacity);
3942                                                         launcher.setConst(newContactCapacity);
3943                                                         launcher.setConst(numConcavePairs);
3944                                                         int num = numConcavePairs;
3945
3946                                                         launcher.launch1D(num);
3947                                                 }
3948                                                 nContacts = m_totalContactsOut.at(0);
3949                                                 contactOut->resize(nContacts);
3950
3951                                                 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
3952                                         }
3953                                         else
3954                                         {
3955                                                 volatile int nGlobalContactsOut = nContacts;
3956                                                 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3957                                                 triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3958                                                 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3959                                                 bodyBuf->copyToHost(hostBodyBuf);
3960
3961                                                 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3962                                                 m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3963
3964                                                 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3965                                                 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3966
3967                                                 b3AlignedObjectArray<b3Contact4> hostContacts;
3968                                                 if (nContacts)
3969                                                 {
3970                                                         contactOut->copyToHost(hostContacts);
3971                                                 }
3972                                                 hostContacts.resize(newContactCapacity);
3973
3974                                                 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3975                                                 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3976
3977                                                 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3978                                                 worldVertsB2GPU.copyToHost(worldVertsB2CPU);
3979
3980                                                 for (int i = 0; i < numConcavePairs; i++)
3981                                                 {
3982                                                         b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0),
3983                                                                                                                 &hostBodyBuf.at(0),
3984                                                                                                                 &concaveSepNormalsHost.at(0),
3985                                                                                                                 &concaveHasSeparatingNormalsCPU.at(0),
3986                                                                                                                 &hostContacts.at(0),
3987                                                                                                                 &clippingFacesOutCPU.at(0),
3988                                                                                                                 &worldVertsB2CPU.at(0),
3989                                                                                                                 &nGlobalContactsOut,
3990                                                                                                                 vertexFaceCapacity,
3991                                                                                                                 newContactCapacity,
3992                                                                                                                 numConcavePairs,
3993                                                                                                                 i);
3994                                                 }
3995
3996                                                 nContacts = nGlobalContactsOut;
3997                                                 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3998                                                 //                                              nContacts = m_totalContactsOut.at(0);
3999                                                 //contactOut->resize(nContacts);
4000                                                 hostContacts.resize(nContacts);
4001                                                 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4002                                                 contactOut->copyFromHost(hostContacts);
4003                                         }
4004                                 }
4005                                 //re-use?
4006                         }
4007                         else
4008                         {
4009                                 B3_PROFILE("clipHullHullConcaveConvexKernel");
4010                                 nContacts = m_totalContactsOut.at(0);
4011                                 int newContactCapacity = contactOut->capacity();
4012
4013                                 //printf("contactOut5 = %d\n",nContacts);
4014                                 b3BufferInfoCL bInfo[] = {
4015                                         b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
4016                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4017                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4018                                         b3BufferInfoCL(convexData.getBufferCL(), true),
4019                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4020                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4021                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4022                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4023                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4024                                         b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
4025                                         b3BufferInfoCL(contactOut->getBufferCL()),
4026                                         b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4027                                 b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel");
4028                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4029                                 launcher.setConst(newContactCapacity);
4030                                 launcher.setConst(numConcavePairs);
4031                                 int num = numConcavePairs;
4032                                 launcher.launch1D(num);
4033                                 clFinish(m_queue);
4034                                 nContacts = m_totalContactsOut.at(0);
4035                                 contactOut->resize(nContacts);
4036                                 //printf("contactOut6 = %d\n",nContacts);
4037                                 b3AlignedObjectArray<b3Contact4> cpuContacts;
4038                                 contactOut->copyToHost(cpuContacts);
4039                         }
4040                         //                      printf("nContacts after = %d\n", nContacts);
4041                 }  //numConcavePairs
4042
4043                 //convex-convex contact clipping
4044
4045                 bool breakupKernel = false;
4046
4047 #ifdef __APPLE__
4048                 breakupKernel = true;
4049 #endif
4050
4051 #ifdef CHECK_ON_HOST
4052                 bool computeConvexConvex = false;
4053 #else
4054                 bool computeConvexConvex = true;
4055 #endif  //CHECK_ON_HOST
4056                 if (computeConvexConvex)
4057                 {
4058                         B3_PROFILE("clipHullHullKernel");
4059                         if (breakupKernel)
4060                         {
4061                                 worldVertsB1GPU.resize(vertexFaceCapacity * nPairs);
4062                                 clippingFacesOutGPU.resize(nPairs);
4063                                 worldNormalsAGPU.resize(nPairs);
4064                                 worldVertsA1GPU.resize(vertexFaceCapacity * nPairs);
4065                                 worldVertsB2GPU.resize(vertexFaceCapacity * nPairs);
4066
4067                                 if (findConvexClippingFacesGPU)
4068                                 {
4069                                         B3_PROFILE("findClippingFacesKernel");
4070                                         b3BufferInfoCL bInfo[] = {
4071                                                 b3BufferInfoCL(pairs->getBufferCL(), true),
4072                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4073                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4074                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
4075                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4076                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4077                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4078                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4079                                                 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4080                                                 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4081                                                 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4082                                                 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4083                                                 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4084                                                 b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
4085
4086                                         b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel");
4087                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4088                                         launcher.setConst(vertexFaceCapacity);
4089                                         launcher.setConst(nPairs);
4090                                         int num = nPairs;
4091                                         launcher.launch1D(num);
4092                                         clFinish(m_queue);
4093                                 }
4094                                 else
4095                                 {
4096                                         float minDist = -1e30f;
4097                                         float maxDist = 0.02f;
4098
4099                                         b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
4100                                         convexData.copyToHost(hostConvexData);
4101                                         b3AlignedObjectArray<b3Collidable> hostCollidables;
4102                                         gpuCollidables.copyToHost(hostCollidables);
4103
4104                                         b3AlignedObjectArray<int> hostHasSepNormals;
4105                                         m_hasSeparatingNormals.copyToHost(hostHasSepNormals);
4106                                         b3AlignedObjectArray<b3Vector3> cpuSepNormals;
4107                                         m_sepNormals.copyToHost(cpuSepNormals);
4108
4109                                         b3AlignedObjectArray<b3Int4> hostPairs;
4110                                         pairs->copyToHost(hostPairs);
4111                                         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4112                                         bodyBuf->copyToHost(hostBodyBuf);
4113
4114                                         //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs);
4115                                         b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4116                                         worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4117
4118                                         b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
4119                                         clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
4120
4121                                         b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4122                                         worldNormalsACPU.resize(nPairs);
4123
4124                                         b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4125                                         worldVertsA1CPU.resize(worldVertsA1GPU.size());
4126
4127                                         b3AlignedObjectArray<b3Vector3> hostVertices;
4128                                         gpuVertices.copyToHost(hostVertices);
4129                                         b3AlignedObjectArray<b3GpuFace> hostFaces;
4130                                         gpuFaces.copyToHost(hostFaces);
4131                                         b3AlignedObjectArray<int> hostIndices;
4132                                         gpuIndices.copyToHost(hostIndices);
4133
4134                                         for (int i = 0; i < nPairs; i++)
4135                                         {
4136                                                 int bodyIndexA = hostPairs[i].x;
4137                                                 int bodyIndexB = hostPairs[i].y;
4138
4139                                                 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
4140                                                 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
4141
4142                                                 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
4143                                                 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
4144
4145                                                 if (hostHasSepNormals[i])
4146                                                 {
4147                                                         b3FindClippingFaces(cpuSepNormals[i],
4148                                                                                                 &hostConvexData[shapeIndexA],
4149                                                                                                 &hostConvexData[shapeIndexB],
4150                                                                                                 hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat,
4151                                                                                                 hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat,
4152                                                                                                 &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0),
4153                                                                                                 &worldVertsB1CPU.at(0),
4154                                                                                                 vertexFaceCapacity, minDist, maxDist,
4155                                                                                                 &hostVertices.at(0), &hostFaces.at(0),
4156                                                                                                 &hostIndices.at(0),
4157                                                                                                 &hostVertices.at(0), &hostFaces.at(0),
4158                                                                                                 &hostIndices.at(0), &clippingFacesOutCPU.at(0), i);
4159                                                 }
4160                                         }
4161
4162                                         clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
4163                                         worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
4164                                         worldNormalsAGPU.copyFromHost(worldNormalsACPU);
4165                                         worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
4166                                 }
4167
4168                                 ///clip face B against face A, reduce contacts and append them to a global contact array
4169                                 if (1)
4170                                 {
4171                                         if (clipConvexFacesAndFindContactsCPU)
4172                                         {
4173                                                 //b3AlignedObjectArray<b3Int4> hostPairs;
4174                                                 //pairs->copyToHost(hostPairs);
4175
4176                                                 b3AlignedObjectArray<b3Vector3> hostSepNormals;
4177                                                 m_sepNormals.copyToHost(hostSepNormals);
4178                                                 b3AlignedObjectArray<int> hostHasSepAxis;
4179                                                 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4180
4181                                                 b3AlignedObjectArray<b3Int4> hostClippingFaces;
4182                                                 clippingFacesOutGPU.copyToHost(hostClippingFaces);
4183                                                 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4184                                                 worldVertsB2CPU.resize(vertexFaceCapacity * nPairs);
4185
4186                                                 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4187                                                 worldVertsA1GPU.copyToHost(worldVertsA1CPU);
4188                                                 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4189                                                 worldNormalsAGPU.copyToHost(worldNormalsACPU);
4190
4191                                                 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4192                                                 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4193
4194                                                 /*
4195                                           __global const b3Float4* separatingNormals,
4196                                                    __global const int* hasSeparatingAxis,
4197                                                    __global b3Int4* clippingFacesOut,
4198                                                    __global b3Float4* worldVertsA1,
4199                                                    __global b3Float4* worldNormalsA1,
4200                                                    __global b3Float4* worldVertsB1,
4201                                                    __global b3Float4* worldVertsB2,
4202                                                     int vertexFaceCapacity,
4203                                                                                                                         int pairIndex
4204                                         */
4205                                                 for (int i = 0; i < nPairs; i++)
4206                                                 {
4207                                                         clipFacesAndFindContactsKernel(
4208                                                                 &hostSepNormals.at(0),
4209                                                                 &hostHasSepAxis.at(0),
4210                                                                 &hostClippingFaces.at(0),
4211                                                                 &worldVertsA1CPU.at(0),
4212                                                                 &worldNormalsACPU.at(0),
4213                                                                 &worldVertsB1CPU.at(0),
4214                                                                 &worldVertsB2CPU.at(0),
4215
4216                                                                 vertexFaceCapacity,
4217                                                                 i);
4218                                                 }
4219
4220                                                 clippingFacesOutGPU.copyFromHost(hostClippingFaces);
4221                                                 worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
4222                                         }
4223                                         else
4224                                         {
4225                                                 B3_PROFILE("clipFacesAndFindContacts");
4226                                                 //nContacts = m_totalContactsOut.at(0);
4227                                                 //int h = m_hasSeparatingNormals.at(0);
4228                                                 //int4 p = clippingFacesOutGPU.at(0);
4229                                                 b3BufferInfoCL bInfo[] = {
4230                                                         b3BufferInfoCL(m_sepNormals.getBufferCL()),
4231                                                         b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4232                                                         b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4233                                                         b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4234                                                         b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4235                                                         b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
4236                                                         b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
4237
4238                                                 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
4239                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4240                                                 launcher.setConst(vertexFaceCapacity);
4241
4242                                                 launcher.setConst(nPairs);
4243                                                 int debugMode = 0;
4244                                                 launcher.setConst(debugMode);
4245                                                 int num = nPairs;
4246                                                 launcher.launch1D(num);
4247                                                 clFinish(m_queue);
4248                                         }
4249
4250                                         {
4251                                                 nContacts = m_totalContactsOut.at(0);
4252                                                 //printf("nContacts = %d\n",nContacts);
4253
4254                                                 int newContactCapacity = nContacts + nPairs;
4255                                                 contactOut->reserve(newContactCapacity);
4256
4257                                                 if (reduceConvexContactsOnGPU)
4258                                                 {
4259                                                         {
4260                                                                 B3_PROFILE("newContactReductionKernel");
4261                                                                 b3BufferInfoCL bInfo[] =
4262                                                                         {
4263                                                                                 b3BufferInfoCL(pairs->getBufferCL(), true),
4264                                                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4265                                                                                 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4266                                                                                 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4267                                                                                 b3BufferInfoCL(contactOut->getBufferCL()),
4268                                                                                 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4269                                                                                 b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
4270                                                                                 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4271
4272                                                                 b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
4273                                                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4274                                                                 launcher.setConst(vertexFaceCapacity);
4275                                                                 launcher.setConst(newContactCapacity);
4276                                                                 launcher.setConst(nPairs);
4277                                                                 int num = nPairs;
4278
4279                                                                 launcher.launch1D(num);
4280                                                         }
4281                                                         nContacts = m_totalContactsOut.at(0);
4282                                                         contactOut->resize(nContacts);
4283                                                 }
4284                                                 else
4285                                                 {
4286                                                         volatile int nGlobalContactsOut = nContacts;
4287                                                         b3AlignedObjectArray<b3Int4> hostPairs;
4288                                                         pairs->copyToHost(hostPairs);
4289                                                         b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4290                                                         bodyBuf->copyToHost(hostBodyBuf);
4291                                                         b3AlignedObjectArray<b3Vector3> hostSepNormals;
4292                                                         m_sepNormals.copyToHost(hostSepNormals);
4293                                                         b3AlignedObjectArray<int> hostHasSepAxis;
4294                                                         m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4295                                                         b3AlignedObjectArray<b3Contact4> hostContactsOut;
4296                                                         contactOut->copyToHost(hostContactsOut);
4297                                                         hostContactsOut.resize(newContactCapacity);
4298
4299                                                         b3AlignedObjectArray<b3Int4> hostClippingFaces;
4300                                                         clippingFacesOutGPU.copyToHost(hostClippingFaces);
4301                                                         b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4302                                                         worldVertsB2GPU.copyToHost(worldVertsB2CPU);
4303
4304                                                         for (int i = 0; i < nPairs; i++)
4305                                                         {
4306                                                                 b3NewContactReductionKernel(&hostPairs.at(0),
4307                                                                                                                         &hostBodyBuf.at(0),
4308                                                                                                                         &hostSepNormals.at(0),
4309                                                                                                                         &hostHasSepAxis.at(0),
4310                                                                                                                         &hostContactsOut.at(0),
4311                                                                                                                         &hostClippingFaces.at(0),
4312                                                                                                                         &worldVertsB2CPU.at(0),
4313                                                                                                                         &nGlobalContactsOut,
4314                                                                                                                         vertexFaceCapacity,
4315                                                                                                                         newContactCapacity,
4316                                                                                                                         nPairs,
4317                                                                                                                         i);
4318                                                         }
4319
4320                                                         nContacts = nGlobalContactsOut;
4321                                                         m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
4322                                                         hostContactsOut.resize(nContacts);
4323                                                         //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4324                                                         contactOut->copyFromHost(hostContactsOut);
4325                                                 }
4326                                                 //                    b3Contact4 pt = contactOut->at(0);
4327                                                 //                  printf("nContacts = %d\n",nContacts);
4328                                         }
4329                                 }
4330                         }
4331                         else  //breakupKernel
4332                         {
4333                                 if (nPairs)
4334                                 {
4335                                         b3BufferInfoCL bInfo[] = {
4336                                                 b3BufferInfoCL(pairs->getBufferCL(), true),
4337                                                 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4338                                                 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4339                                                 b3BufferInfoCL(convexData.getBufferCL(), true),
4340                                                 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4341                                                 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4342                                                 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4343                                                 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4344                                                 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4345                                                 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4346                                                 b3BufferInfoCL(contactOut->getBufferCL()),
4347                                                 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4348                                         b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel");
4349                                         launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4350                                         launcher.setConst(nPairs);
4351                                         launcher.setConst(maxContactCapacity);
4352
4353                                         int num = nPairs;
4354                                         launcher.launch1D(num);
4355                                         clFinish(m_queue);
4356
4357                                         nContacts = m_totalContactsOut.at(0);
4358                                         if (nContacts >= maxContactCapacity)
4359                                         {
4360                                                 b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
4361                                                 nContacts = maxContactCapacity;
4362                                         }
4363                                         contactOut->resize(nContacts);
4364                                 }
4365                         }
4366
4367                         int nCompoundsPairs = m_gpuCompoundPairs.size();
4368
4369                         if (nCompoundsPairs)
4370                         {
4371                                 b3BufferInfoCL bInfo[] = {
4372                                         b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
4373                                         b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4374                                         b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4375                                         b3BufferInfoCL(convexData.getBufferCL(), true),
4376                                         b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4377                                         b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4378                                         b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4379                                         b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4380                                         b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4381                                         b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true),
4382                                         b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true),
4383                                         b3BufferInfoCL(contactOut->getBufferCL()),
4384                                         b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4385                                 b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel");
4386                                 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4387                                 launcher.setConst(nCompoundsPairs);
4388                                 launcher.setConst(maxContactCapacity);
4389
4390                                 int num = nCompoundsPairs;
4391                                 launcher.launch1D(num);
4392                                 clFinish(m_queue);
4393
4394                                 nContacts = m_totalContactsOut.at(0);
4395                                 if (nContacts > maxContactCapacity)
4396                                 {
4397                                         b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
4398                                         nContacts = maxContactCapacity;
4399                                 }
4400                                 contactOut->resize(nContacts);
4401                         }  //if nCompoundsPairs
4402                 }
4403         }  //contactClippingOnGpu
4404
4405         //printf("nContacts end = %d\n",nContacts);
4406
4407         //printf("frameCount = %d\n",frameCount++);
4408 }