2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
16 bool findSeparatingAxisOnGpu = true;
17 bool splitSearchSepAxisConcave = false;
18 bool splitSearchSepAxisConvex = true;
19 bool useMprGpu = true; //use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling...
20 bool bvhTraversalKernelGPU = true;
21 bool findConcaveSeparatingAxisKernelGPU = true;
22 bool clipConcaveFacesAndFindContactsCPU = false; //false;//true;
23 bool clipConvexFacesAndFindContactsCPU = false; //false;//true;
24 bool reduceConcaveContactsOnGPU = true; //false;
25 bool reduceConvexContactsOnGPU = true; //false;
26 bool findConvexClippingFacesGPU = true;
27 bool useGjk = false; ///option for CPU/host testing, when findSeparatingAxisOnGpu = false
28 bool useGjkContacts = false; //////option for CPU/host testing when findSeparatingAxisOnGpu = false
30 static int myframecount = 0; ///for testing
32 ///This file was written by Erwin Coumans
33 ///Separating axis rest based on work from Pierre Terdiman, see
34 ///And contact clipping based on work from Simon Hobbs
36 //#define B3_DEBUG_SAT_FACE
38 //#define CHECK_ON_HOST
41 //#define PERSISTENT_CONTACTS_HOST
44 int b3g_actualSATPairTests = 0;
46 #include "b3ConvexHullContact.h"
47 #include <string.h> //memcpy
48 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
49 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h"
51 #include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h"
52 #include "Bullet3Geometry/b3AabbUtil.h"
54 typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
56 #include <float.h> //for FLT_MAX
57 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
58 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
59 //#include "AdlQuaternion.h"
61 #include "kernels/satKernels.h"
62 #include "kernels/mprKernels.h"
64 #include "kernels/satConcaveKernels.h"
66 #include "kernels/satClipHullContacts.h"
67 #include "kernels/bvhTraversal.h"
68 #include "kernels/primitiveContacts.h"
70 #include "Bullet3Geometry/b3AabbUtil.h"
72 #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl"
73 #define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl"
75 #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl"
77 #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl"
78 #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl"
79 #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl"
89 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"
90 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"
91 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"
92 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h"
96 GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q)
101 m_findSeparatingAxisKernel(0),
102 m_findSeparatingAxisVertexFaceKernel(0),
103 m_findSeparatingAxisEdgeEdgeKernel(0),
104 m_unitSphereDirections(m_context, m_queue),
106 m_totalContactsOut(m_context, m_queue),
107 m_sepNormals(m_context, m_queue),
108 m_dmins(m_context, m_queue),
110 m_hasSeparatingNormals(m_context, m_queue),
111 m_concaveSepNormals(m_context, m_queue),
112 m_concaveHasSeparatingNormals(m_context, m_queue),
113 m_numConcavePairsOut(m_context, m_queue),
115 m_gpuCompoundPairs(m_context, m_queue),
117 m_gpuCompoundSepNormals(m_context, m_queue),
118 m_gpuHasCompoundSepNormals(m_context, m_queue),
120 m_numCompoundPairsOut(m_context, m_queue)
122 m_totalContactsOut.push_back(0);
128 const char* mprSrc = mprKernelsCL;
130 const char* srcConcave = satConcaveKernelsCL;
131 char flags[1024] = {0};
132 //#ifdef CL_PLATFORM_INTEL
133 // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
135 m_mprPenetrationKernel = 0;
136 m_findSeparatingAxisUnitSphereKernel = 0;
140 cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH);
141 b3Assert(errNum == CL_SUCCESS);
143 m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg);
144 b3Assert(m_mprPenetrationKernel);
145 b3Assert(errNum == CL_SUCCESS);
147 m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg);
148 b3Assert(m_findSeparatingAxisUnitSphereKernel);
149 b3Assert(errNum == CL_SUCCESS);
151 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
152 m_unitSphereDirections.resize(numDirections);
153 m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true);
156 cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH);
157 b3Assert(errNum == CL_SUCCESS);
159 cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH);
160 b3Assert(errNum == CL_SUCCESS);
162 m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg);
163 b3Assert(m_findSeparatingAxisKernel);
164 b3Assert(errNum == CL_SUCCESS);
166 m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg);
167 b3Assert(m_findSeparatingAxisVertexFaceKernel);
169 m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg);
170 b3Assert(m_findSeparatingAxisVertexFaceKernel);
172 m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg);
173 b3Assert(m_findConcaveSeparatingAxisKernel);
174 b3Assert(errNum == CL_SUCCESS);
176 m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg);
177 b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel);
178 b3Assert(errNum == CL_SUCCESS);
180 m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg);
181 b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel);
182 b3Assert(errNum == CL_SUCCESS);
184 m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg);
185 b3Assert(m_findCompoundPairsKernel);
186 b3Assert(errNum == CL_SUCCESS);
187 m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg);
188 b3Assert(m_processCompoundPairsKernel);
189 b3Assert(errNum == CL_SUCCESS);
194 const char* srcClip = satClipKernelsCL;
196 char flags[1024] = {0};
197 //#ifdef CL_PLATFORM_INTEL
198 // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
201 cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH);
202 b3Assert(errNum == CL_SUCCESS);
204 m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg);
205 b3Assert(errNum == CL_SUCCESS);
207 m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg);
208 b3Assert(errNum == CL_SUCCESS);
210 m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg);
211 b3Assert(errNum == CL_SUCCESS);
213 m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg);
214 b3Assert(errNum == CL_SUCCESS);
216 m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg);
217 b3Assert(errNum == CL_SUCCESS);
219 // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
220 // b3Assert(errNum==CL_SUCCESS);
222 m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip,
223 "newContactReductionKernel", &errNum, satClipContactsProg);
224 b3Assert(errNum == CL_SUCCESS);
228 m_clipHullHullKernel = 0;
229 m_clipCompoundsHullHullKernel = 0;
230 m_findClippingFacesKernel = 0;
231 m_newContactReductionKernel = 0;
232 m_clipFacesAndFindContacts = 0;
233 m_clipHullHullConcaveConvexKernel = 0;
234 // m_extractManifoldAndAddContactKernel = 0;
239 const char* srcBvh = bvhTraversalKernelCL;
240 cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH);
241 b3Assert(errNum == CL_SUCCESS);
243 m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, "");
244 b3Assert(errNum == CL_SUCCESS);
248 const char* primitiveContactsSrc = primitiveContactsKernelsCL;
249 cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH);
250 b3Assert(errNum == CL_SUCCESS);
252 m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, "");
253 b3Assert(errNum == CL_SUCCESS);
255 m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg);
256 b3Assert(errNum == CL_SUCCESS);
257 b3Assert(m_findConcaveSphereContactsKernel);
259 m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, "");
260 b3Assert(errNum == CL_SUCCESS);
261 b3Assert(m_processCompoundPairsPrimitivesKernel);
265 GpuSatCollision::~GpuSatCollision()
267 if (m_findSeparatingAxisVertexFaceKernel)
268 clReleaseKernel(m_findSeparatingAxisVertexFaceKernel);
270 if (m_findSeparatingAxisEdgeEdgeKernel)
271 clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel);
273 if (m_findSeparatingAxisUnitSphereKernel)
274 clReleaseKernel(m_findSeparatingAxisUnitSphereKernel);
276 if (m_mprPenetrationKernel)
277 clReleaseKernel(m_mprPenetrationKernel);
279 if (m_findSeparatingAxisKernel)
280 clReleaseKernel(m_findSeparatingAxisKernel);
282 if (m_findConcaveSeparatingAxisVertexFaceKernel)
283 clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel);
285 if (m_findConcaveSeparatingAxisEdgeEdgeKernel)
286 clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel);
288 if (m_findConcaveSeparatingAxisKernel)
289 clReleaseKernel(m_findConcaveSeparatingAxisKernel);
291 if (m_findCompoundPairsKernel)
292 clReleaseKernel(m_findCompoundPairsKernel);
294 if (m_processCompoundPairsKernel)
295 clReleaseKernel(m_processCompoundPairsKernel);
297 if (m_findClippingFacesKernel)
298 clReleaseKernel(m_findClippingFacesKernel);
300 if (m_clipFacesAndFindContacts)
301 clReleaseKernel(m_clipFacesAndFindContacts);
302 if (m_newContactReductionKernel)
303 clReleaseKernel(m_newContactReductionKernel);
304 if (m_primitiveContactsKernel)
305 clReleaseKernel(m_primitiveContactsKernel);
307 if (m_findConcaveSphereContactsKernel)
308 clReleaseKernel(m_findConcaveSphereContactsKernel);
310 if (m_processCompoundPairsPrimitivesKernel)
311 clReleaseKernel(m_processCompoundPairsPrimitivesKernel);
313 if (m_clipHullHullKernel)
314 clReleaseKernel(m_clipHullHullKernel);
315 if (m_clipCompoundsHullHullKernel)
316 clReleaseKernel(m_clipCompoundsHullHullKernel);
318 if (m_clipHullHullConcaveConvexKernel)
319 clReleaseKernel(m_clipHullHullConcaveConvexKernel);
320 // if (m_extractManifoldAndAddContactKernel)
321 // clReleaseKernel(m_extractManifoldAndAddContactKernel);
323 if (m_bvhTraversalKernel)
324 clReleaseKernel(m_bvhTraversalKernel);
327 struct MyTriangleCallback : public b3NodeOverlapCallback
332 virtual void processNode(int subPart, int triangleIndex)
334 printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB);
335 printf("triangleIndex %d\n", triangleIndex);
339 #define float4 b3Vector3
340 #define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w)
342 float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)
346 float dist = dot3F4(n, point) + planeEqn[3];
347 *closestPointOnFace = point - dist * n;
351 #define cross3(a, b) (a.cross(b))
352 b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn)
357 tr.setRotation(*orn);
358 b3Vector3 res = tr(*v);
362 inline bool IsPointInPolygon(const float4& p,
363 const b3GpuFace* face,
364 const float4* baseVertex,
365 const int* convexIndices,
374 float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f);
376 if (face->m_numIndices < 2)
379 float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]];
382 for (unsigned i = 0; i != face->m_numIndices; ++i)
385 float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];
389 v = cross3(ab, plane);
391 if (b3Dot(ap, v) > 0.f)
393 float ab_m2 = b3Dot(ab, ab);
394 float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f;
406 out[0].x = s * a.x + rt * b.x;
407 out[0].y = s * a.y + rt * b.y;
408 out[0].z = s * a.z + rt * b.z;
416 #define normalize3(a) (a.normalize())
418 int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
429 float4 center = b3MakeVector3(0, 0, 0, 0);
431 for (int i = 0; i < nPoints; i++)
433 center /= (float)nPoints;
436 // sample 4 directions
438 float4 aVector = p[0] - center;
439 float4 u = cross3(nearNormal, aVector);
440 float4 v = cross3(nearNormal, u);
444 //keep point with deepest penetration
445 float minW = FLT_MAX;
456 for (int ie = 0; ie < nPoints; ie++)
464 float4 r = p[ie] - center;
469 contactIdx[0].x = ie;
476 contactIdx[0].y = ie;
483 contactIdx[0].z = ie;
490 contactIdx[0].w = ie;
494 if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
496 //replace the first contact with minimum (todo: replace contact with least penetration)
497 contactIdx[0].x = minIndex;
503 #define MAX_VERTS 1024
505 inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
509 int numVerts = hull.m_numVertices;
511 const float4 localDir = b3QuatRotate(orn.inverse(), dir);
513 b3Scalar offset = dot3F4(pos, dir);
515 for (int i = 0; i < numVerts; i++)
517 //b3Vector3 pt = trans * vertices[m_vertexOffset+i];
518 //b3Scalar dp = pt.dot(dir);
519 //b3Vector3 vertex = vertices[hull.m_vertexOffset+i];
520 b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir);
522 if (dp < min) min = dp;
523 if (dp > max) max = dp;
535 static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
536 const float4& posA, const b3Quaternion& ornA,
537 const float4& posB, const b3Quaternion& ornB,
538 const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)
542 project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0);
543 project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1);
545 if (Max0 < Min1 || Max1 < Min0)
548 b3Scalar d0 = Max0 - Min1;
550 b3Scalar d1 = Max1 - Min0;
552 depth = d0 < d1 ? d0 : d1;
556 inline bool IsAlmostZero(const b3Vector3& v)
558 if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false;
562 static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
564 const b3Quaternion& ornA,
566 const b3Quaternion& ornB,
567 const b3AlignedObjectArray<b3Vector3>& verticesA,
568 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
569 const b3AlignedObjectArray<b3GpuFace>& facesA,
570 const b3AlignedObjectArray<int>& indicesA,
571 const b3AlignedObjectArray<b3Vector3>& verticesB,
572 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
573 const b3AlignedObjectArray<b3GpuFace>& facesB,
574 const b3AlignedObjectArray<int>& indicesB,
578 B3_PROFILE("findSeparatingAxis");
580 b3g_actualSATPairTests++;
585 //#ifdef TEST_INTERNAL_OBJECTS
586 float4 c0local = (float4&)hullA.m_localCenter;
587 float4 c0 = transform(&c0local, &posA, &ornA);
588 float4 c1local = (float4&)hullB.m_localCenter;
589 float4 c1 = transform(&c1local, &posB, &ornB);
590 const float4 deltaC2 = c0 - c1;
593 b3Scalar dmin = FLT_MAX;
594 int curPlaneTests = 0;
596 int numFacesA = hullA.m_numFaces;
597 // Test normals from hullA
598 for (int i = 0; i < numFacesA; i++)
600 const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane;
601 float4 faceANormalWS = b3QuatRotate(ornA, normal);
603 if (dot3F4(deltaC2, faceANormalWS) < 0)
604 faceANormalWS *= -1.f;
607 #ifdef TEST_INTERNAL_OBJECTS
609 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
615 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))
621 sep = (b3Vector3&)faceANormalWS;
625 int numFacesB = hullB.m_numFaces;
626 // Test normals from hullB
627 for (int i = 0; i < numFacesB; i++)
629 float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane;
630 float4 WorldNormal = b3QuatRotate(ornB, normal);
632 if (dot3F4(deltaC2, WorldNormal) < 0)
637 #ifdef TEST_INTERNAL_OBJECTS
639 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))
645 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))
651 sep = (b3Vector3&)WorldNormal;
657 for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
659 const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0];
660 float4 edge0World = b3QuatRotate(ornA, (float4&)edge0);
662 for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
664 const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1];
665 float4 edge1World = b3QuatRotate(ornB, (float4&)edge1);
667 float4 crossje = cross3(edge0World, edge1World);
670 if (!IsAlmostZero((b3Vector3&)crossje))
672 crossje = normalize3(crossje);
673 if (dot3F4(deltaC2, crossje) < 0)
676 #ifdef TEST_INTERNAL_OBJECTS
678 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))
684 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))
690 sep = (b3Vector3&)crossje;
696 if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f)
702 bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
703 const b3Float4& posA1,
705 const b3Float4& posB1,
707 const b3Float4& DeltaC2,
708 __global const b3AlignedObjectArray<float4>& vertices,
709 __global const b3AlignedObjectArray<float4>& uniqueEdges,
710 __global const b3AlignedObjectArray<b3GpuFace>& faces,
711 __global const b3AlignedObjectArray<int>& indices,
715 // int i = get_global_id(0);
722 //int curPlaneTests=0;
726 for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
728 const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0];
729 float4 edge0World = b3QuatRotate(ornA, edge0);
731 for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
733 const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1];
734 float4 edge1World = b3QuatRotate(ornB, edge1);
736 float4 crossje = cross3(edge0World, edge1World);
739 if (!IsAlmostZero(crossje))
741 crossje = normalize3(crossje);
742 if (dot3F4(DeltaC2, crossje) < 0)
750 project(*hullA, posA, ornA, crossje, vertices, Min0, Max0);
751 project(*hullB, posB, ornB, crossje, vertices, Min1, Max1);
753 if (Max0 < Min1 || Max1 < Min0)
756 float d0 = Max0 - Min1;
757 float d1 = Max1 - Min0;
758 dist = d0 < d1 ? d0 : d1;
771 if ((dot3F4(-DeltaC2, *sep)) > 0.0f)
778 __inline float4 lerp3(const float4& a, const float4& b, float t)
780 return b3MakeVector3(a.x + (b.x - a.x) * t,
781 a.y + (b.y - a.y) * t,
782 a.z + (b.z - a.z) * t,
786 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
787 int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut)
795 float4 firstVertex = pVtxIn[numVertsIn - 1];
796 float4 endVertex = pVtxIn[0];
798 ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS;
800 for (ve = 0; ve < numVertsIn; ve++)
802 endVertex = pVtxIn[ve];
804 de = dot3F4(planeNormalWS, endVertex) + planeEqWS;
810 // Start < 0, end < 0, so output endVertex
811 ppVtxOut[numVertsOut++] = endVertex;
815 // Start < 0, end >= 0, so output intersection
816 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
823 // Start >= 0, end < 0 so output intersection and end
824 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
825 ppVtxOut[numVertsOut++] = endVertex;
828 firstVertex = endVertex;
834 int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
835 const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1,
836 float4* worldVertsB2, int capacityWorldVertsB2,
837 const float minDist, float maxDist,
838 const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
839 //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB,
843 int numContactsOut = 0;
845 float4* pVtxIn = worldVertsB1;
846 float4* pVtxOut = worldVertsB2;
848 int numVertsIn = numWorldVertsB1;
851 int closestFaceA = -1;
853 float dmin = FLT_MAX;
854 for (int face = 0; face < hullA->m_numFaces; face++)
856 const float4 Normal = b3MakeVector3(
857 facesA[hullA->m_faceOffset + face].m_plane.x,
858 facesA[hullA->m_faceOffset + face].m_plane.y,
859 facesA[hullA->m_faceOffset + face].m_plane.z, 0.f);
860 const float4 faceANormalWS = b3QuatRotate(ornA, Normal);
862 float d = dot3F4(faceANormalWS, separatingNormal);
870 if (closestFaceA < 0)
871 return numContactsOut;
873 b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];
875 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
876 // int numContacts = numWorldVertsB1;
877 int numVerticesA = polyA.m_numIndices;
878 for (int e0 = 0; e0 < numVerticesA; e0++)
880 const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]];
881 const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];
882 const float4 edge0 = a - b;
883 const float4 WorldEdge0 = b3QuatRotate(ornA, edge0);
884 float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
885 float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA);
887 float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1);
888 float4 worldA1 = transform(&a, &posA, &ornA);
889 float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1);
891 float4 planeNormalWS = planeNormalWS1;
892 float planeEqWS = planeEqWS1;
895 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
896 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);
898 //btSwap(pVtxIn,pVtxOut);
899 float4* tmp = pVtxOut;
902 numVertsIn = numVertsOut;
906 // only keep points that are behind the witness face
908 float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
909 float localPlaneEq = polyA.m_plane.w;
910 float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal);
911 float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA);
912 for (int i = 0; i < numVertsIn; i++)
914 float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS;
915 if (depth <= minDist)
919 if (numContactsOut < contactCapacity)
921 if (depth <= maxDist)
923 float4 pointInWorld = pVtxIn[i];
924 //resultOut.addContactPoint(separatingNormal,point,depth);
925 contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);
926 //printf("depth=%f\n",depth);
931 b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity);
936 return numContactsOut;
939 static int clipHullAgainstHull(const float4& separatingNormal,
940 const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
941 const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB,
942 float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
943 const float minDist, float maxDist,
944 const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
945 const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,
950 int numContactsOut = 0;
951 int numWorldVertsB1 = 0;
953 B3_PROFILE("clipHullAgainstHull");
955 // float curMaxDist=maxDist;
956 int closestFaceB = -1;
957 float dmax = -FLT_MAX;
960 //B3_PROFILE("closestFaceB");
961 if (hullB.m_numFaces != 1)
965 static bool once = true;
966 //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
968 for (int face = 0; face < hullB.m_numFaces; face++)
970 #ifdef BT_DEBUG_SAT_FACE
972 printf("face %d\n", face);
973 const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];
976 for (int i = 0; i < faceB->m_numIndices; i++)
978 float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]];
979 printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);
982 #endif //BT_DEBUG_SAT_FACE \
983 //if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
985 const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x,
986 facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);
987 const float4 WorldNormal = b3QuatRotate(ornB, Normal);
988 #ifdef BT_DEBUG_SAT_FACE
990 printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);
992 float d = dot3F4(WorldNormal, separatingNormal);
1003 b3Assert(closestFaceB >= 0);
1005 //B3_PROFILE("worldVertsB1");
1006 const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];
1007 const int numVertices = polyB.m_numIndices;
1008 for (int e0 = 0; e0 < numVertices; e0++)
1010 const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
1011 worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB);
1015 if (closestFaceB >= 0)
1017 //B3_PROFILE("clipFaceAgainstHull");
1018 numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA,
1020 worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist,
1021 verticesA, facesA, indicesA,
1022 contactsOut, contactCapacity);
1025 return numContactsOut;
1028 #define PARALLEL_SUM(v, n) \
1029 for (int j = 1; j < n; j++) v[0] += v[j];
1030 #define PARALLEL_DO(execution, n) \
1031 for (int ie = 0; ie < n; ie++) \
1035 #define REDUCE_MAX(v, n) \
1038 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \
1040 #define REDUCE_MIN(v, n) \
1043 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \
1046 int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
1057 float4 center = make_float4(0, 0, 0, 0);
1059 for (int i = 0; i < nPoints; i++)
1061 center /= (float)nPoints;
1064 // sample 4 directions
1066 float4 aVector = p[0] - center;
1067 float4 u = cross3(nearNormal, aVector);
1068 float4 v = cross3(nearNormal, u);
1072 //keep point with deepest penetration
1073 float minW = FLT_MAX;
1078 maxDots.x = FLT_MIN;
1079 maxDots.y = FLT_MIN;
1080 maxDots.z = FLT_MIN;
1081 maxDots.w = FLT_MIN;
1084 for (int ie = 0; ie < nPoints; ie++)
1092 float4 r = p[ie] - center;
1097 contactIdx[0].x = ie;
1104 contactIdx[0].y = ie;
1111 contactIdx[0].z = ie;
1118 contactIdx[0].w = ie;
1122 if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
1124 //replace the first contact with minimum (todo: replace contact with least penetration)
1125 contactIdx[0].x = minIndex;
1131 int clipHullHullSingle(
1132 int bodyIndexA, int bodyIndexB,
1134 const b3Quaternion& ornA,
1136 const b3Quaternion& ornB,
1138 int collidableIndexA, int collidableIndexB,
1140 const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
1141 b3AlignedObjectArray<b3Contact4>* globalContactOut,
1144 const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
1145 const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
1147 const b3AlignedObjectArray<b3Vector3>& verticesA,
1148 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
1149 const b3AlignedObjectArray<b3GpuFace>& facesA,
1150 const b3AlignedObjectArray<int>& indicesA,
1152 const b3AlignedObjectArray<b3Vector3>& verticesB,
1153 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
1154 const b3AlignedObjectArray<b3GpuFace>& facesB,
1155 const b3AlignedObjectArray<int>& indicesB,
1157 const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
1158 const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
1159 const b3Vector3& sepNormalWorldSpace,
1160 int maxContactCapacity)
1162 int contactIndex = -1;
1163 b3ConvexPolyhedronData hullA, hullB;
1165 b3Collidable colA = hostCollidablesA[collidableIndexA];
1166 hullA = hostConvexDataA[colA.m_shapeIndex];
1167 //printf("numvertsA = %d\n",hullA.m_numVertices);
1169 b3Collidable colB = hostCollidablesB[collidableIndexB];
1170 hullB = hostConvexDataB[colB.m_shapeIndex];
1171 //printf("numvertsB = %d\n",hullB.m_numVertices);
1173 float4 contactsOut[MAX_VERTS];
1174 int localContactCapacity = MAX_VERTS;
1177 b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
1178 b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
1182 float4 worldVertsB1[MAX_VERTS];
1183 float4 worldVertsB2[MAX_VERTS];
1184 int capacityWorldVerts = MAX_VERTS;
1186 float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);
1187 int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
1188 int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
1190 b3Scalar minDist = -1;
1191 b3Scalar maxDist = 0.;
1193 b3Transform trA, trB;
1195 //B3_PROFILE("transform computation");
1196 //trA.setIdentity();
1197 trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z));
1198 trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w));
1200 //trB.setIdentity();
1201 trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z));
1202 trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));
1205 b3Quaternion trAorn = trA.getRotation();
1206 b3Quaternion trBorn = trB.getRotation();
1208 int numContactsOut = clipHullAgainstHull(hostNormal,
1209 hostConvexDataA.at(shapeA),
1210 hostConvexDataB.at(shapeB),
1211 (float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
1212 (float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
1213 worldVertsB1, worldVertsB2, capacityWorldVerts,
1215 verticesA, facesA, indicesA,
1216 verticesB, facesB, indicesB,
1218 contactsOut, localContactCapacity);
1220 if (numContactsOut > 0)
1222 B3_PROFILE("overlap");
1224 float4 normalOnSurfaceB = (float4&)hostNormal;
1235 // B3_PROFILE("extractManifold");
1236 numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
1239 b3Assert(numPoints);
1241 if (nContacts < maxContactCapacity)
1243 contactIndex = nContacts;
1244 globalContactOut->expand();
1245 b3Contact4& contact = globalContactOut->at(nContacts);
1246 contact.m_batchIdx = 0; //i;
1247 contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
1248 contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
1250 contact.m_frictionCoeffCmp = 45874;
1251 contact.m_restituitionCoeffCmp = 0;
1253 // float distance = 0.f;
1254 for (int p = 0; p < numPoints; p++)
1256 contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B
1257 contact.m_worldNormalOnB = normalOnSurfaceB;
1259 //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
1260 contact.m_worldNormalOnB.w = (b3Scalar)numPoints;
1265 b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
1269 return contactIndex;
1272 void computeContactPlaneConvex(int pairIndex,
1273 int bodyIndexA, int bodyIndexB,
1274 int collidableIndexA, int collidableIndexB,
1275 const b3RigidBodyData* rigidBodies,
1276 const b3Collidable* collidables,
1277 const b3ConvexPolyhedronData* convexShapes,
1278 const b3Vector3* convexVertices,
1279 const int* convexIndices,
1280 const b3GpuFace* faces,
1281 b3Contact4* globalContactsOut,
1282 int& nGlobalContactsOut,
1283 int maxContactCapacity)
1285 int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
1286 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex];
1288 b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
1289 b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
1290 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
1291 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
1293 // int numContactsOut = 0;
1294 // int numWorldVertsB1= 0;
1296 b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
1297 b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
1298 b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
1299 float planeConstant = planeEq.w;
1300 b3Transform convexWorldTransform;
1301 convexWorldTransform.setIdentity();
1302 convexWorldTransform.setOrigin(posB);
1303 convexWorldTransform.setRotation(ornB);
1304 b3Transform planeTransform;
1305 planeTransform.setIdentity();
1306 planeTransform.setOrigin(posA);
1307 planeTransform.setRotation(ornA);
1309 b3Transform planeInConvex;
1310 planeInConvex = convexWorldTransform.inverse() * planeTransform;
1311 b3Transform convexInPlane;
1312 convexInPlane = planeTransform.inverse() * convexWorldTransform;
1314 b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
1315 float maxDot = -1e30;
1319 #define MAX_PLANE_CONVEX_POINTS 64
1321 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
1325 contactIdx.s[0] = 0;
1326 contactIdx.s[1] = 1;
1327 contactIdx.s[2] = 2;
1328 contactIdx.s[3] = 3;
1330 for (int i = 0; i < hullB->m_numVertices; i++)
1332 b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
1333 float curDot = vtx.dot(planeNormalInConvex);
1335 if (curDot > maxDot)
1340 //make sure the deepest points is always included
1341 if (numPoints == MAX_PLANE_CONVEX_POINTS)
1345 if (numPoints < MAX_PLANE_CONVEX_POINTS)
1347 b3Vector3 vtxWorld = convexWorldTransform * vtx;
1348 b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
1349 float dist = planeNormal.dot(vtxInPlane) - planeConstant;
1353 contactPoints[numPoints] = vtxWorld;
1359 int numReducedPoints = 0;
1361 numReducedPoints = numPoints;
1365 numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
1368 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
1370 if (numReducedPoints > 0)
1372 if (nGlobalContactsOut < maxContactCapacity)
1374 dstIdx = nGlobalContactsOut;
1375 nGlobalContactsOut++;
1377 b3Contact4* c = &globalContactsOut[dstIdx];
1378 c->m_worldNormalOnB = -planeNormalWorld;
1379 c->setFrictionCoeff(0.7);
1380 c->setRestituitionCoeff(0.f);
1382 c->m_batchIdx = pairIndex;
1383 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
1384 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
1385 for (int i = 0; i < numReducedPoints; i++)
1387 b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
1388 c->m_worldPosB[i] = pOnB1;
1390 c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
1391 } //if (dstIdx < numPairs)
1394 // printf("computeContactPlaneConvex\n");
1397 B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
1401 (b3Scalar)(vecIn[0]) / (quantization.x),
1402 (b3Scalar)(vecIn[1]) / (quantization.y),
1403 (b3Scalar)(vecIn[2]) / (quantization.z));
1404 vecOut += bvhAabbMin;
1408 void traverseTreeTree()
1412 #include "Bullet3Common/shared/b3Mat3x3.h"
1414 int numAabbChecks = 0;
1415 int maxNumAabbChecks = 0;
1419 __kernel void findCompoundPairsKernel(
1423 int collidableIndexA,
1424 int collidableIndexB,
1425 __global const b3RigidBodyData* rigidBodies,
1426 __global const b3Collidable* collidables,
1427 __global const b3ConvexPolyhedronData* convexShapes,
1428 __global const b3AlignedObjectArray<b3Float4>& vertices,
1429 __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace,
1430 __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace,
1431 __global const b3GpuChildShape* gpuChildShapes,
1432 __global b3Int4* gpuCompoundPairsOut,
1433 __global int* numCompoundPairsOut,
1434 int maxNumCompoundPairsCapacity,
1435 b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
1436 b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
1437 b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
1440 maxNumAabbChecks = 0;
1441 // int i = pairIndex;
1443 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1444 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1446 //once the broadphase avoids static-static pairs, we can remove this test
1447 if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
1452 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1454 int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
1455 int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
1456 int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
1457 int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
1458 int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
1460 int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;
1462 float4 posA = rigidBodies[bodyIndexA].m_pos;
1463 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1466 transA.setIdentity();
1467 transA.setOrigin(posA);
1468 transA.setRotation(ornA);
1470 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1471 float4 posB = rigidBodies[bodyIndexB].m_pos;
1474 transB.setIdentity();
1475 transB.setOrigin(posB);
1476 transB.setRotation(ornB);
1478 for (int p = 0; p < numSubTreesA; p++)
1480 b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p];
1481 //bvhInfoCPU[bvhA].m_quantization
1482 b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1483 b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1485 b3Vector3 aabbAMinOut, aabbAMaxOut;
1487 b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1489 for (int q = 0; q < numSubTreesB; q++)
1491 b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q];
1493 b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1494 b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1496 b3Vector3 aabbBMinOut, aabbBMaxOut;
1498 b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1501 bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1504 int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset;
1505 // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1507 int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset;
1508 // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1510 b3AlignedObjectArray<b3Int2> nodeStack;
1512 node0.x = startNodeIndexA;
1513 node0.y = startNodeIndexB;
1515 int maxStackDepth = 1024;
1516 nodeStack.resize(maxStackDepth);
1518 nodeStack[depth++] = node0;
1522 if (depth > maxDepth)
1525 printf("maxDepth=%d\n", maxDepth);
1527 b3Int2 node = nodeStack[--depth];
1529 b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1530 b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1532 b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1533 b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1536 b3Vector3 aabbAMinOut, aabbAMaxOut;
1537 b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1539 b3Vector3 aabbBMinOut, aabbBMaxOut;
1540 b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1543 bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1546 bool isLeafA = treeNodesCPU[node.x].isLeafNode();
1547 bool isLeafB = treeNodesCPU[node.y].isLeafNode();
1548 bool isInternalA = !isLeafA;
1549 bool isInternalB = !isLeafB;
1551 //fail, even though it might hit two leaf nodes
1552 if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB))
1554 b3Error("Error: traversal exceeded maxStackDepth\n");
1560 int nodeAleftChild = node.x + 1;
1561 bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode();
1562 int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex();
1566 int nodeBleftChild = node.y + 1;
1567 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1568 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1570 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1571 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1572 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1573 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1577 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y);
1578 nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y);
1585 int nodeBleftChild = node.y + 1;
1586 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1587 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1588 nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild);
1589 nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild);
1593 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1594 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1596 int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
1597 int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
1598 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1604 maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks);
1612 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1614 if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1616 int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1617 for (int c = 0; c < numChildrenA; c++)
1619 int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c;
1620 int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1622 float4 posA = rigidBodies[bodyIndexA].m_pos;
1623 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1624 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1625 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1626 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1627 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1629 b3Aabb aabbA = aabbsLocalSpace[childColIndexA];
1632 transA.setIdentity();
1633 transA.setOrigin(newPosA);
1634 transA.setRotation(newOrnA);
1635 b3Scalar margin = 0.0f;
1637 b3Vector3 aabbAMinOut, aabbAMaxOut;
1639 b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1641 if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1643 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1644 for (int b = 0; b < numChildrenB; b++)
1646 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1647 int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1648 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1649 float4 posB = rigidBodies[bodyIndexB].m_pos;
1650 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1651 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1652 float4 newPosB = transform(&childPosB, &posB, &ornB);
1653 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1655 b3Aabb aabbB = aabbsLocalSpace[childColIndexB];
1658 transB.setIdentity();
1659 transB.setOrigin(newPosB);
1660 transB.setRotation(newOrnB);
1662 b3Vector3 aabbBMinOut, aabbBMaxOut;
1663 b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1666 bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1670 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1671 float dmin = FLT_MAX;
1672 float4 posA = newPosA;
1674 float4 posB = newPosB;
1676 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1677 b3Quat ornA = newOrnA;
1678 float4 c0 = transform(&c0local, &posA, &ornA);
1679 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1680 b3Quat ornB =newOrnB;
1681 float4 c1 = transform(&c1local,&posB,&ornB);
1682 const float4 DeltaC2 = c0 - c1;
1685 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1686 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1688 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1693 } //if (collidables[collidableIndexB].
1694 else //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1698 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1699 // float dmin = FLT_MAX;
1700 float4 posA = newPosA;
1702 float4 posB = rigidBodies[bodyIndexB].m_pos;
1704 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1705 b3Quat ornA = newOrnA;
1707 c0 = transform(&c0local, &posA, &ornA);
1708 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1709 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1711 c1 = transform(&c1local, &posB, &ornB);
1712 // const float4 DeltaC2 = c0 - c1;
1715 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1716 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1718 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1);
1719 } //if (compoundPairIdx<maxNumCompoundPairsCapacity)
1722 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1723 } //for (int b=0;b<numChildrenB;b++)
1725 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1726 if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1728 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1729 for (int b = 0; b < numChildrenB; b++)
1731 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1732 int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1733 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1734 float4 posB = rigidBodies[bodyIndexB].m_pos;
1735 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1736 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1737 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1738 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1740 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1742 //////////////////////////////////////
1746 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1747 // float dmin = FLT_MAX;
1748 float4 posA = rigidBodies[bodyIndexA].m_pos;
1750 float4 posB = newPosB;
1752 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1753 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1755 c0 = transform(&c0local, &posA, &ornA);
1756 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1757 b3Quat ornB = newOrnB;
1759 c1 = transform(&c1local, &posB, &ornB);
1760 // const float4 DeltaC2 = c0 - c1;
1762 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1763 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1765 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB);
1766 } //fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1769 } //for (int b=0;b<numChildrenB;b++)
1771 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1773 } //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1777 __kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs,
1778 __global const b3RigidBodyData* rigidBodies,
1779 __global const b3Collidable* collidables,
1780 __global const b3ConvexPolyhedronData* convexShapes,
1781 __global const b3AlignedObjectArray<b3Float4>& vertices,
1782 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1783 __global const b3AlignedObjectArray<b3GpuFace>& faces,
1784 __global const b3AlignedObjectArray<int>& indices,
1785 __global b3Aabb* aabbs,
1786 __global const b3GpuChildShape* gpuChildShapes,
1787 __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1788 __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1789 int numCompoundPairs,
1792 // int i = get_global_id(0);
1793 if (i < numCompoundPairs)
1795 int bodyIndexA = gpuCompoundPairs[i].x;
1796 int bodyIndexB = gpuCompoundPairs[i].y;
1798 int childShapeIndexA = gpuCompoundPairs[i].z;
1799 int childShapeIndexB = gpuCompoundPairs[i].w;
1801 int collidableIndexA = -1;
1802 int collidableIndexB = -1;
1804 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1805 float4 posA = rigidBodies[bodyIndexA].m_pos;
1807 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1808 float4 posB = rigidBodies[bodyIndexB].m_pos;
1810 if (childShapeIndexA >= 0)
1812 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1813 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1814 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1815 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1816 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1822 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1825 if (childShapeIndexB >= 0)
1827 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1828 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1829 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1830 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1831 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1837 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1840 gpuHasCompoundSepNormalsOut[i] = 0;
1842 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1843 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1845 int shapeTypeA = collidables[collidableIndexA].m_shapeType;
1846 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1848 if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
1853 int hasSeparatingAxis = 5;
1855 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1856 float dmin = FLT_MAX;
1859 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1860 float4 c0 = transform(&c0local, &posA, &ornA);
1861 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1862 float4 c1 = transform(&c1local, &posB, &ornB);
1863 const float4 DeltaC2 = c0 - c1;
1864 float4 sepNormal = make_float4(1, 0, 0, 0);
1865 // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
1866 bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin);
1868 hasSeparatingAxis = 4;
1871 hasSeparatingAxis = 0;
1875 bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin);
1879 hasSeparatingAxis = 0;
1883 bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin);
1886 gpuCompoundSepNormalsOut[i] = sepNormal; //fastNormalize4(sepNormal);
1887 gpuHasCompoundSepNormalsOut[i] = 1;
1894 __kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs,
1895 __global const b3RigidBodyData* rigidBodies,
1896 __global const b3Collidable* collidables,
1897 __global const b3ConvexPolyhedronData* convexShapes,
1898 __global const b3AlignedObjectArray<b3Float4>& vertices,
1899 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1900 __global const b3AlignedObjectArray<b3GpuFace>& faces,
1901 __global const b3AlignedObjectArray<int>& indices,
1902 __global const b3GpuChildShape* gpuChildShapes,
1903 __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1904 __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1905 __global struct b3Contact4Data* globalContactsOut,
1906 int* nGlobalContactsOut,
1907 int numCompoundPairs, int maxContactCapacity, int i)
1909 // int i = get_global_id(0);
1912 float4 worldVertsB1[64];
1913 float4 worldVertsB2[64];
1914 int capacityWorldVerts = 64;
1916 float4 localContactsOut[64];
1917 int localContactCapacity = 64;
1919 float minDist = -1e30f;
1920 float maxDist = 0.0f;
1922 if (i < numCompoundPairs)
1924 if (gpuHasCompoundSepNormalsOut[i])
1926 int bodyIndexA = gpuCompoundPairs[i].x;
1927 int bodyIndexB = gpuCompoundPairs[i].y;
1929 int childShapeIndexA = gpuCompoundPairs[i].z;
1930 int childShapeIndexB = gpuCompoundPairs[i].w;
1932 int collidableIndexA = -1;
1933 int collidableIndexB = -1;
1935 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1936 float4 posA = rigidBodies[bodyIndexA].m_pos;
1938 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1939 float4 posB = rigidBodies[bodyIndexB].m_pos;
1941 if (childShapeIndexA >= 0)
1943 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1944 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1945 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1946 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1947 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1953 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1956 if (childShapeIndexB >= 0)
1958 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1959 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1960 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1961 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1962 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1968 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1971 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1972 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1974 int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
1975 convexShapes[shapeIndexA], convexShapes[shapeIndexB],
1978 worldVertsB1, worldVertsB2, capacityWorldVerts,
1980 vertices, faces, indices,
1981 vertices, faces, indices,
1982 localContactsOut, localContactCapacity);
1984 if (numLocalContactsOut > 0)
1986 float4 normal = -gpuCompoundSepNormalsOut[i];
1987 int nPoints = numLocalContactsOut;
1988 float4* pointsIn = localContactsOut;
1989 b3Int4 contactIdx; // = {-1,-1,-1,-1};
1991 contactIdx.s[0] = 0;
1992 contactIdx.s[1] = 1;
1993 contactIdx.s[2] = 2;
1994 contactIdx.s[3] = 3;
1996 int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
1999 dstIdx = b3AtomicInc(nGlobalContactsOut);
2000 if ((dstIdx + nReducedContacts) < maxContactCapacity)
2002 __global struct b3Contact4Data* c = globalContactsOut + dstIdx;
2003 c->m_worldNormalOnB = -normal;
2004 c->m_restituitionCoeffCmp = (0.f * 0xffff);
2005 c->m_frictionCoeffCmp = (0.7f * 0xffff);
2006 c->m_batchIdx = pairIndex;
2007 int bodyA = gpuCompoundPairs[pairIndex].x;
2008 int bodyB = gpuCompoundPairs[pairIndex].y;
2009 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA;
2010 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;
2011 c->m_childIndexA = childShapeIndexA;
2012 c->m_childIndexB = childShapeIndexB;
2013 for (int i = 0; i < nReducedContacts; i++)
2015 c->m_worldPosB[i] = pointsIn[contactIdx.s[i]];
2017 b3Contact4Data_setNumPoints(c, nReducedContacts);
2020 } // if (numContactsOut>0)
2021 } // if (gpuHasCompoundSepNormalsOut[i])
2022 } // if (i<numCompoundPairs)
2025 void computeContactCompoundCompound(int pairIndex,
2026 int bodyIndexA, int bodyIndexB,
2027 int collidableIndexA, int collidableIndexB,
2028 const b3RigidBodyData* rigidBodies,
2029 const b3Collidable* collidables,
2030 const b3ConvexPolyhedronData* convexShapes,
2031 const b3GpuChildShape* cpuChildShapes,
2032 const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace,
2033 const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace,
2035 const b3AlignedObjectArray<b3Vector3>& convexVertices,
2036 const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges,
2037 const b3AlignedObjectArray<int>& convexIndices,
2038 const b3AlignedObjectArray<b3GpuFace>& faces,
2040 b3Contact4* globalContactsOut,
2041 int& nGlobalContactsOut,
2042 int maxContactCapacity,
2043 b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
2044 b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
2045 b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
2047 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2048 b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2050 b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
2051 int numCompoundPairsOut = 0;
2052 int maxNumCompoundPairsCapacity = 8192; //1024;
2053 cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);
2056 findCompoundPairsKernel(
2058 bodyIndexA, bodyIndexB,
2059 collidableIndexA, collidableIndexB,
2064 hostAabbsWorldSpace,
2065 hostAabbsLocalSpace,
2067 &cpuCompoundPairsOut[0],
2068 &numCompoundPairsOut,
2069 maxNumCompoundPairsCapacity,
2074 printf("maxNumAabbChecks=%d\n", maxNumAabbChecks);
2075 if (numCompoundPairsOut > maxNumCompoundPairsCapacity)
2077 b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity);
2078 numCompoundPairsOut = maxNumCompoundPairsCapacity;
2080 b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
2081 b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
2082 cpuCompoundSepNormalsOut.resize(numCompoundPairsOut);
2083 cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut);
2085 for (int i = 0; i < numCompoundPairsOut; i++)
2087 processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes,
2088 cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i);
2091 for (int i = 0; i < numCompoundPairsOut; i++)
2093 clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes,
2094 cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i);
2097 int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
2099 float4 posA = rigidBodies[bodyIndexA].m_pos;
2100 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
2101 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
2102 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
2103 float4 newPosA = b3QuatRotate(ornA,childPosA)+posA;
2104 b3Quat newOrnA = b3QuatMul(ornA,childOrnA);
2106 int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
2109 bool foundSepAxis = findSeparatingAxis(hullA,hullB,
2115 convexVertices,uniqueEdges,faces,convexIndices,
2116 convexVertices,uniqueEdges,faces,convexIndices,
2127 contactIndex = clipHullHullSingle(
2128 bodyIndexA, bodyIndexB,
2131 collidableIndexA, collidableIndexB,
2151 sepNormalWorldSpace,
2152 maxContactCapacity);
2157 // return contactIndex;
2161 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2162 for (int c=0;c<numChildrenB;c++)
2164 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c;
2165 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2167 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2168 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2169 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2170 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2171 float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB;
2172 b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB);
2174 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2176 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2182 void computeContactPlaneCompound(int pairIndex,
2183 int bodyIndexA, int bodyIndexB,
2184 int collidableIndexA, int collidableIndexB,
2185 const b3RigidBodyData* rigidBodies,
2186 const b3Collidable* collidables,
2187 const b3ConvexPolyhedronData* convexShapes,
2188 const b3GpuChildShape* cpuChildShapes,
2189 const b3Vector3* convexVertices,
2190 const int* convexIndices,
2191 const b3GpuFace* faces,
2193 b3Contact4* globalContactsOut,
2194 int& nGlobalContactsOut,
2195 int maxContactCapacity)
2197 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2198 b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2200 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2201 for (int c = 0; c < numChildrenB; c++)
2203 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c;
2204 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2206 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2207 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2208 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2209 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2210 float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB;
2211 b3Quaternion ornB = rootOrnB * childOrnB; //b3QuatMul(ornB,childOrnB);
2213 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2215 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2217 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2218 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2220 // int numContactsOut = 0;
2221 // int numWorldVertsB1= 0;
2223 b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
2224 b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
2225 b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
2226 float planeConstant = planeEq.w;
2227 b3Transform convexWorldTransform;
2228 convexWorldTransform.setIdentity();
2229 convexWorldTransform.setOrigin(posB);
2230 convexWorldTransform.setRotation(ornB);
2231 b3Transform planeTransform;
2232 planeTransform.setIdentity();
2233 planeTransform.setOrigin(posA);
2234 planeTransform.setRotation(ornA);
2236 b3Transform planeInConvex;
2237 planeInConvex = convexWorldTransform.inverse() * planeTransform;
2238 b3Transform convexInPlane;
2239 convexInPlane = planeTransform.inverse() * convexWorldTransform;
2241 b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
2242 float maxDot = -1e30;
2246 #define MAX_PLANE_CONVEX_POINTS 64
2248 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
2252 contactIdx.s[0] = 0;
2253 contactIdx.s[1] = 1;
2254 contactIdx.s[2] = 2;
2255 contactIdx.s[3] = 3;
2257 for (int i = 0; i < hullB->m_numVertices; i++)
2259 b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
2260 float curDot = vtx.dot(planeNormalInConvex);
2262 if (curDot > maxDot)
2267 //make sure the deepest points is always included
2268 if (numPoints == MAX_PLANE_CONVEX_POINTS)
2272 if (numPoints < MAX_PLANE_CONVEX_POINTS)
2274 b3Vector3 vtxWorld = convexWorldTransform * vtx;
2275 b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
2276 float dist = planeNormal.dot(vtxInPlane) - planeConstant;
2280 contactPoints[numPoints] = vtxWorld;
2286 int numReducedPoints = 0;
2288 numReducedPoints = numPoints;
2292 numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
2295 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2297 if (numReducedPoints > 0)
2299 if (nGlobalContactsOut < maxContactCapacity)
2301 dstIdx = nGlobalContactsOut;
2302 nGlobalContactsOut++;
2304 b3Contact4* c = &globalContactsOut[dstIdx];
2305 c->m_worldNormalOnB = -planeNormalWorld;
2306 c->setFrictionCoeff(0.7);
2307 c->setRestituitionCoeff(0.f);
2309 c->m_batchIdx = pairIndex;
2310 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2311 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2312 for (int i = 0; i < numReducedPoints; i++)
2314 b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
2315 c->m_worldPosB[i] = pOnB1;
2317 c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
2318 } //if (dstIdx < numPairs)
2323 void computeContactSphereConvex(int pairIndex,
2324 int bodyIndexA, int bodyIndexB,
2325 int collidableIndexA, int collidableIndexB,
2326 const b3RigidBodyData* rigidBodies,
2327 const b3Collidable* collidables,
2328 const b3ConvexPolyhedronData* convexShapes,
2329 const b3Vector3* convexVertices,
2330 const int* convexIndices,
2331 const b3GpuFace* faces,
2332 b3Contact4* globalContactsOut,
2333 int& nGlobalContactsOut,
2334 int maxContactCapacity)
2336 float radius = collidables[collidableIndexA].m_radius;
2337 float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
2338 b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;
2340 float4 pos = rigidBodies[bodyIndexB].m_pos;
2342 b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
2347 tr.setRotation(quat);
2348 b3Transform trInv = tr.inverse();
2350 float4 spherePos = trInv(spherePos1);
2352 int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
2353 int shapeIndex = collidables[collidableIndex].m_shapeIndex;
2354 int numFaces = convexShapes[shapeIndex].m_numFaces;
2355 float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
2356 // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
2357 float minDist = -1000000.f; // TODO: What is the largest/smallest float?
2358 bool bCollide = true;
2360 float4 localHitNormal;
2361 for (int f = 0; f < numFaces; f++)
2363 b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];
2365 float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
2366 float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal);
2368 planeEqn[3] = face.m_plane.w;
2371 float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
2381 //might hit an edge or vertex
2384 bool isInPoly = IsPointInPolygon(spherePos,
2386 &convexVertices[convexShapes[shapeIndex].m_vertexOffset],
2394 closestPnt = pntReturn;
2395 localHitNormal = planeEqn;
2401 b3Vector3 tmp = spherePos - out;
2402 b3Scalar l2 = tmp.length2();
2403 if (l2 < radius * radius)
2410 localHitNormal = tmp / dist;
2426 closestPnt = pntReturn;
2427 localHitNormal = planeEqn;
2432 static int numChecks = 0;
2435 if (bCollide && minDist > -10000)
2437 float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld;
2438 float4 pOnB1 = tr(closestPnt);
2439 //printf("dist ,%f,",minDist);
2440 float actualDepth = minDist - radius;
2441 if (actualDepth < 0)
2443 //printf("actualDepth = ,%f,", actualDepth);
2444 //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
2445 //printf("region=,%d,\n", region);
2446 pOnB1[3] = actualDepth;
2449 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2451 if (nGlobalContactsOut < maxContactCapacity)
2453 dstIdx = nGlobalContactsOut;
2454 nGlobalContactsOut++;
2456 b3Contact4* c = &globalContactsOut[dstIdx];
2457 c->m_worldNormalOnB = normalOnSurfaceB1;
2458 c->setFrictionCoeff(0.7);
2459 c->setRestituitionCoeff(0.f);
2461 c->m_batchIdx = pairIndex;
2462 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2463 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2464 c->m_worldPosB[0] = pOnB1;
2466 c->m_worldNormalOnB.w = (b3Scalar)numPoints;
2467 } //if (dstIdx < numPairs)
2469 } //if (hasCollision)
2472 int computeContactConvexConvex2(
2474 int bodyIndexA, int bodyIndexB,
2475 int collidableIndexA, int collidableIndexB,
2476 const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
2477 const b3AlignedObjectArray<b3Collidable>& collidables,
2478 const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
2479 const b3AlignedObjectArray<b3Vector3>& convexVertices,
2480 const b3AlignedObjectArray<b3Vector3>& uniqueEdges,
2481 const b3AlignedObjectArray<int>& convexIndices,
2482 const b3AlignedObjectArray<b3GpuFace>& faces,
2483 b3AlignedObjectArray<b3Contact4>& globalContactsOut,
2484 int& nGlobalContactsOut,
2485 int maxContactCapacity,
2486 const b3AlignedObjectArray<b3Contact4>& oldContacts)
2488 int contactIndex = -1;
2489 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2490 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2491 b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
2492 b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
2494 b3ConvexPolyhedronData hullA, hullB;
2496 b3Vector3 sepNormalWorldSpace;
2498 b3Collidable colA = collidables[collidableIndexA];
2499 hullA = convexShapes[colA.m_shapeIndex];
2500 //printf("numvertsA = %d\n",hullA.m_numVertices);
2502 b3Collidable colB = collidables[collidableIndexB];
2503 hullB = convexShapes[colB.m_shapeIndex];
2504 //printf("numvertsB = %d\n",hullB.m_numVertices);
2506 // int contactCapacity = MAX_VERTS;
2507 //int numContactsOut=0;
2510 b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
2511 b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
2514 bool foundSepAxis = findSeparatingAxis(hullA, hullB,
2520 convexVertices, uniqueEdges, faces, convexIndices,
2521 convexVertices, uniqueEdges, faces, convexIndices,
2523 sepNormalWorldSpace);
2527 contactIndex = clipHullHullSingle(
2528 bodyIndexA, bodyIndexB,
2531 collidableIndexA, collidableIndexB,
2551 sepNormalWorldSpace,
2552 maxContactCapacity);
2555 return contactIndex;
2558 void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs,
2559 const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
2560 b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
2561 const b3OpenCLArray<b3Contact4>* oldContacts,
2562 int maxContactCapacity,
2563 int compoundPairCapacity,
2564 const b3OpenCLArray<b3ConvexPolyhedronData>& convexData,
2565 const b3OpenCLArray<b3Vector3>& gpuVertices,
2566 const b3OpenCLArray<b3Vector3>& gpuUniqueEdges,
2567 const b3OpenCLArray<b3GpuFace>& gpuFaces,
2568 const b3OpenCLArray<int>& gpuIndices,
2569 const b3OpenCLArray<b3Collidable>& gpuCollidables,
2570 const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
2572 const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace,
2573 const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace,
2575 b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
2576 b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
2577 b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
2578 b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
2579 b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
2580 b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
2581 b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
2582 b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
2583 b3OpenCLArray<b3BvhInfo>* bvhInfo,
2586 int maxTriConvexPairCapacity,
2587 b3OpenCLArray<b3Int4>& triangleConvexPairsOut,
2588 int& numTriConvexPairsOut)
2595 #ifdef CHECK_ON_HOST
2597 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
2598 treeNodesGPU->copyToHost(treeNodesCPU);
2600 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
2601 subTreesGPU->copyToHost(subTreesCPU);
2603 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
2604 bvhInfo->copyToHost(bvhInfoCPU);
2606 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
2607 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
2609 b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
2610 clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
2612 b3AlignedObjectArray<b3Int4> hostPairs;
2613 pairs->copyToHost(hostPairs);
2615 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2616 bodyBuf->copyToHost(hostBodyBuf);
2618 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
2619 convexData.copyToHost(hostConvexData);
2621 b3AlignedObjectArray<b3Vector3> hostVertices;
2622 gpuVertices.copyToHost(hostVertices);
2624 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2625 gpuUniqueEdges.copyToHost(hostUniqueEdges);
2626 b3AlignedObjectArray<b3GpuFace> hostFaces;
2627 gpuFaces.copyToHost(hostFaces);
2628 b3AlignedObjectArray<int> hostIndices;
2629 gpuIndices.copyToHost(hostIndices);
2630 b3AlignedObjectArray<b3Collidable> hostCollidables;
2631 gpuCollidables.copyToHost(hostCollidables);
2633 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2634 gpuChildShapes.copyToHost(cpuChildShapes);
2636 b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs;
2638 b3AlignedObjectArray<b3Contact4> hostContacts;
2641 contactOut->copyToHost(hostContacts);
2644 b3AlignedObjectArray<b3Contact4> oldHostContacts;
2646 if (oldContacts->size())
2648 oldContacts->copyToHost(oldHostContacts);
2651 hostContacts.resize(maxContactCapacity);
2653 for (int i = 0; i < nPairs; i++)
2655 int bodyIndexA = hostPairs[i].x;
2656 int bodyIndexB = hostPairs[i].y;
2657 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
2658 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
2660 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
2661 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2663 computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2664 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2667 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2668 hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)
2670 computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2671 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2672 //printf("convex-sphere\n");
2675 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2676 hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2678 computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2679 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2680 // printf("convex-plane\n");
2683 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2684 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2686 computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2687 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2688 // printf("plane-convex\n");
2691 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2692 hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2694 computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2695 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0],
2696 nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU);
2697 // printf("convex-plane\n");
2700 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2701 hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2703 computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2704 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2705 // printf("convex-plane\n");
2708 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2709 hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2711 computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2712 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2713 // printf("plane-convex\n");
2716 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2717 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2719 //printf("hostPairs[i].z=%d\n",hostPairs[i].z);
2720 int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts);
2721 //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
2723 if (contactIndex >= 0)
2725 // printf("convex convex contactIndex = %d\n",contactIndex);
2726 hostPairs[i].z = contactIndex;
2728 // printf("plane-convex\n");
2732 if (hostPairs.size())
2734 pairs->copyFromHost(hostPairs);
2737 hostContacts.resize(nContacts);
2740 contactOut->copyFromHost(hostContacts);
2744 contactOut->resize(0);
2747 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2748 //printf("(HOST) nContacts = %d\n",nContacts);
2755 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2757 B3_PROFILE("primitiveContactsKernel");
2758 b3BufferInfoCL bInfo[] = {
2759 b3BufferInfoCL(pairs->getBufferCL(), true),
2760 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2761 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2762 b3BufferInfoCL(convexData.getBufferCL(), true),
2763 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2764 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2765 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2766 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2767 b3BufferInfoCL(contactOut->getBufferCL()),
2768 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2770 b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel");
2771 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2772 launcher.setConst(nPairs);
2773 launcher.setConst(maxContactCapacity);
2775 launcher.launch1D(num);
2778 nContacts = m_totalContactsOut.at(0);
2779 contactOut->resize(nContacts);
2783 #endif //CHECK_ON_HOST
2785 B3_PROFILE("computeConvexConvexContactsGPUSAT");
2786 // printf("nContacts = %d\n",nContacts);
2788 m_sepNormals.resize(nPairs);
2789 m_hasSeparatingNormals.resize(nPairs);
2791 int concaveCapacity = maxTriConvexPairCapacity;
2792 m_concaveSepNormals.resize(concaveCapacity);
2793 m_concaveHasSeparatingNormals.resize(concaveCapacity);
2794 m_numConcavePairsOut.resize(0);
2795 m_numConcavePairsOut.push_back(0);
2797 m_gpuCompoundPairs.resize(compoundPairCapacity);
2799 m_gpuCompoundSepNormals.resize(compoundPairCapacity);
2801 m_gpuHasCompoundSepNormals.resize(compoundPairCapacity);
2803 m_numCompoundPairsOut.resize(0);
2804 m_numCompoundPairsOut.push_back(0);
2806 int numCompoundPairs = 0;
2808 int numConcavePairs = 0;
2812 if (findSeparatingAxisOnGpu)
2814 m_dmins.resize(nPairs);
2815 if (splitSearchSepAxisConvex)
2819 nContacts = m_totalContactsOut.at(0);
2821 B3_PROFILE("mprPenetrationKernel");
2822 b3BufferInfoCL bInfo[] = {
2823 b3BufferInfoCL(pairs->getBufferCL(), true),
2824 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2825 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2826 b3BufferInfoCL(convexData.getBufferCL(), true),
2827 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2828 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2829 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2830 b3BufferInfoCL(contactOut->getBufferCL()),
2831 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2833 b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel");
2834 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2836 launcher.setConst(maxContactCapacity);
2837 launcher.setConst(nPairs);
2840 launcher.launch1D(num);
2843 b3AlignedObjectArray<int>hostHasSepAxis;
2844 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
2845 b3AlignedObjectArray<b3Vector3>hostSepAxis;
2846 m_sepNormals.copyToHost(hostSepAxis);
2848 nContacts = m_totalContactsOut.at(0);
2849 contactOut->resize(nContacts);
2850 // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts);
2851 if (nContacts > maxContactCapacity)
2853 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
2854 nContacts = maxContactCapacity;
2864 B3_PROFILE("findSeparatingAxisVertexFaceKernel");
2865 b3BufferInfoCL bInfo[] = {
2866 b3BufferInfoCL(pairs->getBufferCL(), true),
2867 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2868 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2869 b3BufferInfoCL(convexData.getBufferCL(), true),
2870 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2871 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2872 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2873 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2874 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2875 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2876 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2877 b3BufferInfoCL(m_dmins.getBufferCL())};
2879 b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel");
2880 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2881 launcher.setConst(nPairs);
2884 launcher.launch1D(num);
2888 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2891 B3_PROFILE("findSeparatingAxisEdgeEdgeKernel");
2892 b3BufferInfoCL bInfo[] = {
2893 b3BufferInfoCL(pairs->getBufferCL(), true),
2894 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2895 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2896 b3BufferInfoCL(convexData.getBufferCL(), true),
2897 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2898 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2899 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2900 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2901 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2902 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2903 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2904 b3BufferInfoCL(m_dmins.getBufferCL()),
2905 b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true)
2909 b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel");
2910 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2911 launcher.setConst(numDirections);
2912 launcher.setConst(nPairs);
2914 launcher.launch1D(num);
2920 B3_PROFILE("findSeparatingAxisUnitSphereKernel");
2921 b3BufferInfoCL bInfo[] = {
2922 b3BufferInfoCL(pairs->getBufferCL(), true),
2923 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2924 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2925 b3BufferInfoCL(convexData.getBufferCL(), true),
2926 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2927 b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true),
2928 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2929 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2930 b3BufferInfoCL(m_dmins.getBufferCL())};
2932 b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel");
2933 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2934 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2935 launcher.setConst(numDirections);
2937 launcher.setConst(nPairs);
2940 launcher.launch1D(num);
2947 B3_PROFILE("findSeparatingAxisKernel");
2948 b3BufferInfoCL bInfo[] = {
2949 b3BufferInfoCL(pairs->getBufferCL(), true),
2950 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2951 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2952 b3BufferInfoCL(convexData.getBufferCL(), true),
2953 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2954 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2955 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2956 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2957 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2958 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2959 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())};
2961 b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel");
2962 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2963 launcher.setConst(nPairs);
2966 launcher.launch1D(num);
2972 B3_PROFILE("findSeparatingAxisKernel CPU");
2974 b3AlignedObjectArray<b3Int4> hostPairs;
2975 pairs->copyToHost(hostPairs);
2976 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2977 bodyBuf->copyToHost(hostBodyBuf);
2979 b3AlignedObjectArray<b3Collidable> hostCollidables;
2980 gpuCollidables.copyToHost(hostCollidables);
2982 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2983 gpuChildShapes.copyToHost(cpuChildShapes);
2985 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData;
2986 convexData.copyToHost(hostConvexShapeData);
2988 b3AlignedObjectArray<b3Vector3> hostVertices;
2989 gpuVertices.copyToHost(hostVertices);
2991 b3AlignedObjectArray<int> hostHasSepAxis;
2992 hostHasSepAxis.resize(nPairs);
2993 b3AlignedObjectArray<b3Vector3> hostSepAxis;
2994 hostSepAxis.resize(nPairs);
2996 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2997 gpuUniqueEdges.copyToHost(hostUniqueEdges);
2998 b3AlignedObjectArray<b3GpuFace> hostFaces;
2999 gpuFaces.copyToHost(hostFaces);
3001 b3AlignedObjectArray<int> hostIndices;
3002 gpuIndices.copyToHost(hostIndices);
3004 b3AlignedObjectArray<b3Contact4> hostContacts;
3007 contactOut->copyToHost(hostContacts);
3009 hostContacts.resize(maxContactCapacity);
3010 int nGlobalContactsOut = nContacts;
3012 for (int i = 0; i < nPairs; i++)
3014 int bodyIndexA = hostPairs[i].x;
3015 int bodyIndexB = hostPairs[i].y;
3016 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3017 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3019 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
3020 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
3022 hostHasSepAxis[i] = 0;
3024 //once the broadphase avoids static-static pairs, we can remove this test
3025 if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0))
3030 if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL))
3035 float dmin = FLT_MAX;
3037 b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA];
3038 b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB];
3039 b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos;
3040 b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos;
3041 b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat;
3042 b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat;
3046 //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR
3048 b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3049 b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3050 b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3051 b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3052 b3Vector3 DeltaC2 = c0 - c1;
3056 bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3057 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3058 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3063 bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3064 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3065 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3069 bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3070 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3071 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3072 &sepAxis, &dmin, false);
3076 hostHasSepAxis[i] = 1;
3077 hostSepAxis[i] = sepAxis;
3078 hostSepAxis[i].w = dmin;
3084 if (hostHasSepAxis[i])
3093 b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0);
3094 b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0);
3100 //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut);
3101 res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut);
3103 sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z);
3104 resultPointOnBWorld = posOut;
3105 //hostHasSepAxis[i] = 0;
3110 //printf("depth = %f\n",depth);
3111 //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]);
3112 //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]);
3116 const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex];
3117 const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex];
3119 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3123 float diff = depth - dist;
3125 static float maxdiff = 0.f;
3129 printf("maxdiff = %20.10f\n", maxdiff);
3135 b3Vector3 oldAxis = hostSepAxis[i];
3140 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3144 float diff = depth - dist;
3145 //printf("?diff = %f\n",diff );
3146 static float maxdiff = 0.f;
3150 printf("maxdiff = %20.10f\n", maxdiff);
3153 //this is used for SAT
3154 //hostHasSepAxis[i] = 1;
3155 //hostSepAxis[i] = sepAxis2;
3159 //int contactIndex = nGlobalContactsOut;
3160 b3Contact4& newContact = hostContacts.at(nGlobalContactsOut);
3161 nGlobalContactsOut++;
3162 newContact.m_batchIdx = 0; //i;
3163 newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
3164 newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
3166 newContact.m_frictionCoeffCmp = 45874;
3167 newContact.m_restituitionCoeffCmp = 0;
3169 static float maxDepth = 0.f;
3171 if (depth > maxDepth)
3174 printf("MPR maxdepth = %f\n", maxDepth);
3177 resultPointOnBWorld.w = -depth;
3178 newContact.m_worldPosB[0] = resultPointOnBWorld;
3179 //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2;
3180 newContact.m_worldNormalOnB = sepAxis2;
3181 newContact.m_worldNormalOnB.w = (b3Scalar)1;
3185 printf("rejected\n");
3191 //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
3192 b3AlignedObjectArray<b3Contact4> oldHostContacts;
3194 result = computeContactConvexConvex2( //hostPairs,
3196 bodyIndexA, bodyIndexB,
3197 collidableIndexA, collidableIndexB,
3200 hostConvexShapeData,
3214 } //hostHasSepAxis[i] = 1;
3218 b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3219 b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3220 b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3221 b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3222 b3Vector3 DeltaC2 = c0 - c1;
3226 bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3227 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3228 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3233 bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3234 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3235 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3239 bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3240 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3241 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3242 &sepAxis, &dmin, true);
3246 hostHasSepAxis[i] = 1;
3247 hostSepAxis[i] = sepAxis;
3254 if (useGjkContacts) //nGlobalContactsOut>0)
3256 //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut);
3257 nContacts = nGlobalContactsOut;
3258 contactOut->copyFromHost(hostContacts);
3260 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3263 m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3264 m_sepNormals.copyFromHost(hostSepAxis);
3267 //double-check results from GPU (comment-out the 'else' so both paths are executed
3268 b3AlignedObjectArray<int> checkHasSepAxis;
3269 m_hasSeparatingNormals.copyToHost(checkHasSepAxis);
3270 static int frameCount = 0;
3272 for (int i=0;i<nPairs;i++)
3274 if (hostHasSepAxis[i] != checkHasSepAxis[i])
3276 printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n",
3277 frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]);
3280 //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3281 // m_sepNormals.copyFromHost(hostSepAxis);
3285 numCompoundPairs = m_numCompoundPairsOut.at(0);
3286 bool useGpuFindCompoundPairs = true;
3287 if (useGpuFindCompoundPairs)
3289 B3_PROFILE("findCompoundPairsKernel");
3290 b3BufferInfoCL bInfo[] =
3292 b3BufferInfoCL(pairs->getBufferCL(), true),
3293 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3294 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3295 b3BufferInfoCL(convexData.getBufferCL(), true),
3296 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3297 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3298 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3299 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3300 b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true),
3301 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3302 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()),
3303 b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()),
3304 b3BufferInfoCL(subTreesGPU->getBufferCL()),
3305 b3BufferInfoCL(treeNodesGPU->getBufferCL()),
3306 b3BufferInfoCL(bvhInfo->getBufferCL())};
3308 b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel");
3309 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3310 launcher.setConst(nPairs);
3311 launcher.setConst(compoundPairCapacity);
3314 launcher.launch1D(num);
3317 numCompoundPairs = m_numCompoundPairsOut.at(0);
3318 //printf("numCompoundPairs =%d\n",numCompoundPairs );
3319 if (numCompoundPairs)
3321 //printf("numCompoundPairs=%d\n",numCompoundPairs);
3326 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3327 treeNodesGPU->copyToHost(treeNodesCPU);
3329 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3330 subTreesGPU->copyToHost(subTreesCPU);
3332 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3333 bvhInfo->copyToHost(bvhInfoCPU);
3335 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3336 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3338 b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
3339 clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
3341 b3AlignedObjectArray<b3Int4> hostPairs;
3342 pairs->copyToHost(hostPairs);
3344 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3345 bodyBuf->copyToHost(hostBodyBuf);
3347 b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
3348 cpuCompoundPairsOut.resize(compoundPairCapacity);
3350 b3AlignedObjectArray<b3Collidable> hostCollidables;
3351 gpuCollidables.copyToHost(hostCollidables);
3353 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3354 gpuChildShapes.copyToHost(cpuChildShapes);
3356 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3357 convexData.copyToHost(hostConvexData);
3359 b3AlignedObjectArray<b3Vector3> hostVertices;
3360 gpuVertices.copyToHost(hostVertices);
3362 for (int pairIndex = 0; pairIndex < nPairs; pairIndex++)
3364 int bodyIndexA = hostPairs[pairIndex].x;
3365 int bodyIndexB = hostPairs[pairIndex].y;
3366 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3367 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3368 if (cpuChildShapes.size())
3370 findCompoundPairsKernel(
3377 &hostCollidables[0],
3380 hostAabbsWorldSpace,
3381 hostAabbsLocalSpace,
3383 &cpuCompoundPairsOut[0],
3385 compoundPairCapacity,
3392 m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true);
3393 if (numCompoundPairs)
3395 b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0];
3396 m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true);
3398 //cpuCompoundPairsOut
3400 if (numCompoundPairs)
3402 printf("numCompoundPairs=%d\n", numCompoundPairs);
3405 if (numCompoundPairs > compoundPairCapacity)
3407 b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity);
3408 numCompoundPairs = compoundPairCapacity;
3411 m_gpuCompoundPairs.resize(numCompoundPairs);
3412 m_gpuHasCompoundSepNormals.resize(numCompoundPairs);
3413 m_gpuCompoundSepNormals.resize(numCompoundPairs);
3415 if (numCompoundPairs)
3417 B3_PROFILE("processCompoundPairsPrimitivesKernel");
3418 b3BufferInfoCL bInfo[] =
3420 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3421 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3422 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3423 b3BufferInfoCL(convexData.getBufferCL(), true),
3424 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3425 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3426 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3427 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3428 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3429 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3430 b3BufferInfoCL(contactOut->getBufferCL()),
3431 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3433 b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel");
3434 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3435 launcher.setConst(numCompoundPairs);
3436 launcher.setConst(maxContactCapacity);
3438 int num = numCompoundPairs;
3439 launcher.launch1D(num);
3441 nContacts = m_totalContactsOut.at(0);
3442 //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts);
3443 if (nContacts > maxContactCapacity)
3445 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3446 nContacts = maxContactCapacity;
3450 if (numCompoundPairs)
3452 B3_PROFILE("processCompoundPairsKernel");
3453 b3BufferInfoCL bInfo[] =
3455 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3456 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3457 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3458 b3BufferInfoCL(convexData.getBufferCL(), true),
3459 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3460 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3461 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3462 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3463 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3464 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3465 b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()),
3466 b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())};
3468 b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel");
3469 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3470 launcher.setConst(numCompoundPairs);
3472 int num = numCompoundPairs;
3473 launcher.launch1D(num);
3477 //printf("numConcave = %d\n",numConcave);
3479 // printf("hostNormals.size()=%d\n",hostNormals.size());
3480 //int numPairs = pairCount.at(0);
3482 int vertexFaceCapacity = 64;
3485 //now perform the tree query on GPU
3487 if (treeNodesGPU->size() && treeNodesGPU->size())
3489 if (bvhTraversalKernelGPU)
3491 B3_PROFILE("m_bvhTraversalKernel");
3493 numConcavePairs = m_numConcavePairsOut.at(0);
3495 b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel");
3496 launcher.setBuffer(pairs->getBufferCL());
3497 launcher.setBuffer(bodyBuf->getBufferCL());
3498 launcher.setBuffer(gpuCollidables.getBufferCL());
3499 launcher.setBuffer(clAabbsWorldSpace.getBufferCL());
3500 launcher.setBuffer(triangleConvexPairsOut.getBufferCL());
3501 launcher.setBuffer(m_numConcavePairsOut.getBufferCL());
3502 launcher.setBuffer(subTreesGPU->getBufferCL());
3503 launcher.setBuffer(treeNodesGPU->getBufferCL());
3504 launcher.setBuffer(bvhInfo->getBufferCL());
3506 launcher.setConst(nPairs);
3507 launcher.setConst(maxTriConvexPairCapacity);
3509 launcher.launch1D(num);
3511 numConcavePairs = m_numConcavePairsOut.at(0);
3515 b3AlignedObjectArray<b3Int4> hostPairs;
3516 pairs->copyToHost(hostPairs);
3517 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3518 bodyBuf->copyToHost(hostBodyBuf);
3519 b3AlignedObjectArray<b3Collidable> hostCollidables;
3520 gpuCollidables.copyToHost(hostCollidables);
3521 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3522 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3524 //int maxTriConvexPairCapacity,
3525 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3526 triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3528 //int numTriConvexPairsOutHost=0;
3529 numConcavePairs = 0;
3530 //m_numConcavePairsOut
3532 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3533 treeNodesGPU->copyToHost(treeNodesCPU);
3534 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3535 subTreesGPU->copyToHost(subTreesCPU);
3536 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3537 bvhInfo->copyToHost(bvhInfoCPU);
3540 volatile int hostNumConcavePairsOut = 0;
3543 for (int i = 0; i < nPairs; i++)
3545 b3BvhTraversal(&hostPairs.at(0),
3547 &hostCollidables.at(0),
3548 &hostAabbsWorldSpace.at(0),
3549 &triangleConvexPairsOutHost.at(0),
3550 &hostNumConcavePairsOut,
3552 &treeNodesCPU.at(0),
3555 maxTriConvexPairCapacity,
3558 numConcavePairs = hostNumConcavePairsOut;
3560 if (hostNumConcavePairsOut)
3562 triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
3563 triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
3567 m_numConcavePairsOut.resize(0);
3568 m_numConcavePairsOut.push_back(numConcavePairs);
3571 //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
3573 if (numConcavePairs > maxTriConvexPairCapacity)
3575 static int exceeded_maxTriConvexPairCapacity_count = 0;
3576 b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
3577 numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++);
3578 numConcavePairs = maxTriConvexPairCapacity;
3580 triangleConvexPairsOut.resize(numConcavePairs);
3582 if (numConcavePairs)
3584 clippingFacesOutGPU.resize(numConcavePairs);
3585 worldNormalsAGPU.resize(numConcavePairs);
3586 worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3587 worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3589 if (findConcaveSeparatingAxisKernelGPU)
3592 m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3593 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3594 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3595 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3596 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3599 //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
3600 if (splitSearchSepAxisConcave)
3602 //printf("numConcavePairs = %d\n",numConcavePairs);
3603 m_dmins.resize(numConcavePairs);
3605 B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel");
3606 b3BufferInfoCL bInfo[] = {
3607 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3608 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3609 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3610 b3BufferInfoCL(convexData.getBufferCL(), true),
3611 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3612 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3613 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3614 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3615 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3616 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3617 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3618 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3619 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3620 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3621 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3622 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3623 b3BufferInfoCL(m_dmins.getBufferCL())};
3625 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel");
3626 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3627 launcher.setConst(vertexFaceCapacity);
3628 launcher.setConst(numConcavePairs);
3630 int num = numConcavePairs;
3631 launcher.launch1D(num);
3634 // numConcavePairs = 0;
3637 B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel");
3638 b3BufferInfoCL bInfo[] = {
3639 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3640 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3641 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3642 b3BufferInfoCL(convexData.getBufferCL(), true),
3643 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3644 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3645 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3646 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3647 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3648 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3649 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3650 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3651 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3652 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3653 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3654 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3655 b3BufferInfoCL(m_dmins.getBufferCL())};
3657 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel");
3658 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3659 launcher.setConst(vertexFaceCapacity);
3660 launcher.setConst(numConcavePairs);
3662 int num = numConcavePairs;
3663 launcher.launch1D(num);
3667 // numConcavePairs = 0;
3671 B3_PROFILE("findConcaveSeparatingAxisKernel");
3672 b3BufferInfoCL bInfo[] = {
3673 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3674 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3675 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3676 b3BufferInfoCL(convexData.getBufferCL(), true),
3677 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3678 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3679 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3680 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3681 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3682 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3683 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3684 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3685 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3686 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3687 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3688 b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
3690 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel");
3691 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3692 launcher.setConst(vertexFaceCapacity);
3693 launcher.setConst(numConcavePairs);
3695 int num = numConcavePairs;
3696 launcher.launch1D(num);
3702 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3703 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3704 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3705 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3706 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3708 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3709 triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3710 //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3711 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3712 bodyBuf->copyToHost(hostBodyBuf);
3713 b3AlignedObjectArray<b3Collidable> hostCollidables;
3714 gpuCollidables.copyToHost(hostCollidables);
3715 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3716 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3718 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3719 convexData.copyToHost(hostConvexData);
3721 b3AlignedObjectArray<b3Vector3> hostVertices;
3722 gpuVertices.copyToHost(hostVertices);
3724 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
3725 gpuUniqueEdges.copyToHost(hostUniqueEdges);
3726 b3AlignedObjectArray<b3GpuFace> hostFaces;
3727 gpuFaces.copyToHost(hostFaces);
3728 b3AlignedObjectArray<int> hostIndices;
3729 gpuIndices.copyToHost(hostIndices);
3730 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3731 gpuChildShapes.copyToHost(cpuChildShapes);
3733 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3734 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3735 concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size());
3737 b3GpuChildShape* childShapePointerCPU = 0;
3738 if (cpuChildShapes.size())
3739 childShapePointerCPU = &cpuChildShapes.at(0);
3741 clippingFacesOutCPU.resize(clippingFacesOutGPU.size());
3742 worldVertsA1CPU.resize(worldVertsA1GPU.size());
3743 worldNormalsACPU.resize(worldNormalsAGPU.size());
3744 worldVertsB1CPU.resize(worldVertsB1GPU.size());
3746 for (int i = 0; i < numConcavePairs; i++)
3748 b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0),
3750 &hostCollidables.at(0),
3751 &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0),
3752 &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU,
3753 &hostAabbsWorldSpace.at(0),
3754 &concaveSepNormalsHost.at(0),
3755 &clippingFacesOutCPU.at(0),
3756 &worldVertsA1CPU.at(0),
3757 &worldNormalsACPU.at(0),
3758 &worldVertsB1CPU.at(0),
3759 &concaveHasSeparatingNormalsCPU.at(0),
3761 numConcavePairs, i);
3764 m_concaveSepNormals.copyFromHost(concaveSepNormalsHost);
3765 m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3766 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3767 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3768 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3769 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3771 // b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
3772 // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
3773 // b3AlignedObjectArray<b3Int4> cpuConcavePairs;
3774 // triangleConvexPairsOut.copyToHost(cpuConcavePairs);
3779 if (numConcavePairs)
3781 if (numConcavePairs)
3783 B3_PROFILE("findConcaveSphereContactsKernel");
3784 nContacts = m_totalContactsOut.at(0);
3785 // printf("nContacts1 = %d\n",nContacts);
3786 b3BufferInfoCL bInfo[] = {
3787 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3788 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3789 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3790 b3BufferInfoCL(convexData.getBufferCL(), true),
3791 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3792 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3793 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3794 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3795 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3796 b3BufferInfoCL(contactOut->getBufferCL()),
3797 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3799 b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel");
3800 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3802 launcher.setConst(numConcavePairs);
3803 launcher.setConst(maxContactCapacity);
3805 int num = numConcavePairs;
3806 launcher.launch1D(num);
3808 nContacts = m_totalContactsOut.at(0);
3809 //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts);
3811 //printf("nContacts2 = %d\n",nContacts);
3813 if (nContacts >= maxContactCapacity)
3815 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3816 nContacts = maxContactCapacity;
3822 bool contactClippingOnGpu = true;
3824 bool contactClippingOnGpu = true;
3827 if (contactClippingOnGpu)
3829 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3830 // printf("nContacts3 = %d\n",nContacts);
3832 //B3_PROFILE("clipHullHullKernel");
3834 bool breakupConcaveConvexKernel = true;
3837 //actually, some Apple OpenCL platform/device combinations work fine...
3838 breakupConcaveConvexKernel = true;
3840 //concave-convex contact clipping
3841 if (numConcavePairs)
3843 // printf("numConcavePairs = %d\n", numConcavePairs);
3844 // nContacts = m_totalContactsOut.at(0);
3845 // printf("nContacts before = %d\n", nContacts);
3847 if (breakupConcaveConvexKernel)
3849 worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs);
3851 //clipFacesAndFindContacts
3853 if (clipConcaveFacesAndFindContactsCPU)
3855 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3856 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3857 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3858 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3860 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3861 worldVertsA1GPU.copyToHost(worldVertsA1CPU);
3862 worldNormalsAGPU.copyToHost(worldNormalsACPU);
3863 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
3865 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3866 m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3868 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3869 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3871 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3872 worldVertsB2CPU.resize(worldVertsB2GPU.size());
3874 for (int i = 0; i < numConcavePairs; i++)
3876 clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0),
3877 &concaveHasSeparatingNormalsCPU.at(0),
3878 &clippingFacesOutCPU.at(0),
3879 &worldVertsA1CPU.at(0),
3880 &worldNormalsACPU.at(0),
3881 &worldVertsB1CPU.at(0),
3882 &worldVertsB2CPU.at(0),
3887 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3888 worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
3894 B3_PROFILE("clipFacesAndFindContacts");
3895 //nContacts = m_totalContactsOut.at(0);
3896 //int h = m_hasSeparatingNormals.at(0);
3897 //int4 p = clippingFacesOutGPU.at(0);
3898 b3BufferInfoCL bInfo[] = {
3899 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3900 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3901 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3902 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3903 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3904 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3905 b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
3906 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
3907 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3908 launcher.setConst(vertexFaceCapacity);
3910 launcher.setConst(numConcavePairs);
3912 launcher.setConst(debugMode);
3913 int num = numConcavePairs;
3914 launcher.launch1D(num);
3916 //int bla = m_totalContactsOut.at(0);
3921 int newContactCapacity = nContacts + numConcavePairs;
3922 contactOut->reserve(newContactCapacity);
3923 if (reduceConcaveContactsOnGPU)
3925 // printf("newReservation = %d\n",newReservation);
3927 B3_PROFILE("newContactReductionKernel");
3928 b3BufferInfoCL bInfo[] =
3930 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
3931 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3932 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3933 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3934 b3BufferInfoCL(contactOut->getBufferCL()),
3935 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3936 b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
3937 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3939 b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
3940 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3941 launcher.setConst(vertexFaceCapacity);
3942 launcher.setConst(newContactCapacity);
3943 launcher.setConst(numConcavePairs);
3944 int num = numConcavePairs;
3946 launcher.launch1D(num);
3948 nContacts = m_totalContactsOut.at(0);
3949 contactOut->resize(nContacts);
3951 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
3955 volatile int nGlobalContactsOut = nContacts;
3956 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3957 triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3958 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3959 bodyBuf->copyToHost(hostBodyBuf);
3961 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3962 m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3964 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3965 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3967 b3AlignedObjectArray<b3Contact4> hostContacts;
3970 contactOut->copyToHost(hostContacts);
3972 hostContacts.resize(newContactCapacity);
3974 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3975 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3977 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3978 worldVertsB2GPU.copyToHost(worldVertsB2CPU);
3980 for (int i = 0; i < numConcavePairs; i++)
3982 b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0),
3984 &concaveSepNormalsHost.at(0),
3985 &concaveHasSeparatingNormalsCPU.at(0),
3986 &hostContacts.at(0),
3987 &clippingFacesOutCPU.at(0),
3988 &worldVertsB2CPU.at(0),
3989 &nGlobalContactsOut,
3996 nContacts = nGlobalContactsOut;
3997 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3998 // nContacts = m_totalContactsOut.at(0);
3999 //contactOut->resize(nContacts);
4000 hostContacts.resize(nContacts);
4001 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4002 contactOut->copyFromHost(hostContacts);
4009 B3_PROFILE("clipHullHullConcaveConvexKernel");
4010 nContacts = m_totalContactsOut.at(0);
4011 int newContactCapacity = contactOut->capacity();
4013 //printf("contactOut5 = %d\n",nContacts);
4014 b3BufferInfoCL bInfo[] = {
4015 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
4016 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4017 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4018 b3BufferInfoCL(convexData.getBufferCL(), true),
4019 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4020 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4021 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4022 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4023 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4024 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
4025 b3BufferInfoCL(contactOut->getBufferCL()),
4026 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4027 b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel");
4028 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4029 launcher.setConst(newContactCapacity);
4030 launcher.setConst(numConcavePairs);
4031 int num = numConcavePairs;
4032 launcher.launch1D(num);
4034 nContacts = m_totalContactsOut.at(0);
4035 contactOut->resize(nContacts);
4036 //printf("contactOut6 = %d\n",nContacts);
4037 b3AlignedObjectArray<b3Contact4> cpuContacts;
4038 contactOut->copyToHost(cpuContacts);
4040 // printf("nContacts after = %d\n", nContacts);
4043 //convex-convex contact clipping
4045 bool breakupKernel = false;
4048 breakupKernel = true;
4051 #ifdef CHECK_ON_HOST
4052 bool computeConvexConvex = false;
4054 bool computeConvexConvex = true;
4055 #endif //CHECK_ON_HOST
4056 if (computeConvexConvex)
4058 B3_PROFILE("clipHullHullKernel");
4061 worldVertsB1GPU.resize(vertexFaceCapacity * nPairs);
4062 clippingFacesOutGPU.resize(nPairs);
4063 worldNormalsAGPU.resize(nPairs);
4064 worldVertsA1GPU.resize(vertexFaceCapacity * nPairs);
4065 worldVertsB2GPU.resize(vertexFaceCapacity * nPairs);
4067 if (findConvexClippingFacesGPU)
4069 B3_PROFILE("findClippingFacesKernel");
4070 b3BufferInfoCL bInfo[] = {
4071 b3BufferInfoCL(pairs->getBufferCL(), true),
4072 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4073 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4074 b3BufferInfoCL(convexData.getBufferCL(), true),
4075 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4076 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4077 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4078 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4079 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4080 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4081 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4082 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4083 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4084 b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
4086 b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel");
4087 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4088 launcher.setConst(vertexFaceCapacity);
4089 launcher.setConst(nPairs);
4091 launcher.launch1D(num);
4096 float minDist = -1e30f;
4097 float maxDist = 0.02f;
4099 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
4100 convexData.copyToHost(hostConvexData);
4101 b3AlignedObjectArray<b3Collidable> hostCollidables;
4102 gpuCollidables.copyToHost(hostCollidables);
4104 b3AlignedObjectArray<int> hostHasSepNormals;
4105 m_hasSeparatingNormals.copyToHost(hostHasSepNormals);
4106 b3AlignedObjectArray<b3Vector3> cpuSepNormals;
4107 m_sepNormals.copyToHost(cpuSepNormals);
4109 b3AlignedObjectArray<b3Int4> hostPairs;
4110 pairs->copyToHost(hostPairs);
4111 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4112 bodyBuf->copyToHost(hostBodyBuf);
4114 //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs);
4115 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4116 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4118 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
4119 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
4121 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4122 worldNormalsACPU.resize(nPairs);
4124 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4125 worldVertsA1CPU.resize(worldVertsA1GPU.size());
4127 b3AlignedObjectArray<b3Vector3> hostVertices;
4128 gpuVertices.copyToHost(hostVertices);
4129 b3AlignedObjectArray<b3GpuFace> hostFaces;
4130 gpuFaces.copyToHost(hostFaces);
4131 b3AlignedObjectArray<int> hostIndices;
4132 gpuIndices.copyToHost(hostIndices);
4134 for (int i = 0; i < nPairs; i++)
4136 int bodyIndexA = hostPairs[i].x;
4137 int bodyIndexB = hostPairs[i].y;
4139 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
4140 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
4142 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
4143 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
4145 if (hostHasSepNormals[i])
4147 b3FindClippingFaces(cpuSepNormals[i],
4148 &hostConvexData[shapeIndexA],
4149 &hostConvexData[shapeIndexB],
4150 hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat,
4151 hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat,
4152 &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0),
4153 &worldVertsB1CPU.at(0),
4154 vertexFaceCapacity, minDist, maxDist,
4155 &hostVertices.at(0), &hostFaces.at(0),
4157 &hostVertices.at(0), &hostFaces.at(0),
4158 &hostIndices.at(0), &clippingFacesOutCPU.at(0), i);
4162 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
4163 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
4164 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
4165 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
4168 ///clip face B against face A, reduce contacts and append them to a global contact array
4171 if (clipConvexFacesAndFindContactsCPU)
4173 //b3AlignedObjectArray<b3Int4> hostPairs;
4174 //pairs->copyToHost(hostPairs);
4176 b3AlignedObjectArray<b3Vector3> hostSepNormals;
4177 m_sepNormals.copyToHost(hostSepNormals);
4178 b3AlignedObjectArray<int> hostHasSepAxis;
4179 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4181 b3AlignedObjectArray<b3Int4> hostClippingFaces;
4182 clippingFacesOutGPU.copyToHost(hostClippingFaces);
4183 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4184 worldVertsB2CPU.resize(vertexFaceCapacity * nPairs);
4186 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4187 worldVertsA1GPU.copyToHost(worldVertsA1CPU);
4188 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4189 worldNormalsAGPU.copyToHost(worldNormalsACPU);
4191 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4192 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4195 __global const b3Float4* separatingNormals,
4196 __global const int* hasSeparatingAxis,
4197 __global b3Int4* clippingFacesOut,
4198 __global b3Float4* worldVertsA1,
4199 __global b3Float4* worldNormalsA1,
4200 __global b3Float4* worldVertsB1,
4201 __global b3Float4* worldVertsB2,
4202 int vertexFaceCapacity,
4205 for (int i = 0; i < nPairs; i++)
4207 clipFacesAndFindContactsKernel(
4208 &hostSepNormals.at(0),
4209 &hostHasSepAxis.at(0),
4210 &hostClippingFaces.at(0),
4211 &worldVertsA1CPU.at(0),
4212 &worldNormalsACPU.at(0),
4213 &worldVertsB1CPU.at(0),
4214 &worldVertsB2CPU.at(0),
4220 clippingFacesOutGPU.copyFromHost(hostClippingFaces);
4221 worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
4225 B3_PROFILE("clipFacesAndFindContacts");
4226 //nContacts = m_totalContactsOut.at(0);
4227 //int h = m_hasSeparatingNormals.at(0);
4228 //int4 p = clippingFacesOutGPU.at(0);
4229 b3BufferInfoCL bInfo[] = {
4230 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4231 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4232 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4233 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4234 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4235 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
4236 b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
4238 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
4239 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4240 launcher.setConst(vertexFaceCapacity);
4242 launcher.setConst(nPairs);
4244 launcher.setConst(debugMode);
4246 launcher.launch1D(num);
4251 nContacts = m_totalContactsOut.at(0);
4252 //printf("nContacts = %d\n",nContacts);
4254 int newContactCapacity = nContacts + nPairs;
4255 contactOut->reserve(newContactCapacity);
4257 if (reduceConvexContactsOnGPU)
4260 B3_PROFILE("newContactReductionKernel");
4261 b3BufferInfoCL bInfo[] =
4263 b3BufferInfoCL(pairs->getBufferCL(), true),
4264 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4265 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4266 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4267 b3BufferInfoCL(contactOut->getBufferCL()),
4268 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4269 b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
4270 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4272 b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
4273 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4274 launcher.setConst(vertexFaceCapacity);
4275 launcher.setConst(newContactCapacity);
4276 launcher.setConst(nPairs);
4279 launcher.launch1D(num);
4281 nContacts = m_totalContactsOut.at(0);
4282 contactOut->resize(nContacts);
4286 volatile int nGlobalContactsOut = nContacts;
4287 b3AlignedObjectArray<b3Int4> hostPairs;
4288 pairs->copyToHost(hostPairs);
4289 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4290 bodyBuf->copyToHost(hostBodyBuf);
4291 b3AlignedObjectArray<b3Vector3> hostSepNormals;
4292 m_sepNormals.copyToHost(hostSepNormals);
4293 b3AlignedObjectArray<int> hostHasSepAxis;
4294 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4295 b3AlignedObjectArray<b3Contact4> hostContactsOut;
4296 contactOut->copyToHost(hostContactsOut);
4297 hostContactsOut.resize(newContactCapacity);
4299 b3AlignedObjectArray<b3Int4> hostClippingFaces;
4300 clippingFacesOutGPU.copyToHost(hostClippingFaces);
4301 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4302 worldVertsB2GPU.copyToHost(worldVertsB2CPU);
4304 for (int i = 0; i < nPairs; i++)
4306 b3NewContactReductionKernel(&hostPairs.at(0),
4308 &hostSepNormals.at(0),
4309 &hostHasSepAxis.at(0),
4310 &hostContactsOut.at(0),
4311 &hostClippingFaces.at(0),
4312 &worldVertsB2CPU.at(0),
4313 &nGlobalContactsOut,
4320 nContacts = nGlobalContactsOut;
4321 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
4322 hostContactsOut.resize(nContacts);
4323 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4324 contactOut->copyFromHost(hostContactsOut);
4326 // b3Contact4 pt = contactOut->at(0);
4327 // printf("nContacts = %d\n",nContacts);
4331 else //breakupKernel
4335 b3BufferInfoCL bInfo[] = {
4336 b3BufferInfoCL(pairs->getBufferCL(), true),
4337 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4338 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4339 b3BufferInfoCL(convexData.getBufferCL(), true),
4340 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4341 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4342 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4343 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4344 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4345 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4346 b3BufferInfoCL(contactOut->getBufferCL()),
4347 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4348 b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel");
4349 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4350 launcher.setConst(nPairs);
4351 launcher.setConst(maxContactCapacity);
4354 launcher.launch1D(num);
4357 nContacts = m_totalContactsOut.at(0);
4358 if (nContacts >= maxContactCapacity)
4360 b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
4361 nContacts = maxContactCapacity;
4363 contactOut->resize(nContacts);
4367 int nCompoundsPairs = m_gpuCompoundPairs.size();
4369 if (nCompoundsPairs)
4371 b3BufferInfoCL bInfo[] = {
4372 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
4373 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4374 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4375 b3BufferInfoCL(convexData.getBufferCL(), true),
4376 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4377 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4378 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4379 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4380 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4381 b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true),
4382 b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true),
4383 b3BufferInfoCL(contactOut->getBufferCL()),
4384 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4385 b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel");
4386 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4387 launcher.setConst(nCompoundsPairs);
4388 launcher.setConst(maxContactCapacity);
4390 int num = nCompoundsPairs;
4391 launcher.launch1D(num);
4394 nContacts = m_totalContactsOut.at(0);
4395 if (nContacts > maxContactCapacity)
4397 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
4398 nContacts = maxContactCapacity;
4400 contactOut->resize(nContacts);
4401 } //if nCompoundsPairs
4403 } //contactClippingOnGpu
4405 //printf("nContacts end = %d\n",nContacts);
4407 //printf("frameCount = %d\n",frameCount++);