2 #define TRIANGLE_NUM_CONVEX_FACES 5
6 #pragma OPENCL EXTENSION cl_amd_printf : enable
7 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
8 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
9 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
10 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
12 #ifdef cl_ext_atomic_counters_32
13 #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
15 #define counter32_t volatile __global int*
// Shorthand for the OpenCL work-item queries (dimension 0 only), the work-group
// barrier / memory fence on local memory, and the 32-bit atomic built-ins.
18 #define GET_GROUP_IDX get_group_id(0)
19 #define GET_LOCAL_IDX get_local_id(0)
20 #define GET_GLOBAL_IDX get_global_id(0)
21 #define GET_GROUP_SIZE get_local_size(0)
22 #define GET_NUM_GROUPS get_num_groups(0)
23 #define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
24 #define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
// The Atom* macros take an lvalue and pass its address to the atom_* built-in.
25 #define AtomInc(x) atom_inc(&(x))
26 #define AtomInc1(x, out) out = atom_inc(&(x))
// Unlike AtomInc1, AppendInc forwards x unchanged to atomic_inc, so x must already be a pointer.
// 'out' receives the value returned by atomic_inc.
27 #define AppendInc(x, out) out = atomic_inc(x)
28 #define AtomAdd(x, value) atom_add(&(x), value)
29 #define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
30 #define AtomXhg(x, value) atom_xchg ( &(x), value )
35 typedef unsigned int u32;
39 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
40 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
41 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
42 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
// Accesses the w component of a contact's m_worldNormalOnB; the kernels in this
// file assign the number of contact points through this macro.
46 #define GET_NPOINTS(x) (x).m_worldNormalOnB.w
// Forwards its three arguments to select() in the same order.
50 #define SELECT_UINT4( b, a, condition ) select( b,a,condition )
// Each make_* name expands to a vector-type cast, so make_float4(a,b,c,d)
// becomes the vector literal (float4)(a,b,c,d).
52 #define make_float4 (float4)
53 #define make_float2 (float2)
54 #define make_uint4 (uint4)
55 #define make_int4 (int4)
56 #define make_uint2 (uint2)
57 #define make_int2 (int2)
// Scalar division routed through the native_divide built-in.
float fastDiv(float numerator, float denominator)
{
	const float quotient = native_divide(numerator, denominator);
	return quotient;
}
// Component-wise float4 division routed through the native_divide built-in.
float4 fastDiv4(float4 numerator, float4 denominator)
{
	const float4 quotient = native_divide(numerator, denominator);
	return quotient;
}
// NOTE(review): only the signature is visible in this excerpt. Presumably returns the
// cross product of the xyz parts of a and b -- confirm against the body.
75 float4 cross3(float4 a, float4 b)
// Builds copies of a and b whose w component is forced to 0.
// NOTE(review): the return statement is not visible in this excerpt; presumably it is the
// dot product of a1 and b1, i.e. a dot product over x, y, z only -- confirm.
83 float dot3F4(float4 a, float4 b)
85 float4 a1 = make_float4(a.xyz,0.f);
86 float4 b1 = make_float4(b.xyz,0.f);
// Normalizes all four components of v with the fast_normalize built-in.
float4 fastNormalize4(float4 v)
{
	const float4 unit = fast_normalize(v);
	return unit;
}
97 ///////////////////////////////////////
99 ///////////////////////////////////////
// Quaternions are stored in a float4. qtInvert and qtMul below treat xyz as the
// vector part and w as the scalar part.
101 typedef float4 Quaternion;
// Forward declarations of the quaternion helpers defined further down.
104 Quaternion qtMul(Quaternion a, Quaternion b);
107 Quaternion qtNormalize(Quaternion in);
110 float4 qtRotate(Quaternion q, float4 vec);
113 Quaternion qtInvert(Quaternion q);
// Multiplies two quaternions.
// Visible here: the result starts from cross3(a, b) and its scalar part is set to
// a.w*b.w - dot3F4(a, b).
// NOTE(review): the declaration of ans, any further vector-part terms and the return
// statement are not visible in this excerpt.
119 Quaternion qtMul(Quaternion a, Quaternion b)
122 ans = cross3( a, b );
124 // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
125 ans.w = a.w*b.w - dot3F4(a, b);
// Returns the quaternion scaled to unit length (delegates to fastNormalize4).
Quaternion qtNormalize(Quaternion in)
{
	const Quaternion unit = fastNormalize4(in);
	return unit;
}
// Rotates vec by q by evaluating q * vcpy * qtInvert(q) with qtMul.
// NOTE(review): the definition of vcpy and the return statement are not visible in this
// excerpt; presumably vcpy is vec with w cleared -- confirm.
137 float4 qtRotate(Quaternion q, float4 vec)
139 Quaternion qInv = qtInvert( q );
142 float4 out = qtMul(qtMul(q,vcpy),qInv);
// Returns the conjugate of q: the xyz part is negated, w is kept.
Quaternion qtInvert(Quaternion q)
{
	const Quaternion conjugate = (Quaternion)(-q.xyz, q.w);
	return conjugate;
}
// Rotates vec by the inverse of q.
float4 qtInvRotate(const Quaternion q, float4 vec)
{
	const Quaternion inverse = qtInvert(q);
	return qtRotate(inverse, vec);
}
// Applies a rigid transform to *p: rotate by *orientation, then add *translation.
float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
{
	const float4 rotated = qtRotate(*orientation, *p);
	return rotated + (*translation);
}
// Normalizes the xyz part of a; w is zeroed before normalizing.
float4 normalize3(const float4 a)
{
	return fastNormalize4(make_float4(a.x, a.y, a.z, 0.f));
}
// Linearly interpolates x, y and z from a (t = 0) towards b (t = 1).
// NOTE(review): the fourth make_float4 argument (the w of the result) is not visible in
// this excerpt.
174 __inline float4 lerp3(const float4 a,const float4 b, float t)
176 return make_float4( a.x + (b.x - a.x) * t,
177 a.y + (b.y - a.y) * t,
178 a.z + (b.z - a.z) * t,
// Clips the polygon pVtxIn (numVertsIn vertices, read from __global memory) against the
// plane dot3F4(planeNormalWS, v) + planeEqWS = 0 and appends the result to ppVtxOut.
// The polygon is walked edge by edge, starting with the edge from the last vertex to
// vertex 0; ds / de hold the signed plane distance of the edge's start / end vertex.
// NOTE(review): the branch conditions, any early-out and the return statement are not
// visible in this excerpt. ppVtxOut is written with no visible capacity check -- confirm
// that callers size it for the worst case.
184 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
185 int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)
191 //double-check next test
195 float4 firstVertex=pVtxIn[numVertsIn-1];
196 float4 endVertex = pVtxIn[0];
198 ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;
200 for (ve = 0; ve < numVertsIn; ve++)
202 endVertex=pVtxIn[ve];
203 de = dot3F4(planeNormalWS,endVertex)+planeEqWS;
208 // Start < 0, end < 0, so output endVertex
209 ppVtxOut[numVertsOut++] = endVertex;
213 // Start < 0, end >= 0, so output intersection
// ds/(ds - de) is the fraction along the edge at which the signed distance reaches zero.
214 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
221 // Start >= 0, end < 0 so output intersection and end
222 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
223 ppVtxOut[numVertsOut++] = endVertex;
// The end of this edge becomes the start of the next one.
226 firstVertex = endVertex;
// Same clipping routine as clipFaceGlobal, but both vertex arrays are passed through
// unqualified (non-__global) pointers.
// Clips the polygon pVtxIn against the plane dot3F4(planeNormalWS, v) + planeEqWS = 0,
// walking the edges from (last vertex -> vertex 0) onwards; ds / de are the signed plane
// distances of the current edge's start / end vertex.
// NOTE(review): the branch conditions, any early-out and the return statement are not
// visible in this excerpt. ppVtxOut is written with no visible capacity check.
234 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
235 int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)
241 //double-check next test
245 float4 firstVertex=pVtxIn[numVertsIn-1];
246 float4 endVertex = pVtxIn[0];
248 ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;
250 for (ve = 0; ve < numVertsIn; ve++)
252 endVertex=pVtxIn[ve];
254 de = dot3F4(planeNormalWS,endVertex)+planeEqWS;
260 // Start < 0, end < 0, so output endVertex
261 ppVtxOut[numVertsOut++] = endVertex;
265 // Start < 0, end >= 0, so output intersection
// ds/(ds - de) is the fraction along the edge at which the signed distance reaches zero.
266 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
273 // Start >= 0, end < 0 so output intersection and end
274 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
275 ppVtxOut[numVertsOut++] = endVertex;
// The end of this edge becomes the start of the next one.
278 firstVertex = endVertex;
// Clips a world-space polygon (worldVertsB1, numWorldVertsB1 vertices) against one face
// of hullA and emits contact points.
// Steps visible in this excerpt:
//  1. For every face of hullA, rotate its plane normal by ornA and take the dot product
//     with separatingNormal to choose closestFaceA (the comparison is not visible).
//  2. For every edge of that face, build a side plane through the edge and clip the
//     polygon against it with clipFace, ping-ponging between the two vertex buffers.
//  3. For each remaining vertex, compute its signed distance to the chosen face's
//     world-space plane and store (x, y, z, depth) in contactsOut.
// Returns the number of contacts written.
// NOTE(review): the trailing parameters (contactsOut and presumably its capacity), the
// face-selection test, the depth filter using minDist/maxDist and the buffer swap are not
// fully visible here. No visible line checks capacityWorldVertsB2 or bounds the writes to
// contactsOut -- confirm.
285 int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,
286 const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,
287 float4* worldVertsB2, int capacityWorldVertsB2,
288 const float minDist, float maxDist,
289 __global const float4* vertices,
290 __global const b3GpuFace_t* faces,
291 __global const int* indices,
295 int numContactsOut = 0;
297 float4* pVtxIn = worldVertsB1;
298 float4* pVtxOut = worldVertsB2;
300 int numVertsIn = numWorldVertsB1;
305 float dmin = FLT_MAX;
306 for(int face=0;face<hullA->m_numFaces;face++)
308 const float4 Normal = make_float4(
309 faces[hullA->m_faceOffset+face].m_plane.x,
310 faces[hullA->m_faceOffset+face].m_plane.y,
311 faces[hullA->m_faceOffset+face].m_plane.z,0.f);
312 const float4 faceANormalWS = qtRotate(ornA,Normal);
314 float d = dot3F4(faceANormalWS,separatingNormal);
323 return numContactsOut;
325 b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];
327 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
328 int numVerticesA = polyA.m_numIndices;
329 for(int e0=0;e0<numVerticesA;e0++)
// a and b are consecutive vertices of the face (wrapping around), in hull-local space.
331 const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];
332 const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];
333 const float4 edge0 = a - b;
334 const float4 WorldEdge0 = qtRotate(ornA,edge0);
335 float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
336 float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);
// Side-plane normal: perpendicular to both the edge and the face normal.
338 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
339 float4 worldA1 = transform(&a,&posA,&ornA);
// Plane offset chosen so that the plane passes through the world-space vertex a.
340 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
342 float4 planeNormalWS = planeNormalWS1;
343 float planeEqWS=planeEqWS1;
346 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
347 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);
349 //btSwap(pVtxIn,pVtxOut);
350 float4* tmp = pVtxOut;
353 numVertsIn = numVertsOut;
358 // only keep points that are behind the witness face
360 float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
361 float localPlaneEq = polyA.m_plane.w;
362 float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);
// Local plane offset converted to world space by accounting for the translation posA.
363 float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);
364 for (int i=0;i<numVertsIn;i++)
366 float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
374 float4 pointInWorld = pVtxIn[i];
375 //resultOut.addContactPoint(separatingNormal,point,depth);
// The contact stores the world position in xyz and the depth in w.
376 contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
381 return numContactsOut;
// Variant of clipFaceAgainstHull in which hull A and its vertex/face/index arrays
// (verticesA, facesA, indicesA) are passed through unqualified (non-__global) pointers.
// Steps visible in this excerpt:
//  1. For every face of hullA, rotate its plane normal by ornA and take the dot product
//     with separatingNormal to choose closestFaceA (the comparison is not visible).
//  2. For every edge of that face, build a side plane through the edge and clip the
//     polygon against it with clipFace, ping-ponging between the two vertex buffers.
//  3. For each remaining vertex, compute its signed distance to the chosen face's
//     world-space plane and store (x, y, z, depth) in contactsOut.
// Returns the number of contacts written.
// NOTE(review): the indicesA and contactsOut parameter lines, the face-selection test and
// the depth filter are not visible here. verticesB, facesB and indicesB are not referenced
// by any visible line, and no visible line bounds the writes to contactsOut -- confirm.
386 int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,
387 const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,
388 float4* worldVertsB2, int capacityWorldVertsB2,
389 const float minDist, float maxDist,
390 const float4* verticesA,
391 const b3GpuFace_t* facesA,
393 __global const float4* verticesB,
394 __global const b3GpuFace_t* facesB,
395 __global const int* indicesB,
399 int numContactsOut = 0;
401 float4* pVtxIn = worldVertsB1;
402 float4* pVtxOut = worldVertsB2;
404 int numVertsIn = numWorldVertsB1;
409 float dmin = FLT_MAX;
410 for(int face=0;face<hullA->m_numFaces;face++)
412 const float4 Normal = make_float4(
413 facesA[hullA->m_faceOffset+face].m_plane.x,
414 facesA[hullA->m_faceOffset+face].m_plane.y,
415 facesA[hullA->m_faceOffset+face].m_plane.z,0.f);
416 const float4 faceANormalWS = qtRotate(ornA,Normal);
418 float d = dot3F4(faceANormalWS,separatingNormal);
427 return numContactsOut;
429 b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];
431 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
432 int numVerticesA = polyA.m_numIndices;
433 for(int e0=0;e0<numVerticesA;e0++)
// a and b are consecutive vertices of the face (wrapping around), in hull-local space.
435 const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];
436 const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];
437 const float4 edge0 = a - b;
438 const float4 WorldEdge0 = qtRotate(ornA,edge0);
439 float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
440 float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);
// Side-plane normal: perpendicular to both the edge and the face normal.
442 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
443 float4 worldA1 = transform(&a,&posA,&ornA);
// Plane offset chosen so that the plane passes through the world-space vertex a.
444 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
446 float4 planeNormalWS = planeNormalWS1;
447 float planeEqWS=planeEqWS1;
450 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
451 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);
453 //btSwap(pVtxIn,pVtxOut);
454 float4* tmp = pVtxOut;
457 numVertsIn = numVertsOut;
462 // only keep points that are behind the witness face
464 float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
465 float localPlaneEq = polyA.m_plane.w;
466 float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);
// Local plane offset converted to world space by accounting for the translation posA.
467 float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);
468 for (int i=0;i<numVertsIn;i++)
470 float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
478 float4 pointInWorld = pVtxIn[i];
479 //resultOut.addContactPoint(separatingNormal,point,depth);
// The contact stores the world position in xyz and the depth in w.
480 contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
485 return numContactsOut;
// Generates contact points between two convex hulls for a given separating normal.
// Visible steps: iterate over the faces of hullB, rotate each plane normal by ornB and
// take its dot product with separatingNormal to choose closestFaceB; transform that
// face's vertices into world space (posB, ornB) into worldVertsB1; then hand the polygon
// to clipFaceAgainstHull. Returns the count reported by clipFaceAgainstHull.
// NOTE(review): the face-selection comparison, any guard on closestFaceB and part of the
// clipFaceAgainstHull argument list are not visible in this excerpt. The writes to
// worldVertsB1 are bounded only by the face's m_numIndices in the visible lines, not by
// capacityWorldVerts -- confirm.
488 int clipHullAgainstHull(const float4 separatingNormal,
489 __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
490 const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
491 float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
492 const float minDist, float maxDist,
493 __global const float4* vertices,
494 __global const b3GpuFace_t* faces,
495 __global const int* indices,
496 float4* localContactsOut,
497 int localContactCapacity)
499 int numContactsOut = 0;
500 int numWorldVertsB1= 0;
504 float dmax = -FLT_MAX;
507 for(int face=0;face<hullB->m_numFaces;face++)
509 const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,
510 faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);
511 const float4 WorldNormal = qtRotate(ornB, Normal);
512 float d = dot3F4(WorldNormal,separatingNormal);
522 const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];
523 const int numVertices = polyB.m_numIndices;
524 for(int e0=0;e0<numVertices;e0++)
526 const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];
527 worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);
533 numContactsOut = clipFaceAgainstHull(separatingNormal, hullA,
535 worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,
537 indices,localContactsOut,localContactCapacity);
540 return numContactsOut;
// Variant of clipHullAgainstHull in which hull A and its arrays (verticesA, facesA,
// indicesA) are passed through unqualified (non-__global) pointers, while hull B is read
// from the __global arrays verticesB / facesB / indicesB.
// Visible steps: iterate over the faces of hullB to choose closestFaceB, transform that
// face's vertices into world space into worldVertsB1, then call clipFaceAgainstHullLocalA
// and return its count.
// NOTE(review): the indicesA parameter line, the face-selection comparison and part of the
// callee's argument list are not visible in this excerpt. The writes to worldVertsB1 are
// bounded only by the face's m_numIndices in the visible lines -- confirm.
544 int clipHullAgainstHullLocalA(const float4 separatingNormal,
545 const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
546 const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
547 float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
548 const float minDist, float maxDist,
549 const float4* verticesA,
550 const b3GpuFace_t* facesA,
552 __global const float4* verticesB,
553 __global const b3GpuFace_t* facesB,
554 __global const int* indicesB,
555 float4* localContactsOut,
556 int localContactCapacity)
558 int numContactsOut = 0;
559 int numWorldVertsB1= 0;
563 float dmax = -FLT_MAX;
566 for(int face=0;face<hullB->m_numFaces;face++)
568 const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
569 facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
570 const float4 WorldNormal = qtRotate(ornB, Normal);
571 float d = dot3F4(WorldNormal,separatingNormal);
581 const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];
582 const int numVertices = polyB.m_numIndices;
583 for(int e0=0;e0<numVertices;e0++)
585 const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
586 worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);
592 numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA,
594 worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,
595 verticesA,facesA,indicesA,
596 verticesB,facesB,indicesB,
597 localContactsOut,localContactCapacity);
600 return numContactsOut;
// Sequential stand-ins for parallel primitives. Every macro expands to exactly one
// statement (do { ... } while(0)), so it is safe inside an unbraced if/else, and every
// argument is parenthesized so that expression arguments keep their meaning.
//
// PARALLEL_SUM(v, n): adds v[1] .. v[n-1] into v[0].
#define PARALLEL_SUM(v, n) do { for(int j=1; j<(n); j++) (v)[0] += (v)[j]; } while(0)
// PARALLEL_DO(execution, n): runs 'execution' n times; the statement may refer to the
// loop counter, which is named 'ie'.
#define PARALLEL_DO(execution, n) do { for(int ie=0; ie<(n); ie++){execution;} } while(0)
// REDUCE_MAX(v, n): leaves in v[0] the element with the largest .y among v[0] .. v[n-1].
// Ties keep the later element, as before. The scan starts at 1 because comparing v[0]
// with itself never changes it.
#define REDUCE_MAX(v, n) do { \
	for(int offset=1; offset<(n); offset++) (v)[0] = ((v)[0].y > (v)[offset].y)? (v)[0]: (v)[offset]; } while(0)
// REDUCE_MIN(v, n): leaves in v[0] the element with the smallest .y among v[0] .. v[n-1].
#define REDUCE_MIN(v, n) do { \
	for(int offset=1; offset<(n); offset++) (v)[0] = ((v)[0].y < (v)[offset].y)? (v)[0]: (v)[offset]; } while(0)
// Reduces a set of contact points in __global memory to at most four, writing the chosen
// point indices into the x, y, z, w components of contactIdx[0].
// Visible steps: compute a center by dividing an accumulated value by nPoints; derive two
// directions u and v from nearNormal and (p[0] - center); for every point, examine
// r = p[ie] - center and record candidate indices; finally, if none of the four chosen
// indices equals minIndex, overwrite the first one with minIndex.
// NOTE(review): the accumulation into center, the tests that select each index, the
// computation of minIndex and the return value are not visible in this excerpt. No guard
// for nPoints == 0 is visible before the division by nPoints and the read of p[0] --
// confirm.
610 int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)
622 float4 center = make_float4(0.f);
625 for (int i=0;i<nPoints;i++)
627 center /= (float)nPoints;
632 // sample 4 directions
634 float4 aVector = p[0] - center;
635 float4 u = cross3( nearNormal, aVector );
636 float4 v = cross3( nearNormal, u );
641 //keep point with deepest penetration
653 for(int ie = 0; ie<nPoints; ie++ )
661 float4 r = p[ie]-center;
666 contactIdx[0].x = ie;
673 contactIdx[0].y = ie;
681 contactIdx[0].z = ie;
688 contactIdx[0].w = ie;
693 if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
695 //replace the first contact with minimum (todo: replace contact with least penetration)
696 contactIdx[0].x = minIndex;
// Returns 0 immediately when there are no points; otherwise clamps nPoints to at most 4.
// NOTE(review): the rest of the body (how contactIdx is filled and what is returned) is
// not visible in this excerpt.
704 int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)
711 if( nPoints == 0 ) return 0;
713 nPoints = min2( nPoints, 4 );
// Reduces up to 64 contact points to at most four and writes their indices to
// contactIdx[0..3]. Returns 0 when nPoints is 0.
// Visible steps:
//  - center = sum of the scratch array v (via PARALLEL_SUM) divided by nPoints;
//  - two directions u and v are derived from nearNormal and (p[0] - center);
//  - for each point, four keys are stored in a[ie]: the upper 24 bits of a float's bit
//    pattern combined with the point index in the low 8 bits; a running maximum per
//    component is kept in a[0], and the low 8 bits of each maximum become idx[0..3];
//  - h pairs each index with p[ie].w and REDUCE_MIN leaves the pair with the smallest w
//    in h[0].
// NOTE(review): the declarations of v, a, h, idx and f, how f is computed for each
// direction, how h[0] is used and the return value are not visible in this excerpt.
// NOTE(review): *(u32*)&f reinterprets float bits through a pointer cast; OpenCL C
// provides as_uint() for this -- worth confirming and switching. max00 is not referenced
// by any visible line.
720 int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)
722 if( nPoints == 0 ) return 0;
// Clamp to 64 points; the 8-bit index packed into the keys below can represent that range.
724 nPoints = min2( nPoints, 64 );
726 float4 center = make_float4(0.f);
729 for (int i=0;i<nPoints;i++)
731 //memcpy( v, p, nPoints*sizeof(float4) );
732 PARALLEL_SUM( v, nPoints );
733 center = v[0]/(float)nPoints;
738 { // sample 4 directions
741 for(int i=0; i<nPoints; i++)
746 float4 aVector = p[0] - center;
747 float4 u = cross3( nearNormal, aVector );
748 float4 v = cross3( nearNormal, u );
754 float2 max00 = make_float2(0,FLT_MAX);
760 for(int ie = 0; ie<nPoints; ie++ )
765 float4 r = p[ie]-center;
// Key = float bits with the low 8 bits replaced by the point index.
767 a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
770 a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
773 a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
776 a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
// Per-component maximum of the keys, accumulated in a[0].
779 for(int ie=0; ie<nPoints; ie++)
781 a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;
782 a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;
783 a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;
784 a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;
// Recover the point index from the low 8 bits of each winning key.
787 idx[0] = (int)a[0].x & 0xff;
788 idx[1] = (int)a[0].y & 0xff;
789 idx[2] = (int)a[0].z & 0xff;
790 idx[3] = (int)a[0].w & 0xff;
796 PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );
797 REDUCE_MIN( h, nPoints );
802 contactIdx[0] = idx[0];
803 contactIdx[1] = idx[1];
804 contactIdx[2] = idx[2];
805 contactIdx[3] = idx[3];
// Kernel: for the pair selected by the global work-item id, copies that pair's contact
// points from closestPointsWorld into a private 64-entry array, reduces them with
// extractManifoldSequential and appends one b3Contact4Data record to contactsOut.
// The output slot is reserved by atomically incrementing nContactsOut (AppendInc) and is
// written only when the slot index is below contactCapacity.
// NOTE(review): the remaining parameters and the declarations of dstIdx, pairIndex and
// contactCapacity are not visible in this excerpt, nor is any bounds test on idx.
// NOTE(review): localPoints holds 64 entries but the copy loop runs to
// nPoints = contactCounts[idx]; no clamp is visible -- confirm nPoints <= 64 is guaranteed.
814 __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
815 __global const b3RigidBodyData_t* rigidBodies,
816 __global const float4* closestPointsWorld,
817 __global const float4* separatingNormalsWorld,
818 __global const int* contactCounts,
819 __global const int* contactOffsets,
820 __global struct b3Contact4Data* restrict contactsOut,
821 counter32_t nContactsOut,
827 int idx = get_global_id(0);
831 float4 normal = separatingNormalsWorld[idx];
832 int nPoints = contactCounts[idx];
833 __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];
834 float4 localPoints[64];
835 for (int i=0;i<nPoints;i++)
837 localPoints[i] = pointsIn[i];
840 int contactIdx[4];// = {-1,-1,-1,-1};
846 int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);
849 AppendInc( nContactsOut, dstIdx );
850 if (dstIdx<contactCapacity)
852 __global struct b3Contact4Data* c = contactsOut + dstIdx;
853 c->m_worldNormalOnB = -normal;
// Restitution 0 and friction 0.7, each scaled by 0xffff.
854 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
856 int bodyA = pairs[pairIndex].x;
857 int bodyB = pairs[pairIndex].y;
// A body with zero inverse mass is stored with its index negated. Index 0 is unchanged
// by negation, so the flag cannot be expressed for body 0.
858 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;
859 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;
860 c->m_childIndexA = -1;
861 c->m_childIndexB = -1;
862 for (int i=0;i<nContacts;i++)
864 c->m_worldPosB[i] = localPoints[contactIdx[i]];
// The point count is stored in the w component of m_worldNormalOnB.
866 GET_NPOINTS(*c) = nContacts;
// Inverts a rigid transform: the inverse orientation is the conjugate quaternion, and
// the inverse translation is the negated input translation rotated by that inverse.
void trInverse(float4 translationIn, Quaternion orientationIn,
		float4* translationOut, Quaternion* orientationOut)
{
	const Quaternion inverseOrientation = qtInvert(orientationIn);
	*orientationOut = inverseOrientation;
	*translationOut = qtRotate(inverseOrientation, -translationIn);
}
// Composes two rigid transforms (A applied after B): orientations are multiplied and
// B's translation is rotated by A's orientation, then offset by A's translation.
void trMul(float4 translationA, Quaternion orientationA,
		float4 translationB, Quaternion orientationB,
		float4* translationOut, Quaternion* orientationOut)
{
	const Quaternion combinedOrientation = qtMul(orientationA, orientationB);
	*orientationOut = combinedOrientation;

	const float4 rotatedB = qtRotate(orientationA, translationB);
	*translationOut = rotatedB + translationA;
}
// Kernel: for pair i (the global work-item id) that has a separating axis, clips the two
// convex hulls against each other with clipHullAgainstHull, reduces the resulting points
// with extractManifoldSequential and writes one b3Contact4Data record to
// globalContactsOut. The slot index used is also stored back into pairs[pairIndex].z.
// Private scratch buffers hold up to 64 world-space vertices and 64 local contacts.
// NOTE(review): the trailing parameters, the declarations of pairIndex and contactCapacity,
// any bounds test on i and the condition under which AppendInc reserves a new slot are not
// visible in this excerpt. dstIdx is first initialised from pairs[pairIndex].z.
890 __kernel void clipHullHullKernel( __global int4* pairs,
891 __global const b3RigidBodyData_t* rigidBodies,
892 __global const b3Collidable_t* collidables,
893 __global const b3ConvexPolyhedronData_t* convexShapes,
894 __global const float4* vertices,
895 __global const float4* uniqueEdges,
896 __global const b3GpuFace_t* faces,
897 __global const int* indices,
898 __global const float4* separatingNormals,
899 __global const int* hasSeparatingAxis,
900 __global struct b3Contact4Data* restrict globalContactsOut,
901 counter32_t nGlobalContactsOut,
906 int i = get_global_id(0);
909 float4 worldVertsB1[64];
910 float4 worldVertsB2[64];
911 int capacityWorldVerts = 64;
913 float4 localContactsOut[64];
914 int localContactCapacity=64;
916 float minDist = -1e30f;
917 float maxDist = 0.02f;
922 int bodyIndexA = pairs[i].x;
923 int bodyIndexB = pairs[i].y;
925 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
926 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
928 if (hasSeparatingAxis[i])
932 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
933 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
938 int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],
939 &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],
940 rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,
941 rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,
942 worldVertsB1,worldVertsB2,capacityWorldVerts,
944 vertices,faces,indices,
945 localContactsOut,localContactCapacity);
947 if (numLocalContactsOut>0)
// 'normal' is the negated separating normal; it is negated again when stored below.
949 float4 normal = -separatingNormals[i];
950 int nPoints = numLocalContactsOut;
951 float4* pointsIn = localContactsOut;
952 int contactIdx[4];// = {-1,-1,-1,-1};
959 int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
962 int mprContactIndex = pairs[pairIndex].z;
964 int dstIdx = mprContactIndex;
967 AppendInc( nGlobalContactsOut, dstIdx );
970 if (dstIdx<contactCapacity)
972 pairs[pairIndex].z = dstIdx;
974 __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
975 c->m_worldNormalOnB = -normal;
// Restitution 0 and friction 0.7, each scaled by 0xffff.
976 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
977 c->m_batchIdx = pairIndex;
978 int bodyA = pairs[pairIndex].x;
979 int bodyB = pairs[pairIndex].y;
// A body with zero inverse mass is stored with its index negated (index 0 is unaffected).
980 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
981 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
982 c->m_childIndexA = -1;
983 c->m_childIndexB = -1;
// This loop counter shadows the work-item index i declared at the top of the kernel.
985 for (int i=0;i<nReducedContacts;i++)
987 //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact
988 if (i>0||(mprContactIndex<0))
990 c->m_worldPosB[i] = pointsIn[contactIdx[i]];
// The point count is stored in the w component of m_worldNormalOnB.
993 GET_NPOINTS(*c) = nReducedContacts;
996 }// if (numLocalContactsOut>0)
997 }// if (hasSeparatingAxis[i])
// Kernel: same pipeline as the plain hull/hull kernel, but for compound pairs. Each entry
// of gpuCompoundPairs holds the two body indices in x, y and the two child-shape indices
// in z, w. A child index >= 0 selects a child shape whose local position/orientation is
// combined with the body's pose; the visible fallback uses the body's own collidable.
// The clipped and reduced contacts are written to one b3Contact4Data record, which also
// records the child indices.
// NOTE(review): the lines that feed newPosA/newOrnA (and newPosB/newOrnB) into the clip
// call, the else keywords, the pose arguments of clipHullAgainstHull and the declarations
// of dstIdx and pairIndex are not visible in this excerpt.
// NOTE(review): the capacity test here is (dstIdx+nReducedContacts) < maxContactCapacity,
// whereas the other kernels in this file test dstIdx < contactCapacity; only one record
// at dstIdx is written in the visible lines -- confirm which bound is intended.
1003 __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs,
1004 __global const b3RigidBodyData_t* rigidBodies,
1005 __global const b3Collidable_t* collidables,
1006 __global const b3ConvexPolyhedronData_t* convexShapes,
1007 __global const float4* vertices,
1008 __global const float4* uniqueEdges,
1009 __global const b3GpuFace_t* faces,
1010 __global const int* indices,
1011 __global const b3GpuChildShape_t* gpuChildShapes,
1012 __global const float4* gpuCompoundSepNormalsOut,
1013 __global const int* gpuHasCompoundSepNormalsOut,
1014 __global struct b3Contact4Data* restrict globalContactsOut,
1015 counter32_t nGlobalContactsOut,
1016 int numCompoundPairs, int maxContactCapacity)
1019 int i = get_global_id(0);
1022 float4 worldVertsB1[64];
1023 float4 worldVertsB2[64];
1024 int capacityWorldVerts = 64;
1026 float4 localContactsOut[64];
1027 int localContactCapacity=64;
1029 float minDist = -1e30f;
1030 float maxDist = 0.02f;
1032 if (i<numCompoundPairs)
1035 if (gpuHasCompoundSepNormalsOut[i])
1038 int bodyIndexA = gpuCompoundPairs[i].x;
1039 int bodyIndexB = gpuCompoundPairs[i].y;
1041 int childShapeIndexA = gpuCompoundPairs[i].z;
1042 int childShapeIndexB = gpuCompoundPairs[i].w;
1044 int collidableIndexA = -1;
1045 int collidableIndexB = -1;
1047 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1048 float4 posA = rigidBodies[bodyIndexA].m_pos;
1050 float4 ornB = rigidBodies[bodyIndexB].m_quat;
1051 float4 posB = rigidBodies[bodyIndexB].m_pos;
1053 if (childShapeIndexA >= 0)
1055 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1056 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1057 float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
// World pose of child A: body pose composed with the child's local pose.
1058 float4 newPosA = qtRotate(ornA,childPosA)+posA;
1059 float4 newOrnA = qtMul(ornA,childOrnA);
1064 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1067 if (childShapeIndexB>=0)
1069 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1070 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1071 float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
// Same composition as for child A; transform() is rotate-then-translate.
1072 float4 newPosB = transform(&childPosB,&posB,&ornB);
1073 float4 newOrnB = qtMul(ornB,childOrnB);
1078 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1081 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1082 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1084 int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
1085 &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],
1088 worldVertsB1,worldVertsB2,capacityWorldVerts,
1090 vertices,faces,indices,
1091 localContactsOut,localContactCapacity);
1093 if (numLocalContactsOut>0)
// 'normal' is the negated separating normal; it is negated again when stored below.
1095 float4 normal = -gpuCompoundSepNormalsOut[i];
1096 int nPoints = numLocalContactsOut;
1097 float4* pointsIn = localContactsOut;
1098 int contactIdx[4];// = {-1,-1,-1,-1};
1105 int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
1108 AppendInc( nGlobalContactsOut, dstIdx );
1109 if ((dstIdx+nReducedContacts) < maxContactCapacity)
1111 __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
1112 c->m_worldNormalOnB = -normal;
// Restitution 0 and friction 0.7, each scaled by 0xffff.
1113 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
1114 c->m_batchIdx = pairIndex;
1115 int bodyA = gpuCompoundPairs[pairIndex].x;
1116 int bodyB = gpuCompoundPairs[pairIndex].y;
// A body with zero inverse mass is stored with its index negated (index 0 is unaffected).
1117 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
1118 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
1119 c->m_childIndexA = childShapeIndexA;
1120 c->m_childIndexB = childShapeIndexB;
// This loop counter shadows the work-item index i declared at the top of the kernel.
1121 for (int i=0;i<nReducedContacts;i++)
1123 c->m_worldPosB[i] = pointsIn[contactIdx[i]];
// The point count is stored in the w component of m_worldNormalOnB.
1125 GET_NPOINTS(*c) = nReducedContacts;
1128 }// if (numLocalContactsOut>0)
1129 }// if (gpuHasCompoundSepNormalsOut[i])
1130 }// if (i<numCompoundPairs)
// Kernel: for pair i (the global work-item id), when both collidables are SHAPE_SPHERE,
// tests the two spheres for overlap and appends a single-point contact.
// The contact is generated when the centre distance is at most radiusA + radiusB. Its
// position lies on sphere B's surface along the direction from B to A, and its w
// component carries the signed distance (negative means penetration).
// NOTE(review): the trailing parameters, the declarations of dstIdx and pairIndex, any
// bounds test on i and the guard around the diff / len division are not visible in this
// excerpt; the default normal (1,0,0,0) is presumably kept when len is too small to
// divide by -- confirm.
1136 __kernel void sphereSphereCollisionKernel( __global const int4* pairs,
1137 __global const b3RigidBodyData_t* rigidBodies,
1138 __global const b3Collidable_t* collidables,
1139 __global const float4* separatingNormals,
1140 __global const int* hasSeparatingAxis,
1141 __global struct b3Contact4Data* restrict globalContactsOut,
1142 counter32_t nGlobalContactsOut,
1143 int contactCapacity,
1147 int i = get_global_id(0);
1152 int bodyIndexA = pairs[i].x;
1153 int bodyIndexB = pairs[i].y;
1155 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1156 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1158 if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
1159 collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)
1162 float radiusA = collidables[collidableIndexA].m_radius;
1163 float radiusB = collidables[collidableIndexB].m_radius;
1164 float4 posA = rigidBodies[bodyIndexA].m_pos;
1165 float4 posB = rigidBodies[bodyIndexB].m_pos;
1167 float4 diff = posA-posB;
1168 float len = length(diff);
1170 ///if the distance is positive, don't generate a new contact
1171 if ( len <= (radiusA+radiusB))
1173 ///distance (negative means penetration)
1174 float dist = len - (radiusA+radiusB);
1175 float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);
1178 normalOnSurfaceB = diff / len;
1180 float4 contactPosB = posB + normalOnSurfaceB*radiusB;
1181 contactPosB.w = dist;
1184 AppendInc( nGlobalContactsOut, dstIdx );
1185 if (dstIdx < contactCapacity)
1187 __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
1188 c->m_worldNormalOnB = -normalOnSurfaceB;
// Restitution 0 and friction 0.7, each scaled by 0xffff.
1189 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
1190 c->m_batchIdx = pairIndex;
1191 int bodyA = pairs[pairIndex].x;
1192 int bodyB = pairs[pairIndex].y;
// A body with zero inverse mass is stored with its index negated (index 0 is unaffected).
1193 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
1194 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
1195 c->m_worldPosB[0] = contactPosB;
1196 c->m_childIndexA = -1;
1197 c->m_childIndexB = -1;
// The point count is stored in the w component of m_worldNormalOnB.
1199 GET_NPOINTS(*c) = 1;
1200 }//if (dstIdx < contactCapacity)
1201 }//if ( len <= (radiusA+radiusB))
1202 }//SHAPE_SPHERE SHAPE_SPHERE
1206 __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
1207 __global const b3RigidBodyData_t* rigidBodies,
1208 __global const b3Collidable_t* collidables,
1209 __global const b3ConvexPolyhedronData_t* convexShapes,
1210 __global const float4* vertices,
1211 __global const float4* uniqueEdges,
1212 __global const b3GpuFace_t* faces,
1213 __global const int* indices,
1214 __global const b3GpuChildShape_t* gpuChildShapes,
1215 __global const float4* separatingNormals,
1216 __global struct b3Contact4Data* restrict globalContactsOut,
1217 counter32_t nGlobalContactsOut,
1218 int contactCapacity,
1219 int numConcavePairs)
1222 int i = get_global_id(0);
1225 float4 worldVertsB1[64];
1226 float4 worldVertsB2[64];
1227 int capacityWorldVerts = 64;
1229 float4 localContactsOut[64];
1230 int localContactCapacity=64;
1232 float minDist = -1e30f;
1233 float maxDist = 0.02f;
1235 if (i<numConcavePairs)
1237 //negative value means that the pair is invalid
1238 if (concavePairsIn[i].w<0)
1241 int bodyIndexA = concavePairsIn[i].x;
1242 int bodyIndexB = concavePairsIn[i].y;
1243 int f = concavePairsIn[i].z;
1244 int childShapeIndexA = f;
1246 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1247 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1249 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1250 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1252 ///////////////////////////////////////////////////////////////
1255 bool overlap = false;
1257 b3ConvexPolyhedronData_t convexPolyhedronA;
1259 //add 3 vertices of the triangle
1260 convexPolyhedronA.m_numVertices = 3;
1261 convexPolyhedronA.m_vertexOffset = 0;
1262 float4 localCenter = make_float4(0.f,0.f,0.f,0.f);
1264 b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
1266 float4 verticesA[3];
1267 for (int i=0;i<3;i++)
1269 int index = indices[face.m_indexOffset+i];
1270 float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1271 verticesA[i] = vert;
1272 localCenter += vert;
1275 float dmin = FLT_MAX;
1279 //a triangle has 3 unique edges
1280 convexPolyhedronA.m_numUniqueEdges = 3;
1281 convexPolyhedronA.m_uniqueEdgesOffset = 0;
1282 float4 uniqueEdgesA[3];
1284 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1285 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1286 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1289 convexPolyhedronA.m_faceOffset = 0;
1291 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1293 b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];
1294 int indicesA[3+3+2+2+2];
1295 int curUsedIndices=0;
1298 //front side of triangle
1300 facesA[fidx].m_indexOffset=curUsedIndices;
1305 float c = face.m_plane.w;
1306 facesA[fidx].m_plane.x = normal.x;
1307 facesA[fidx].m_plane.y = normal.y;
1308 facesA[fidx].m_plane.z = normal.z;
1309 facesA[fidx].m_plane.w = c;
1310 facesA[fidx].m_numIndices=3;
1313 //back side of triangle
1315 facesA[fidx].m_indexOffset=curUsedIndices;
1320 float c = dot3F4(normal,verticesA[0]);
1321 float c1 = -face.m_plane.w;
1322 facesA[fidx].m_plane.x = -normal.x;
1323 facesA[fidx].m_plane.y = -normal.y;
1324 facesA[fidx].m_plane.z = -normal.z;
1325 facesA[fidx].m_plane.w = c;
1326 facesA[fidx].m_numIndices=3;
1330 bool addEdgePlanes = true;
1334 int prevVertex = numVertices-1;
1335 for (int i=0;i<numVertices;i++)
1337 float4 v0 = verticesA[i];
1338 float4 v1 = verticesA[prevVertex];
1340 float4 edgeNormal = normalize(cross(normal,v1-v0));
1341 float c = -dot3F4(edgeNormal,v0);
1343 facesA[fidx].m_numIndices = 2;
1344 facesA[fidx].m_indexOffset=curUsedIndices;
1345 indicesA[curUsedIndices++]=i;
1346 indicesA[curUsedIndices++]=prevVertex;
1348 facesA[fidx].m_plane.x = edgeNormal.x;
1349 facesA[fidx].m_plane.y = edgeNormal.y;
1350 facesA[fidx].m_plane.z = edgeNormal.z;
1351 facesA[fidx].m_plane.w = c;
1356 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1357 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1360 float4 posA = rigidBodies[bodyIndexA].m_pos;
1362 float4 posB = rigidBodies[bodyIndexB].m_pos;
1364 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1365 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1368 float4 sepAxis = separatingNormals[i];
1370 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1371 int childShapeIndexB =-1;
1372 if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1375 ///compound shape support
1377 childShapeIndexB = concavePairsIn[pairIndex].w;
1378 int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1379 shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1380 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1381 float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1382 float4 newPosB = transform(&childPosB,&posB,&ornB);
1383 float4 newOrnB = qtMul(ornB,childOrnB);
1389 ////////////////////////////////////////
1393 int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,
1394 &convexPolyhedronA, &convexShapes[shapeIndexB],
1397 worldVertsB1,worldVertsB2,capacityWorldVerts,
1399 &verticesA,&facesA,&indicesA,
1400 vertices,faces,indices,
1401 localContactsOut,localContactCapacity);
1403 if (numLocalContactsOut>0)
1405 float4 normal = -separatingNormals[i];
1406 int nPoints = numLocalContactsOut;
1407 float4* pointsIn = localContactsOut;
1408 int contactIdx[4];// = {-1,-1,-1,-1};
1415 int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
1418 AppendInc( nGlobalContactsOut, dstIdx );
1419 if (dstIdx<contactCapacity)
1421 __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
1422 c->m_worldNormalOnB = -normal;
1423 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
1424 c->m_batchIdx = pairIndex;
1425 int bodyA = concavePairsIn[pairIndex].x;
1426 int bodyB = concavePairsIn[pairIndex].y;
1427 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
1428 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
1429 c->m_childIndexA = childShapeIndexA;
1430 c->m_childIndexB = childShapeIndexB;
1431 for (int i=0;i<nReducedContacts;i++)
1433 c->m_worldPosB[i] = pointsIn[contactIdx[i]];
1435 GET_NPOINTS(*c) = nReducedContacts;
1438 }// if (numContactsOut>0)
// findClippingFaces
// For the pair at pairIndex: picks one face of hullB and one face of hullA by comparing the
// rotated face normals against separatingNormal, writes the transformed vertices of both
// faces into the per-pair slices of worldVertsB1 / worldVertsA1, stores hull A's rotated
// face normal in worldNormalsA1[pairIndex], and records the result in clippingFaces[pairIndex]:
//   .x = closestFaceA, .y = closestFaceB, .z = vertex count of face A, .w = vertex count of face B.
// Each per-pair slice starts at pairIndex*capacityWorldVerts.
// Returns numContactsOut, which no visible line changes from its initial 0.
// minDist and maxDist are not read by any visible line.
// NOTE(review): the embedded line numbers skip in several places (brace lines and the
// comparisons that guard the closestFace assignments are not shown) - read the guards
// against the full file before relying on the selection rule described below.
1447 int findClippingFaces(const float4 separatingNormal,
1448 __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
1449 const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
1450 __global float4* worldVertsA1,
1451 __global float4* worldNormalsA1,
1452 __global float4* worldVertsB1,
1453 int capacityWorldVerts,
1454 const float minDist, float maxDist,
1455 __global const float4* vertices,
1456 __global const b3GpuFace_t* faces,
1457 __global const int* indices,
1458 __global int4* clippingFaces, int pairIndex)
1460 int numContactsOut = 0;
1461 int numWorldVertsB1= 0;
// --- hull B: scan every face, rotate its plane normal by ornB, dot it with separatingNormal.
// dmax starts at -FLT_MAX, so this is presumably a "keep the largest d" search - the
// comparison itself is not visible here (TODO confirm).
1464 int closestFaceB=-1;
1465 float dmax = -FLT_MAX;
1468 for(int face=0;face<hullB->m_numFaces;face++)
1470 const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,
1471 faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);
1472 const float4 WorldNormal = qtRotate(ornB, Normal);
1473 float d = dot3F4(WorldNormal,separatingNormal);
1477 closestFaceB = face;
// Copy the chosen B face: each vertex is fetched through the face's index list,
// transformed with (posB, ornB) and appended to this pair's slice of worldVertsB1.
// NOTE(review): no visible check of numWorldVertsB1 against capacityWorldVerts, and no
// visible handling of closestFaceB still being -1 - confirm against the full file.
1483 const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];
1484 const int numVertices = polyB.m_numIndices;
1485 for(int e0=0;e0<numVertices;e0++)
1487 const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];
1488 worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
// --- hull A: same scan using ornA. dmin starts at FLT_MAX, so presumably a "keep the
// smallest d" search (comparison not visible - TODO confirm). The rotated normal of the
// chosen face is stored in worldNormalsA1[pairIndex].
1492 int closestFaceA=-1;
1494 float dmin = FLT_MAX;
1495 for(int face=0;face<hullA->m_numFaces;face++)
1497 const float4 Normal = make_float4(
1498 faces[hullA->m_faceOffset+face].m_plane.x,
1499 faces[hullA->m_faceOffset+face].m_plane.y,
1500 faces[hullA->m_faceOffset+face].m_plane.z,
1502 const float4 faceANormalWS = qtRotate(ornA,Normal);
1504 float d = dot3F4(faceANormalWS,separatingNormal);
1508 closestFaceA = face;
1509 worldNormalsA1[pairIndex] = faceANormalWS;
// Copy the chosen A face: vertices transformed with (posA, ornA) into this pair's slice
// of worldVertsA1, in face-index order.
1514 int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;
1515 for(int e0=0;e0<numVerticesA;e0++)
1517 const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
1518 worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
// Publish the face ids and vertex counts for this pair.
1521 clippingFaces[pairIndex].x = closestFaceA;
1522 clippingFaces[pairIndex].y = closestFaceB;
1523 clippingFaces[pairIndex].z = numVerticesA;
1524 clippingFaces[pairIndex].w = numWorldVertsB1;
1527 return numContactsOut;
// clipFaces
// Clips the face-B polygon stored for pairIndex against planes built from the edges of the
// face-A polygon, then keeps the surviving vertices whose signed distance to face A's plane
// passes the minDist / maxDist tests. Kept points are written to pVtxOut as (x, y, z, depth).
// Reads clippingFaces[pairIndex] (.x/.y face ids, .z/.w vertex counts of A/B) and overwrites
// clippingFaces[pairIndex].w with the number of points kept, which is also the return value.
// NOTE(review): pairIndex is used throughout but its parameter line is not visible in this
// listing (the embedded numbering skips 1539-1540); the guard for the early return below
// is likewise not visible.
1532 int clipFaces(__global float4* worldVertsA1,
1533 __global float4* worldNormalsA1,
1534 __global float4* worldVertsB1,
1535 __global float4* worldVertsB2,
1536 int capacityWorldVertsB2,
1537 const float minDist, float maxDist,
1538 __global int4* clippingFaces,
1541 int numContactsOut = 0;
// closestFaceA / closestFaceB are read here; no visible line below uses closestFaceB.
1543 int closestFaceA = clippingFaces[pairIndex].x;
1544 int closestFaceB = clippingFaces[pairIndex].y;
1545 int numVertsInA = clippingFaces[pairIndex].z;
1546 int numVertsInB = clippingFaces[pairIndex].w;
1548 int numVertsOut = 0;
// Early exit returning 0 contacts; the condition is on a line not shown here.
1551 return numContactsOut;
// Per-pair slices: B1 holds the polygon to clip, B2 is the output buffer.
1553 __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];
1554 __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];
1558 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
// One clipping plane per edge (e0 -> e0+1, wrapping) of face A: the plane normal is
// -cross3(edge, faceNormal) and the plane passes through vertex aw.
1560 for(int e0=0;e0<numVertsInA;e0++)
1562 const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];
1563 const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];
1564 const float4 WorldEdge0 = aw - bw;
1565 float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
1566 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
1567 float4 worldA1 = aw;
1568 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
1569 float4 planeNormalWS = planeNormalWS1;
1570 float planeEqWS=planeEqWS1;
1571 numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);
// tmp is the start of an in/out buffer swap; the remaining swap statements
// (embedded numbers 1573-1574) are not visible here - presumably pVtxOut = pVtxIn; pVtxIn = tmp;
1572 __global float4* tmp = pVtxOut;
1575 numVertsInB = numVertsOut;
1579 //float4 planeNormalWS = worldNormalsA1[pairIndex];
1580 //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);
// NOTE(review): the block comment opened on the next line has no visible terminator in
// this listing; its closing line appears to have been dropped.
1584 /*for (int i=0;i<numVertsInB;i++)
1586 pVtxOut[i] = pVtxIn[i];
// Depth filter: plane of face A through its first stored vertex; depth is the signed
// distance of each clipped vertex to that plane.
1594 float4 planeNormalWS = worldNormalsA1[pairIndex];
1595 float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);
1597 for (int i=0;i<numVertsInB;i++)
1599 float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
// Body of the minDist branch is not visible (presumably clamps depth to minDist - TODO confirm).
1600 if (depth <=minDist)
// Points at or below maxDist are kept, with depth packed into the w component.
1605 if (depth <=maxDist)
1607 float4 pointInWorld = pVtxIn[i];
1608 pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
1612 clippingFaces[pairIndex].w =numContactsOut;
1615 return numContactsOut;
// findClippingFacesKernel
// One work-item per pair (i = get_global_id(0)). For pairs flagged in hasSeparatingAxis,
// resolves body -> collidable -> shape indices for both bodies and calls findClippingFaces
// with each body's m_pos / m_quat, passing i as the pair index.
// The uniqueEdges parameter is not read by any visible line, and the value returned by
// findClippingFaces is stored in numLocalContactsOut but not used by any visible line.
// NOTE(review): the end of the parameter list and some call-argument lines are not visible
// in this listing (the embedded numbering skips 1637-1640 and 1669-1672).
1622 __kernel void findClippingFacesKernel( __global const int4* pairs,
1623 __global const b3RigidBodyData_t* rigidBodies,
1624 __global const b3Collidable_t* collidables,
1625 __global const b3ConvexPolyhedronData_t* convexShapes,
1626 __global const float4* vertices,
1627 __global const float4* uniqueEdges,
1628 __global const b3GpuFace_t* faces,
1629 __global const int* indices,
1630 __global const float4* separatingNormals,
1631 __global const int* hasSeparatingAxis,
1632 __global int4* clippingFacesOut,
1633 __global float4* worldVertsA1,
1634 __global float4* worldNormalsA1,
1635 __global float4* worldVertsB1,
1636 int capacityWorldVerts,
1641 int i = get_global_id(0);
// Distance thresholds declared for the clipping stage; the line that would pass them to
// findClippingFaces is not visible here.
1645 float minDist = -1e30f;
1646 float maxDist = 0.02f;
// Only pairs flagged in hasSeparatingAxis are processed.
1651 if (hasSeparatingAxis[i])
1654 int bodyIndexA = pairs[i].x;
1655 int bodyIndexB = pairs[i].y;
1657 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1658 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1660 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1661 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
// Results are written to the per-pair buffers and clippingFacesOut[i].
1665 int numLocalContactsOut = findClippingFaces(separatingNormals[i],
1666 &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],
1667 rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,
1668 rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,
1671 worldVertsB1,capacityWorldVerts,
1673 vertices,faces,indices,
1674 clippingFacesOut,i);
1677 }// if (hasSeparatingAxis[i])
// clipFacesAndFindContactsKernel
// One work-item per pair (i = get_global_id(0)). For pairs flagged in hasSeparatingAxis,
// performs the same edge-plane clipping and depth filtering as clipFaces, inline, using the
// face data recorded in clippingFacesOut. The number of kept points is written to
// clippingFaces[pairIndex].w and the kept points (x, y, z, depth) are copied from pVtxOut
// back into pVtxIn.
// NOTE(review): pairIndex is used below but its declaration is not visible in this listing,
// nor is the end of the parameter list (embedded numbering skips 1693-1696).
1685 __kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,
1686 __global const int* hasSeparatingAxis,
1687 __global int4* clippingFacesOut,
1688 __global float4* worldVertsA1,
1689 __global float4* worldNormalsA1,
1690 __global float4* worldVertsB1,
1691 __global float4* worldVertsB2,
1692 int vertexFaceCapacity,
1697 int i = get_global_id(0);
// Depth window used by the filter loop below.
1701 float minDist = -1e30f;
1702 float maxDist = 0.02f;
1707 if (hasSeparatingAxis[i])
1710 // int bodyIndexA = pairs[i].x;
1711 // int bodyIndexB = pairs[i].y;
1713 int numLocalContactsOut = 0;
// vertexFaceCapacity is the per-pair stride for worldVertsA1, worldVertsB1 and worldVertsB2.
1715 int capacityWorldVertsB2 = vertexFaceCapacity;
1717 __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];
1718 __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];
1722 __global int4* clippingFaces = clippingFacesOut;
// .x/.y = face ids, .z/.w = vertex counts of face A / face B for this pair.
1725 int closestFaceA = clippingFaces[pairIndex].x;
1726 int closestFaceB = clippingFaces[pairIndex].y;
1727 int numVertsInA = clippingFaces[pairIndex].z;
1728 int numVertsInB = clippingFaces[pairIndex].w;
1730 int numVertsOut = 0;
// Clipping only runs when a face on A was recorded (closestFaceA is non-negative).
1732 if (closestFaceA>=0)
1737 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
// One plane per edge (e0 -> e0+1, wrapping) of face A; normal = -cross3(edge, faceNormal),
// plane passes through aw.
1739 for(int e0=0;e0<numVertsInA;e0++)
1741 const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];
1742 const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];
1743 const float4 WorldEdge0 = aw - bw;
1744 float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
1745 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
1746 float4 worldA1 = aw;
1747 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
1748 float4 planeNormalWS = planeNormalWS1;
1749 float planeEqWS=planeEqWS1;
1750 numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);
// Start of the in/out buffer swap; the remaining swap statements (embedded numbers
// 1752-1753) are not visible here - presumably pVtxOut = pVtxIn; pVtxIn = tmp;
1751 __global float4* tmp = pVtxOut;
1754 numVertsInB = numVertsOut;
// Depth filter against face A's plane through its first stored vertex.
1758 float4 planeNormalWS = worldNormalsA1[pairIndex];
1759 float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);
// Note: this loop's i shadows the work-item index i declared above.
1761 for (int i=0;i<numVertsInB;i++)
1763 float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
// Body of the minDist branch is not visible (presumably clamps depth - TODO confirm).
1764 if (depth <=minDist)
1769 if (depth <=maxDist)
1771 float4 pointInWorld = pVtxIn[i];
1772 pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
1777 clippingFaces[pairIndex].w =numLocalContactsOut;
// Copy the kept points from pVtxOut into pVtxIn.
// NOTE(review): which of worldVertsB1 / worldVertsB2 each pointer refers to at this point
// depends on the swap lines that are not visible - confirm against the full file.
1782 for (int i=0;i<numLocalContactsOut;i++)
1783 pVtxIn[i] = pVtxOut[i];
1785 }// if (hasSeparatingAxis[i])
// newContactReductionKernel
// One work-item per pair (i = get_global_id(0)). For pairs flagged in hasSeparatingAxis,
// reduces the clipped points stored for the pair (count in clippingFaces[pairIndex].w,
// points in the pair's slice of worldVertsB2) with extractManifoldSequentialGlobal and
// fills one b3Contact4Data entry in globalContactsOut.
// NOTE(review): this listing omits several lines of the kernel - the declarations of
// contactIdx and pairIndex, the end of the parameter list, the guards around AppendInc and
// the switch case labels are not visible. Comments below describe only the visible lines.
1794 __kernel void newContactReductionKernel( __global int4* pairs,
1795 __global const b3RigidBodyData_t* rigidBodies,
1796 __global const float4* separatingNormals,
1797 __global const int* hasSeparatingAxis,
1798 __global struct b3Contact4Data* globalContactsOut,
1799 __global int4* clippingFaces,
1800 __global float4* worldVertsB2,
1801 volatile __global int* nGlobalContactsOut,
1802 int vertexFaceCapacity,
1803 int contactCapacity,
1807 int i = get_global_id(0);
// Initial selection: point indices 0..3; passed by address to the reduction call below.
1811 contactIdx=make_int4(0,1,2,3);
1816 if (hasSeparatingAxis[i])
1822 int nPoints = clippingFaces[pairIndex].w;
1827 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];
// The reduction receives the negated separating normal.
1828 float4 normal = -separatingNormals[i];
1830 int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
// pairs[pairIndex].z supplies an initial destination index (mprContactIndex).
1832 int mprContactIndex = pairs[pairIndex].z;
1834 int dstIdx = mprContactIndex;
// AppendInc expands to dstIdx = atomic_inc(nGlobalContactsOut), overwriting dstIdx with
// the counter value before the increment. The embedded numbering skips 1835-1837, so this
// is presumably conditional (e.g. only when dstIdx is negative) - TODO confirm.
1838 AppendInc( nGlobalContactsOut, dstIdx );
// Contacts that do not fit in the output buffer are skipped; no visible line rolls the
// counter back.
1842 if (dstIdx < contactCapacity)
1845 __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
// -normal equals separatingNormals[i]; its w component is overwritten by GET_NPOINTS below.
1846 c->m_worldNormalOnB = -normal;
// Fixed coefficients: restitution 0, friction 0.7, each scaled by 0xffff.
1847 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
1848 c->m_batchIdx = pairIndex;
1849 int bodyA = pairs[pairIndex].x;
1850 int bodyB = pairs[pairIndex].y;
// Record the contact slot used for this pair.
1852 pairs[pairIndex].w = dstIdx;
// Bodies with m_invMass == 0 are stored with a negated index.
// NOTE(review): negating index 0 yields 0, so the sign cannot mark body 0 - confirm intent.
1854 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
1855 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
1856 c->m_childIndexA =-1;
1857 c->m_childIndexB =-1;
// Copy the selected points; case labels are not visible (presumably a fall-through from
// case 4 down to case 1 - TODO confirm).
1859 switch (nReducedContacts)
1862 c->m_worldPosB[3] = pointsIn[contactIdx.w];
1864 c->m_worldPosB[2] = pointsIn[contactIdx.z];
1866 c->m_worldPosB[1] = pointsIn[contactIdx.y];
// Slot 0 is only written when pairs[pairIndex].z was negative.
1868 if (mprContactIndex<0)//test
1869 c->m_worldPosB[0] = pointsIn[contactIdx.x];
// GET_NPOINTS(x) is (x).m_worldNormalOnB.w: the point count is stored in the w component
// of the contact normal.
1875 GET_NPOINTS(*c) = nReducedContacts;
1882 }// if (numContactsOut>0)
1883 }// if (hasSeparatingAxis[i])