[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / kernels / satClipHullContacts.cl
1
2 #define TRIANGLE_NUM_CONVEX_FACES 5
3
4
5
6 #pragma OPENCL EXTENSION cl_amd_printf : enable
7 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
8 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
9 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
10 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
11
12 #ifdef cl_ext_atomic_counters_32
13 #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
14 #else
15 #define counter32_t volatile __global int*
16 #endif
17
// Shorthands for the work-item / work-group index built-ins (dimension 0).
#define GET_GROUP_IDX get_group_id(0)
#define GET_LOCAL_IDX get_local_id(0)
#define GET_GLOBAL_IDX get_global_id(0)
#define GET_GROUP_SIZE get_local_size(0)
#define GET_NUM_GROUPS get_num_groups(0)
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)

// Atomic helpers. Macros taking an lvalue `x` pass its address to the
// built-in; AppendInc takes a pointer directly.
// AtomInc1 and AppendInc store the built-in's return value in `out`; their
// expansion is fully parenthesized so they can be used inside a larger
// expression as well as a stand-alone statement.
#define AtomInc(x) atom_inc(&(x))
#define AtomInc1(x, out) ((out) = atom_inc(&(x)))
#define AppendInc(x, out) ((out) = atomic_inc(x))
#define AtomAdd(x, value) atom_add(&(x), value)
#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
#define AtomXhg(x, value) atom_xchg ( &(x), value )

#define max2 max
#define min2 min

typedef unsigned int u32;
36
37
38
39 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
40 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
41 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
42 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
43
44
45
// The w component of a contact's m_worldNormalOnB field (usable as an lvalue).
#define GET_NPOINTS(x) ((x).m_worldNormalOnB.w)



// Forwards to the select() built-in with the arguments in the same order.
#define SELECT_UINT4( b, a, condition ) select( (b), (a), (condition) )

// Constructor-style spellings for vector literals:
// make_float4(x, y, z, w) expands to (float4)(x, y, z, w), and so on.
#define make_float4 (float4)
#define make_float2 (float2)
#define make_uint4 (uint4)
#define make_int4 (int4)
#define make_uint2 (uint2)
#define make_int2 (int2)
58
59
// Scalar division routed through the native_divide built-in rather than
// the `/` operator.
__inline
float fastDiv(float numerator, float denominator)
{
	const float quotient = native_divide(numerator, denominator);
	return quotient;
}
66
67 __inline
68 float4 fastDiv4(float4 numerator, float4 denominator)
69 {
70         return native_divide(numerator, denominator);   
71 }
72
73
74 __inline
75 float4 cross3(float4 a, float4 b)
76 {
77         return cross(a,b);
78 }
79
80 //#define dot3F4 dot
81
82 __inline
83 float dot3F4(float4 a, float4 b)
84 {
85         float4 a1 = make_float4(a.xyz,0.f);
86         float4 b1 = make_float4(b.xyz,0.f);
87         return dot(a1, b1);
88 }
89
90 __inline
91 float4 fastNormalize4(float4 v)
92 {
93         return fast_normalize(v);
94 }
95
96
97 ///////////////////////////////////////
98 //      Quaternion
99 ///////////////////////////////////////
100
101 typedef float4 Quaternion;
102
103 __inline
104 Quaternion qtMul(Quaternion a, Quaternion b);
105
106 __inline
107 Quaternion qtNormalize(Quaternion in);
108
109 __inline
110 float4 qtRotate(Quaternion q, float4 vec);
111
112 __inline
113 Quaternion qtInvert(Quaternion q);
114
115
116
117
118 __inline
119 Quaternion qtMul(Quaternion a, Quaternion b)
120 {
121         Quaternion ans;
122         ans = cross3( a, b );
123         ans += a.w*b+b.w*a;
124 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
125         ans.w = a.w*b.w - dot3F4(a, b);
126         return ans;
127 }
128
129 __inline
130 Quaternion qtNormalize(Quaternion in)
131 {
132         return fastNormalize4(in);
133 //      in /= length( in );
134 //      return in;
135 }
136 __inline
137 float4 qtRotate(Quaternion q, float4 vec)
138 {
139         Quaternion qInv = qtInvert( q );
140         float4 vcpy = vec;
141         vcpy.w = 0.f;
142         float4 out = qtMul(qtMul(q,vcpy),qInv);
143         return out;
144 }
145
146 __inline
147 Quaternion qtInvert(Quaternion q)
148 {
149         return (Quaternion)(-q.xyz, q.w);
150 }
151
152 __inline
153 float4 qtInvRotate(const Quaternion q, float4 vec)
154 {
155         return qtRotate( qtInvert( q ), vec );
156 }
157
158 __inline
159 float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
160 {
161         return qtRotate( *orientation, *p ) + (*translation);
162 }
163
164
165
166 __inline
167 float4 normalize3(const float4 a)
168 {
169         float4 n = make_float4(a.x, a.y, a.z, 0.f);
170         return fastNormalize4( n );
171 }
172
173
174 __inline float4 lerp3(const float4 a,const float4 b, float  t)
175 {
176         return make_float4(     a.x + (b.x - a.x) * t,
177                                                 a.y + (b.y - a.y) * t,
178                                                 a.z + (b.z - a.z) * t,
179                                                 0.f);
180 }
181
182
183
184 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
185 int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)
186 {
187         
188         int ve;
189         float ds, de;
190         int numVertsOut = 0;
191     //double-check next test
192         if (numVertsIn < 2)
193                 return 0;
194     
195         float4 firstVertex=pVtxIn[numVertsIn-1];
196         float4 endVertex = pVtxIn[0];
197         
198         ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;
199     
200         for (ve = 0; ve < numVertsIn; ve++)
201         {
202                 endVertex=pVtxIn[ve];
203                 de = dot3F4(planeNormalWS,endVertex)+planeEqWS;
204                 if (ds<0)
205                 {
206                         if (de<0)
207                         {
208                                 // Start < 0, end < 0, so output endVertex
209                                 ppVtxOut[numVertsOut++] = endVertex;
210                         }
211                         else
212                         {
213                                 // Start < 0, end >= 0, so output intersection
214                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
215                         }
216                 }
217                 else
218                 {
219                         if (de<0)
220                         {
221                                 // Start >= 0, end < 0 so output intersection and end
222                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
223                                 ppVtxOut[numVertsOut++] = endVertex;
224                         }
225                 }
226                 firstVertex = endVertex;
227                 ds = de;
228         }
229         return numVertsOut;
230 }
231
232
233
234 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
235 int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)
236 {
237         
238         int ve;
239         float ds, de;
240         int numVertsOut = 0;
241 //double-check next test
242         if (numVertsIn < 2)
243                 return 0;
244
245         float4 firstVertex=pVtxIn[numVertsIn-1];
246         float4 endVertex = pVtxIn[0];
247         
248         ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;
249
250         for (ve = 0; ve < numVertsIn; ve++)
251         {
252                 endVertex=pVtxIn[ve];
253
254                 de = dot3F4(planeNormalWS,endVertex)+planeEqWS;
255
256                 if (ds<0)
257                 {
258                         if (de<0)
259                         {
260                                 // Start < 0, end < 0, so output endVertex
261                                 ppVtxOut[numVertsOut++] = endVertex;
262                         }
263                         else
264                         {
265                                 // Start < 0, end >= 0, so output intersection
266                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
267                         }
268                 }
269                 else
270                 {
271                         if (de<0)
272                         {
273                                 // Start >= 0, end < 0 so output intersection and end
274                                 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
275                                 ppVtxOut[numVertsOut++] = endVertex;
276                         }
277                 }
278                 firstVertex = endVertex;
279                 ds = de;
280         }
281         return numVertsOut;
282 }
283
284
285 int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  
286         const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,
287         float4* worldVertsB2, int capacityWorldVertsB2,
288         const float minDist, float maxDist,
289         __global const float4* vertices,
290         __global const b3GpuFace_t* faces,
291         __global const int* indices,
292         float4* contactsOut,
293         int contactCapacity)
294 {
295         int numContactsOut = 0;
296
297         float4* pVtxIn = worldVertsB1;
298         float4* pVtxOut = worldVertsB2;
299         
300         int numVertsIn = numWorldVertsB1;
301         int numVertsOut = 0;
302
303         int closestFaceA=-1;
304         {
305                 float dmin = FLT_MAX;
306                 for(int face=0;face<hullA->m_numFaces;face++)
307                 {
308                         const float4 Normal = make_float4(
309                                 faces[hullA->m_faceOffset+face].m_plane.x, 
310                                 faces[hullA->m_faceOffset+face].m_plane.y, 
311                                 faces[hullA->m_faceOffset+face].m_plane.z,0.f);
312                         const float4 faceANormalWS = qtRotate(ornA,Normal);
313                 
314                         float d = dot3F4(faceANormalWS,separatingNormal);
315                         if (d < dmin)
316                         {
317                                 dmin = d;
318                                 closestFaceA = face;
319                         }
320                 }
321         }
322         if (closestFaceA<0)
323                 return numContactsOut;
324
325         b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];
326
327         // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
328         int numVerticesA = polyA.m_numIndices;
329         for(int e0=0;e0<numVerticesA;e0++)
330         {
331                 const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];
332                 const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];
333                 const float4 edge0 = a - b;
334                 const float4 WorldEdge0 = qtRotate(ornA,edge0);
335                 float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
336                 float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);
337
338                 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
339                 float4 worldA1 = transform(&a,&posA,&ornA);
340                 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
341                 
342                 float4 planeNormalWS = planeNormalWS1;
343                 float planeEqWS=planeEqWS1;
344                 
345                 //clip face
346                 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
347                 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);
348
349                 //btSwap(pVtxIn,pVtxOut);
350                 float4* tmp = pVtxOut;
351                 pVtxOut = pVtxIn;
352                 pVtxIn = tmp;
353                 numVertsIn = numVertsOut;
354                 numVertsOut = 0;
355         }
356
357         
358         // only keep points that are behind the witness face
359         {
360                 float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
361                 float localPlaneEq = polyA.m_plane.w;
362                 float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);
363                 float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);
364                 for (int i=0;i<numVertsIn;i++)
365                 {
366                         float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
367                         if (depth <=minDist)
368                         {
369                                 depth = minDist;
370                         }
371
372                         if (depth <=maxDist)
373                         {
374                                 float4 pointInWorld = pVtxIn[i];
375                                 //resultOut.addContactPoint(separatingNormal,point,depth);
376                                 contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
377                         }
378                 }
379         }
380
381         return numContactsOut;
382 }
383
384
385
386 int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  
387         const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,
388         float4* worldVertsB2, int capacityWorldVertsB2,
389         const float minDist, float maxDist,
390         const float4* verticesA,
391         const b3GpuFace_t* facesA,
392         const int* indicesA,
393         __global const float4* verticesB,
394         __global const b3GpuFace_t* facesB,
395         __global const int* indicesB,
396         float4* contactsOut,
397         int contactCapacity)
398 {
399         int numContactsOut = 0;
400
401         float4* pVtxIn = worldVertsB1;
402         float4* pVtxOut = worldVertsB2;
403         
404         int numVertsIn = numWorldVertsB1;
405         int numVertsOut = 0;
406
407         int closestFaceA=-1;
408         {
409                 float dmin = FLT_MAX;
410                 for(int face=0;face<hullA->m_numFaces;face++)
411                 {
412                         const float4 Normal = make_float4(
413                                 facesA[hullA->m_faceOffset+face].m_plane.x, 
414                                 facesA[hullA->m_faceOffset+face].m_plane.y, 
415                                 facesA[hullA->m_faceOffset+face].m_plane.z,0.f);
416                         const float4 faceANormalWS = qtRotate(ornA,Normal);
417                 
418                         float d = dot3F4(faceANormalWS,separatingNormal);
419                         if (d < dmin)
420                         {
421                                 dmin = d;
422                                 closestFaceA = face;
423                         }
424                 }
425         }
426         if (closestFaceA<0)
427                 return numContactsOut;
428
429         b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];
430
431         // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
432         int numVerticesA = polyA.m_numIndices;
433         for(int e0=0;e0<numVerticesA;e0++)
434         {
435                 const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];
436                 const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];
437                 const float4 edge0 = a - b;
438                 const float4 WorldEdge0 = qtRotate(ornA,edge0);
439                 float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
440                 float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);
441
442                 float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);
443                 float4 worldA1 = transform(&a,&posA,&ornA);
444                 float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);
445                 
446                 float4 planeNormalWS = planeNormalWS1;
447                 float planeEqWS=planeEqWS1;
448                 
449                 //clip face
450                 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
451                 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);
452
453                 //btSwap(pVtxIn,pVtxOut);
454                 float4* tmp = pVtxOut;
455                 pVtxOut = pVtxIn;
456                 pVtxIn = tmp;
457                 numVertsIn = numVertsOut;
458                 numVertsOut = 0;
459         }
460
461         
462         // only keep points that are behind the witness face
463         {
464                 float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
465                 float localPlaneEq = polyA.m_plane.w;
466                 float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);
467                 float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);
468                 for (int i=0;i<numVertsIn;i++)
469                 {
470                         float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
471                         if (depth <=minDist)
472                         {
473                                 depth = minDist;
474                         }
475
476                         if (depth <=maxDist)
477                         {
478                                 float4 pointInWorld = pVtxIn[i];
479                                 //resultOut.addContactPoint(separatingNormal,point,depth);
480                                 contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
481                         }
482                 }
483         }
484
485         return numContactsOut;
486 }
487
488 int     clipHullAgainstHull(const float4 separatingNormal,
489         __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, 
490         const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, 
491         float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
492         const float minDist, float maxDist,
493         __global const float4* vertices,
494         __global const b3GpuFace_t* faces,
495         __global const int* indices,
496         float4* localContactsOut,
497         int localContactCapacity)
498 {
499         int numContactsOut = 0;
500         int numWorldVertsB1= 0;
501
502
503         int closestFaceB=-1;
504         float dmax = -FLT_MAX;
505
506         {
507                 for(int face=0;face<hullB->m_numFaces;face++)
508                 {
509                         const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, 
510                                 faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);
511                         const float4 WorldNormal = qtRotate(ornB, Normal);
512                         float d = dot3F4(WorldNormal,separatingNormal);
513                         if (d > dmax)
514                         {
515                                 dmax = d;
516                                 closestFaceB = face;
517                         }
518                 }
519         }
520
521         {
522                 const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];
523                 const int numVertices = polyB.m_numIndices;
524                 for(int e0=0;e0<numVertices;e0++)
525                 {
526                         const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];
527                         worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);
528                 }
529         }
530
531         if (closestFaceB>=0)
532         {
533                 numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, 
534                                 posA,ornA,
535                                 worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,
536                                 faces,
537                                 indices,localContactsOut,localContactCapacity);
538         }
539
540         return numContactsOut;
541 }
542
543
544 int     clipHullAgainstHullLocalA(const float4 separatingNormal,
545         const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, 
546         const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, 
547         float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
548         const float minDist, float maxDist,
549         const float4* verticesA,
550         const b3GpuFace_t* facesA,
551         const int* indicesA,
552         __global const float4* verticesB,
553         __global const b3GpuFace_t* facesB,
554         __global const int* indicesB,
555         float4* localContactsOut,
556         int localContactCapacity)
557 {
558         int numContactsOut = 0;
559         int numWorldVertsB1= 0;
560
561
562         int closestFaceB=-1;
563         float dmax = -FLT_MAX;
564
565         {
566                 for(int face=0;face<hullB->m_numFaces;face++)
567                 {
568                         const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, 
569                                 facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
570                         const float4 WorldNormal = qtRotate(ornB, Normal);
571                         float d = dot3F4(WorldNormal,separatingNormal);
572                         if (d > dmax)
573                         {
574                                 dmax = d;
575                                 closestFaceB = face;
576                         }
577                 }
578         }
579
580         {
581                 const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];
582                 const int numVertices = polyB.m_numIndices;
583                 for(int e0=0;e0<numVertices;e0++)
584                 {
585                         const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
586                         worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);
587                 }
588         }
589
590         if (closestFaceB>=0)
591         {
592                 numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, 
593                                 posA,ornA,
594                                 worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,
595                                 verticesA,facesA,indicesA,
596                                 verticesB,facesB,indicesB,
597                                 localContactsOut,localContactCapacity);
598         }
599
600         return numContactsOut;
601 }
602
// Sequential loop helpers. Every macro argument is parenthesized so that
// expressions such as `base + 1` or `a ? b : c` can be passed safely.
//
// PARALLEL_SUM(v, n): adds v[1..n-1] into v[0], in index order.
// PARALLEL_DO(execution, n): runs `execution` for ie = 0..n-1; the statement
//   may refer to the loop variable `ie`.
// REDUCE_MAX / REDUCE_MIN(v, n): leave in v[0] the element of v[0..n-1] with
//   the largest / smallest .y member.
#define PARALLEL_SUM(v, n) for(int j=1; j<(n); j++) (v)[0] += (v)[j];
#define PARALLEL_DO(execution, n) for(int ie=0; ie<(n); ie++){execution;}
#define REDUCE_MAX(v, n) {int i=0;\
for(int offset=0; offset<(n); offset++) (v)[i] = ((v)[i].y > (v)[i+offset].y)? (v)[i]: (v)[i+offset]; }
#define REDUCE_MIN(v, n) {int i=0;\
for(int offset=0; offset<(n); offset++) (v)[i] = ((v)[i].y < (v)[i+offset].y)? (v)[i]: (v)[i+offset]; }
609
610 int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)
611 {
612         if( nPoints == 0 )
613         return 0;
614     
615     if (nPoints <=4)
616         return nPoints;
617     
618     
619     if (nPoints >64)
620         nPoints = 64;
621     
622         float4 center = make_float4(0.f);
623         {
624                 
625                 for (int i=0;i<nPoints;i++)
626                         center += p[i];
627                 center /= (float)nPoints;
628         }
629     
630         
631     
632         //      sample 4 directions
633     
634     float4 aVector = p[0] - center;
635     float4 u = cross3( nearNormal, aVector );
636     float4 v = cross3( nearNormal, u );
637     u = normalize3( u );
638     v = normalize3( v );
639     
640     
641     //keep point with deepest penetration
642     float minW= FLT_MAX;
643     
644     int minIndex=-1;
645     
646     float4 maxDots;
647     maxDots.x = FLT_MIN;
648     maxDots.y = FLT_MIN;
649     maxDots.z = FLT_MIN;
650     maxDots.w = FLT_MIN;
651     
652     //  idx, distance
653     for(int ie = 0; ie<nPoints; ie++ )
654     {
655         if (p[ie].w<minW)
656         {
657             minW = p[ie].w;
658             minIndex=ie;
659         }
660         float f;
661         float4 r = p[ie]-center;
662         f = dot3F4( u, r );
663         if (f<maxDots.x)
664         {
665             maxDots.x = f;
666             contactIdx[0].x = ie;
667         }
668         
669         f = dot3F4( -u, r );
670         if (f<maxDots.y)
671         {
672             maxDots.y = f;
673             contactIdx[0].y = ie;
674         }
675         
676         
677         f = dot3F4( v, r );
678         if (f<maxDots.z)
679         {
680             maxDots.z = f;
681             contactIdx[0].z = ie;
682         }
683         
684         f = dot3F4( -v, r );
685         if (f<maxDots.w)
686         {
687             maxDots.w = f;
688             contactIdx[0].w = ie;
689         }
690         
691     }
692     
693     if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
694     {
695         //replace the first contact with minimum (todo: replace contact with least penetration)
696         contactIdx[0].x = minIndex;
697     }
698     
699     return 4;
700     
701 }
702
703
704 int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)
705 {
706     contactIdx[0] = 0;
707     contactIdx[1] = 1;
708     contactIdx[2] = 2;
709     contactIdx[3] = 3;
710     
711         if( nPoints == 0 ) return 0;
712     
713         nPoints = min2( nPoints, 4 );
714     return nPoints;
715     
716 }
717
718
719
720 int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)
721 {
722         if( nPoints == 0 ) return 0;
723
724         nPoints = min2( nPoints, 64 );
725
726         float4 center = make_float4(0.f);
727         {
728                 float4 v[64];
729                 for (int i=0;i<nPoints;i++)
730                         v[i] = p[i];
731                 //memcpy( v, p, nPoints*sizeof(float4) );
732                 PARALLEL_SUM( v, nPoints );
733                 center = v[0]/(float)nPoints;
734         }
735
736         
737
738         {       //      sample 4 directions
739                 if( nPoints < 4 )
740                 {
741                         for(int i=0; i<nPoints; i++) 
742                                 contactIdx[i] = i;
743                         return nPoints;
744                 }
745
746                 float4 aVector = p[0] - center;
747                 float4 u = cross3( nearNormal, aVector );
748                 float4 v = cross3( nearNormal, u );
749                 u = normalize3( u );
750                 v = normalize3( v );
751
752                 int idx[4];
753
754                 float2 max00 = make_float2(0,FLT_MAX);
755                 {
756                         //      idx, distance
757                         {
758                                 {
759                                         int4 a[64];
760                                         for(int ie = 0; ie<nPoints; ie++ )
761                                         {
762                                                 
763                                                 
764                                                 float f;
765                                                 float4 r = p[ie]-center;
766                                                 f = dot3F4( u, r );
767                                                 a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
768
769                                                 f = dot3F4( -u, r );
770                                                 a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
771
772                                                 f = dot3F4( v, r );
773                                                 a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
774
775                                                 f = dot3F4( -v, r );
776                                                 a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);
777                                         }
778
779                                         for(int ie=0; ie<nPoints; ie++)
780                                         {
781                                                 a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;
782                                                 a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;
783                                                 a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;
784                                                 a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;
785                                         }
786
787                                         idx[0] = (int)a[0].x & 0xff;
788                                         idx[1] = (int)a[0].y & 0xff;
789                                         idx[2] = (int)a[0].z & 0xff;
790                                         idx[3] = (int)a[0].w & 0xff;
791                                 }
792                         }
793
794                         {
795                                 float2 h[64];
796                                 PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );
797                                 REDUCE_MIN( h, nPoints );
798                                 max00 = h[0];
799                         }
800                 }
801
802                 contactIdx[0] = idx[0];
803                 contactIdx[1] = idx[1];
804                 contactIdx[2] = idx[2];
805                 contactIdx[3] = idx[3];
806
807
808                 return 4;
809         }
810 }
811
812
813
/**
 * Reduces the contact points of one pair to a small manifold and appends it
 * as a single b3Contact4Data entry.
 *
 * Each work-item with idx < numPairs:
 *   - copies contactCounts[idx] points, starting at
 *     closestPointsWorld[contactOffsets[idx]], into private memory;
 *   - calls extractManifoldSequential() to select the points to keep
 *     (contactIdx has room for four indices);
 *   - reserves an output slot by atomically incrementing nContactsOut and
 *     fills it only when the slot index is below contactCapacity. The counter
 *     is incremented even when the slot is out of range.
 *
 * The body indices are read from pairs[pairIndex] (the kernel argument), not
 * from pairs[idx], while m_batchIdx is set to idx.
 *
 * The private point buffer holds 64 entries, so the number of points that is
 * copied and reduced is clamped to 64 to avoid writing past the buffer.
 */
__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, 
	__global const b3RigidBodyData_t* rigidBodies, 
	__global const float4* closestPointsWorld,
	__global const float4* separatingNormalsWorld,
	__global const int* contactCounts,
	__global const int* contactOffsets,
	__global struct b3Contact4Data* restrict contactsOut,
	counter32_t nContactsOut,
	int contactCapacity,
	int numPairs,
	int pairIndex
	)
{
	int idx = get_global_id(0);
	
	if (idx<numPairs)
	{
		// Capacity of the private copy below; must match the array size.
		const int maxLocalPoints = 64;
		float4 localPoints[64];

		float4 normal = separatingNormalsWorld[idx];
		__global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];

		// Never copy more points than the private buffer can hold.
		int nPoints = contactCounts[idx];
		if (nPoints > maxLocalPoints)
		{
			nPoints = maxLocalPoints;
		}

		for (int i=0;i<nPoints;i++)
		{
			localPoints[i] = pointsIn[i];
		}

		// -1 marks "no point selected" until the reduction fills the entry.
		int contactIdx[4];
		contactIdx[0] = -1;
		contactIdx[1] = -1;
		contactIdx[2] = -1;
		contactIdx[3] = -1;

		int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);

		// Reserve one output slot; dstIdx receives the counter value before the increment.
		int dstIdx;
		AppendInc( nContactsOut, dstIdx );
		if (dstIdx<contactCapacity)
		{
			__global struct b3Contact4Data* c = contactsOut + dstIdx;
			c->m_worldNormalOnB = -normal;
			// Fixed material values: restitution 0 and friction 0.7, scaled by 0xffff.
			c->m_restituitionCoeffCmp = (0.f*0xffff);
			c->m_frictionCoeffCmp = (0.7f*0xffff);
			c->m_batchIdx = idx;
			int bodyA = pairs[pairIndex].x;
			int bodyB = pairs[pairIndex].y;
			// The body index is stored negated when the body's inverse mass is zero.
			c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;
			c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;
			c->m_childIndexA = -1;
			c->m_childIndexB = -1;
			for (int i=0;i<nContacts;i++)
			{
				c->m_worldPosB[i] = localPoints[contactIdx[i]];
			}
			// The point count is stored in the w component of the normal.
			GET_NPOINTS(*c) = nContacts;
		}
	}
}
870
871
// Writes qtInvert(orientationIn) to *orientationOut, and the negated input
// translation rotated by that inverted orientation to *translationOut.
void	trInverse(float4 translationIn, Quaternion orientationIn,
		float4* translationOut, Quaternion* orientationOut)
{
	Quaternion invertedOrientation = qtInvert(orientationIn);
	float4 negatedTranslation = -translationIn;

	*orientationOut = invertedOrientation;
	*translationOut = qtRotate(invertedOrientation, negatedTranslation);
}
878
// Combines transform A (translationA, orientationA) with transform B:
// *orientationOut receives qtMul(orientationA, orientationB) and
// *translationOut receives translationB passed through transform() with
// A's translation and orientation.
void	trMul(float4 translationA, Quaternion orientationA,
		float4 translationB, Quaternion orientationB,
		float4* translationOut, Quaternion* orientationOut)
{
	Quaternion combinedOrientation = qtMul(orientationA, orientationB);
	*orientationOut = combinedOrientation;

	float4 combinedTranslation = transform(&translationB, &translationA, &orientationA);
	*translationOut = combinedTranslation;
}
886
887
888
889
/**
 * Clips the two convex hulls of each flagged pair against each other and
 * stores the reduced contact manifold.
 *
 * One work-item handles one pair. For pairs whose hasSeparatingAxis entry is
 * non-zero, clipHullAgainstHull() produces up to 64 local contact points,
 * extractManifoldSequential() selects the ones to keep, and the result is
 * written to globalContactsOut.
 *
 * The output slot is pairs[pairIndex].z when that value is non-negative (a
 * contact already stored for this pair, referred to as the 'mpr' contact);
 * otherwise a new slot is reserved by atomically incrementing
 * nGlobalContactsOut. When an existing slot is reused, its point 0 is left
 * untouched and only points 1.. are overwritten. The chosen slot index is
 * written back to pairs[pairIndex].z.
 */
__kernel void   clipHullHullKernel( __global int4* pairs, 
	__global const b3RigidBodyData_t* rigidBodies, 
	__global const b3Collidable_t* collidables,
	__global const b3ConvexPolyhedronData_t* convexShapes, 
	__global const float4* vertices,
	__global const float4* uniqueEdges,
	__global const b3GpuFace_t* faces,
	__global const int* indices,
	__global const float4* separatingNormals,
	__global const int* hasSeparatingAxis,
	__global struct b3Contact4Data* restrict globalContactsOut,
	counter32_t nGlobalContactsOut,
	int numPairs,
	int contactCapacity)
{
	int pairIndex = get_global_id(0);
	if (pairIndex >= numPairs)
		return;

	int bodyIndexA = pairs[pairIndex].x;
	int bodyIndexB = pairs[pairIndex].y;

	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

	// Nothing to clip for pairs that were not flagged by the previous stage.
	if (!hasSeparatingAxis[pairIndex])
		return;

	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;

	// Private scratch buffers used by the clipping routine.
	float4 worldVertsB1[64];
	float4 worldVertsB2[64];
	int capacityWorldVerts = 64;

	float4 localContactsOut[64];
	int localContactCapacity = 64;

	// Distance range forwarded to clipHullAgainstHull.
	float minDist = -1e30f;
	float maxDist = 0.02f;

	int numLocalContactsOut = clipHullAgainstHull(separatingNormals[pairIndex],
		&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],
		rigidBodies[bodyIndexA].m_pos, rigidBodies[bodyIndexA].m_quat,
		rigidBodies[bodyIndexB].m_pos, rigidBodies[bodyIndexB].m_quat,
		worldVertsB1, worldVertsB2, capacityWorldVerts,
		minDist, maxDist,
		vertices, faces, indices,
		localContactsOut, localContactCapacity);

	if (numLocalContactsOut <= 0)
		return;

	float4 normal = -separatingNormals[pairIndex];

	// -1 marks "no point selected" until the reduction fills the entry.
	int contactIdx[4];
	contactIdx[0] = -1;
	contactIdx[1] = -1;
	contactIdx[2] = -1;
	contactIdx[3] = -1;

	int nReducedContacts = extractManifoldSequential(localContactsOut, numLocalContactsOut, normal, contactIdx);

	// Reuse the slot of an existing contact, or reserve a fresh one.
	int mprContactIndex = pairs[pairIndex].z;
	int dstIdx = mprContactIndex;
	if (dstIdx < 0)
	{
		AppendInc( nGlobalContactsOut, dstIdx );
	}

	if (dstIdx >= contactCapacity)
		return;

	pairs[pairIndex].z = dstIdx;

	__global struct b3Contact4Data* c = globalContactsOut + dstIdx;
	c->m_worldNormalOnB = -normal;
	// Fixed material values: restitution 0 and friction 0.7, scaled by 0xffff.
	c->m_restituitionCoeffCmp = (0.f*0xffff);
	c->m_frictionCoeffCmp = (0.7f*0xffff);
	c->m_batchIdx = pairIndex;
	// The body index is stored negated when the body's inverse mass is zero.
	c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0 ? -bodyIndexA : bodyIndexA;
	c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0 ? -bodyIndexB : bodyIndexB;
	c->m_childIndexA = -1;
	c->m_childIndexB = -1;

	// Keep point 0 of a reused 'mpr' contact; overwrite everything otherwise.
	int firstPoint = (mprContactIndex < 0) ? 0 : 1;
	for (int k = firstPoint; k < nReducedContacts; k++)
	{
		c->m_worldPosB[k] = localContactsOut[contactIdx[k]];
	}
	// The point count is stored in the w component of the normal.
	GET_NPOINTS(*c) = nReducedContacts;
}
1001
1002
/**
 * Clips the convex hulls of each flagged compound pair against each other and
 * appends the reduced contact manifold.
 *
 * One work-item handles one entry of gpuCompoundPairs, whose components are
 * (bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB). A non-negative
 * child index selects an entry of gpuChildShapes: the child's position and
 * orientation are composed with the body's, and the collidable is taken from
 * the child. A negative child index means the body's own collidable and
 * transform are used.
 *
 * For pairs whose gpuHasCompoundSepNormalsOut entry is non-zero,
 * clipHullAgainstHull() produces up to 64 local contact points,
 * extractManifoldSequential() selects the ones to keep, and one
 * b3Contact4Data slot is reserved by atomically incrementing
 * nGlobalContactsOut. A manifold occupies exactly one slot, so the slot is
 * written whenever its index is below maxContactCapacity. This matches the
 * capacity test of the other contact kernels in this file and ensures that no
 * reserved in-range slot is left unwritten.
 */
__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, 
	__global const b3RigidBodyData_t* rigidBodies, 
	__global const b3Collidable_t* collidables,
	__global const b3ConvexPolyhedronData_t* convexShapes, 
	__global const float4* vertices,
	__global const float4* uniqueEdges,
	__global const b3GpuFace_t* faces,
	__global const int* indices,
	__global const b3GpuChildShape_t* gpuChildShapes,
	__global const float4* gpuCompoundSepNormalsOut,
	__global const int* gpuHasCompoundSepNormalsOut,
	__global struct b3Contact4Data* restrict globalContactsOut,
	counter32_t nGlobalContactsOut,
	int numCompoundPairs, int maxContactCapacity)
{

	int i = get_global_id(0);
	int pairIndex = i;
	
	// Private scratch buffers used by the clipping routine.
	float4 worldVertsB1[64];
	float4 worldVertsB2[64];
	int capacityWorldVerts = 64;	

	float4 localContactsOut[64];
	int localContactCapacity=64;
	
	// Distance range forwarded to clipHullAgainstHull.
	float minDist = -1e30f;
	float maxDist = 0.02f;

	if (i<numCompoundPairs)
	{
		if (gpuHasCompoundSepNormalsOut[i])
		{
			int bodyIndexA = gpuCompoundPairs[i].x;
			int bodyIndexB = gpuCompoundPairs[i].y;
			
			int childShapeIndexA = gpuCompoundPairs[i].z;
			int childShapeIndexB = gpuCompoundPairs[i].w;
			
			int collidableIndexA = -1;
			int collidableIndexB = -1;
			
			float4 ornA = rigidBodies[bodyIndexA].m_quat;
			float4 posA = rigidBodies[bodyIndexA].m_pos;
			
			float4 ornB = rigidBodies[bodyIndexB].m_quat;
			float4 posB = rigidBodies[bodyIndexB].m_pos;
			
			if (childShapeIndexA >= 0)
			{
				// Compose the child's local transform with body A's transform.
				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
				float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
				float4 newPosA = qtRotate(ornA,childPosA)+posA;
				float4 newOrnA = qtMul(ornA,childOrnA);
				posA = newPosA;
				ornA = newOrnA;
			} else
			{
				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
			}
			
			if (childShapeIndexB >= 0)
			{
				// Compose the child's local transform with body B's transform.
				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
				float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
				float4 newPosB = transform(&childPosB,&posB,&ornB);
				float4 newOrnB = qtMul(ornB,childOrnB);
				posB = newPosB;
				ornB = newOrnB;
			} else
			{
				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
			}
			
			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
		
			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
				&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],
				posA,ornA,
				posB,ornB,
				worldVertsB1,worldVertsB2,capacityWorldVerts,
				minDist, maxDist,
				vertices,faces,indices,
				localContactsOut,localContactCapacity);
			
			if (numLocalContactsOut>0)
			{
				float4 normal = -gpuCompoundSepNormalsOut[i];
				int nPoints = numLocalContactsOut;
				float4* pointsIn = localContactsOut;

				// -1 marks "no point selected" until the reduction fills the entry.
				int contactIdx[4];
				contactIdx[0] = -1;
				contactIdx[1] = -1;
				contactIdx[2] = -1;
				contactIdx[3] = -1;
		
				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
		
				// Reserve one output slot; dstIdx receives the counter value before the increment.
				int dstIdx;
				AppendInc( nGlobalContactsOut, dstIdx );
				// One manifold uses one slot, so only the slot index is checked against the capacity.
				if (dstIdx < maxContactCapacity)
				{
					__global struct b3Contact4Data* c = globalContactsOut + dstIdx;
					c->m_worldNormalOnB = -normal;
					// Fixed material values: restitution 0 and friction 0.7, scaled by 0xffff.
					c->m_restituitionCoeffCmp = (0.f*0xffff);
					c->m_frictionCoeffCmp = (0.7f*0xffff);
					c->m_batchIdx = pairIndex;
					// The body index is stored negated when the body's inverse mass is zero.
					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0 ? -bodyIndexA : bodyIndexA;
					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0 ? -bodyIndexB : bodyIndexB;
					c->m_childIndexA = childShapeIndexA;
					c->m_childIndexB = childShapeIndexB;
					for (int k=0;k<nReducedContacts;k++)
					{
						c->m_worldPosB[k] = pointsIn[contactIdx[k]];
					}
					// The point count is stored in the w component of the normal.
					GET_NPOINTS(*c) = nReducedContacts;
				}
			}//	if (numLocalContactsOut>0)
		}//	if (gpuHasCompoundSepNormalsOut[i])
	}//	if (i<numCompoundPairs)

}
1133
1134
1135
/**
 * Generates a single-point contact for every pair in which both collidables
 * are spheres and the centre distance does not exceed the sum of the radii.
 *
 * One work-item handles one pair. The contact normal is the normalised
 * centre-to-centre direction (from B to A), falling back to (1,0,0,0) when
 * the centres are closer than 0.00001. The contact position lies on sphere
 * B's surface and its w component carries the signed distance (negative means
 * penetration). A slot is reserved by atomically incrementing
 * nGlobalContactsOut and written only when its index is below
 * contactCapacity.
 */
__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, 
	__global const b3RigidBodyData_t* rigidBodies, 
	__global const b3Collidable_t* collidables,
	__global const float4* separatingNormals,
	__global const int* hasSeparatingAxis,
	__global struct b3Contact4Data* restrict globalContactsOut,
	counter32_t nGlobalContactsOut,
	int contactCapacity,
	int numPairs)
{
	int pairIndex = get_global_id(0);
	if (pairIndex >= numPairs)
		return;

	int bodyIndexA = pairs[pairIndex].x;
	int bodyIndexB = pairs[pairIndex].y;

	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

	// Only sphere-sphere pairs are handled here.
	if (collidables[collidableIndexA].m_shapeType != SHAPE_SPHERE ||
		collidables[collidableIndexB].m_shapeType != SHAPE_SPHERE)
		return;

	float radiusA = collidables[collidableIndexA].m_radius;
	float radiusB = collidables[collidableIndexB].m_radius;
	float radiusSum = radiusA + radiusB;

	float4 posA = rigidBodies[bodyIndexA].m_pos;
	float4 posB = rigidBodies[bodyIndexB].m_pos;

	float4 diff = posA - posB;
	float len = length(diff);

	// A positive separation produces no contact.
	if (!(len <= radiusSum))
		return;

	// Signed distance: negative means penetration.
	float dist = len - radiusSum;

	// Fallback direction used when the centres (nearly) coincide.
	float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);
	if (len > 0.00001)
	{
		normalOnSurfaceB = diff / len;
	}

	float4 contactPosB = posB + normalOnSurfaceB*radiusB;
	contactPosB.w = dist;

	// Reserve one output slot; dstIdx receives the counter value before the increment.
	int dstIdx;
	AppendInc( nGlobalContactsOut, dstIdx );
	if (dstIdx >= contactCapacity)
		return;

	__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
	c->m_worldNormalOnB = -normalOnSurfaceB;
	// Fixed material values: restitution 0 and friction 0.7, scaled by 0xffff.
	c->m_restituitionCoeffCmp = (0.f*0xffff);
	c->m_frictionCoeffCmp = (0.7f*0xffff);
	c->m_batchIdx = pairIndex;
	// The body index is stored negated when the body's inverse mass is zero.
	c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0 ? -bodyIndexA : bodyIndexA;
	c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0 ? -bodyIndexB : bodyIndexB;
	c->m_worldPosB[0] = contactPosB;
	c->m_childIndexA = -1;
	c->m_childIndexB = -1;

	// The point count is stored in the w component of the normal.
	GET_NPOINTS(*c) = 1;
}
1205
1206 __kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
1207                                                                                                                                                                         __global const b3RigidBodyData_t* rigidBodies, 
1208                                                                                                                                                                         __global const b3Collidable_t* collidables,
1209                                                                                                                                                                         __global const b3ConvexPolyhedronData_t* convexShapes, 
1210                                                                                                                                                                         __global const float4* vertices,
1211                                                                                                                                                                         __global const float4* uniqueEdges,
1212                                                                                                                                                                         __global const b3GpuFace_t* faces,
1213                                                                                                                                                                         __global const int* indices,
1214                                                                                                                                                                         __global const b3GpuChildShape_t* gpuChildShapes,
1215                                                                                                                                                                         __global const float4* separatingNormals,
1216                                                                                                                                                                         __global struct b3Contact4Data* restrict globalContactsOut,
1217                                                                                                                                                                         counter32_t nGlobalContactsOut,
1218                                                                                                                                                                         int contactCapacity,
1219                                                                                                                                                                         int numConcavePairs)
1220 {
1221
1222         int i = get_global_id(0);
1223         int pairIndex = i;
1224         
1225         float4 worldVertsB1[64];
1226         float4 worldVertsB2[64];
1227         int capacityWorldVerts = 64;    
1228
1229         float4 localContactsOut[64];
1230         int localContactCapacity=64;
1231         
1232         float minDist = -1e30f;
1233         float maxDist = 0.02f;
1234
1235         if (i<numConcavePairs)
1236         {
1237                 //negative value means that the pair is invalid
1238                 if (concavePairsIn[i].w<0)
1239                         return;
1240
1241                 int bodyIndexA = concavePairsIn[i].x;
1242                 int bodyIndexB = concavePairsIn[i].y;
1243                 int f = concavePairsIn[i].z;
1244                 int childShapeIndexA = f;
1245                 
1246                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1247                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1248                 
1249                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1250                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1251                 
1252                 ///////////////////////////////////////////////////////////////
1253                 
1254         
1255                 bool overlap = false;
1256                 
1257                 b3ConvexPolyhedronData_t convexPolyhedronA;
1258
1259         //add 3 vertices of the triangle
1260                 convexPolyhedronA.m_numVertices = 3;
1261                 convexPolyhedronA.m_vertexOffset = 0;
1262                 float4  localCenter = make_float4(0.f,0.f,0.f,0.f);
1263
1264                 b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
1265                 
1266                 float4 verticesA[3];
1267                 for (int i=0;i<3;i++)
1268                 {
1269                         int index = indices[face.m_indexOffset+i];
1270                         float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1271                         verticesA[i] = vert;
1272                         localCenter += vert;
1273                 }
1274
1275                 float dmin = FLT_MAX;
1276
1277                 int localCC=0;
1278
1279                 //a triangle has 3 unique edges
1280                 convexPolyhedronA.m_numUniqueEdges = 3;
1281                 convexPolyhedronA.m_uniqueEdgesOffset = 0;
1282                 float4 uniqueEdgesA[3];
1283                 
1284                 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1285                 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1286                 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1287
1288
1289                 convexPolyhedronA.m_faceOffset = 0;
1290                                   
1291                 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1292                              
1293                 b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];
1294                 int indicesA[3+3+2+2+2];
1295                 int curUsedIndices=0;
1296                 int fidx=0;
1297
1298                 //front size of triangle
1299                 {
1300                         facesA[fidx].m_indexOffset=curUsedIndices;
1301                         indicesA[0] = 0;
1302                         indicesA[1] = 1;
1303                         indicesA[2] = 2;
1304                         curUsedIndices+=3;
1305                         float c = face.m_plane.w;
1306                         facesA[fidx].m_plane.x = normal.x;
1307                         facesA[fidx].m_plane.y = normal.y;
1308                         facesA[fidx].m_plane.z = normal.z;
1309                         facesA[fidx].m_plane.w = c;
1310                         facesA[fidx].m_numIndices=3;
1311                 }
1312                 fidx++;
1313                 //back size of triangle
1314                 {
1315                         facesA[fidx].m_indexOffset=curUsedIndices;
1316                         indicesA[3]=2;
1317                         indicesA[4]=1;
1318                         indicesA[5]=0;
1319                         curUsedIndices+=3;
1320                         float c = dot3F4(normal,verticesA[0]);
1321                         float c1 = -face.m_plane.w;
1322                         facesA[fidx].m_plane.x = -normal.x;
1323                         facesA[fidx].m_plane.y = -normal.y;
1324                         facesA[fidx].m_plane.z = -normal.z;
1325                         facesA[fidx].m_plane.w = c;
1326                         facesA[fidx].m_numIndices=3;
1327                 }
1328                 fidx++;
1329
1330                 bool addEdgePlanes = true;
1331                 if (addEdgePlanes)
1332                 {
1333                         int numVertices=3;
1334                         int prevVertex = numVertices-1;
1335                         for (int i=0;i<numVertices;i++)
1336                         {
1337                                 float4 v0 = verticesA[i];
1338                                 float4 v1 = verticesA[prevVertex];
1339                                             
1340                                 float4 edgeNormal = normalize(cross(normal,v1-v0));
1341                                 float c = -dot3F4(edgeNormal,v0);
1342
1343                                 facesA[fidx].m_numIndices = 2;
1344                                 facesA[fidx].m_indexOffset=curUsedIndices;
1345                                 indicesA[curUsedIndices++]=i;
1346                                 indicesA[curUsedIndices++]=prevVertex;
1347                                             
1348                                 facesA[fidx].m_plane.x = edgeNormal.x;
1349                                 facesA[fidx].m_plane.y = edgeNormal.y;
1350                                 facesA[fidx].m_plane.z = edgeNormal.z;
1351                                 facesA[fidx].m_plane.w = c;
1352                                 fidx++;
1353                                 prevVertex = i;
1354                         }
1355                 }
1356                 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1357                 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1358
1359
1360                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1361                 posA.w = 0.f;
1362                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1363                 posB.w = 0.f;
1364                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1365                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1366
1367
1368                 float4 sepAxis = separatingNormals[i];
1369                 
1370                 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1371                 int childShapeIndexB =-1;
1372                 if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1373                 {
1374                         ///////////////////
1375                         ///compound shape support
1376                         
1377                         childShapeIndexB = concavePairsIn[pairIndex].w;
1378                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1379                         shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1380                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1381                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1382                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1383                         float4 newOrnB = qtMul(ornB,childOrnB);
1384                         posB = newPosB;
1385                         ornB = newOrnB;
1386                         
1387                 }
1388                 
1389                 ////////////////////////////////////////
1390                 
1391                 
1392                 
1393                 int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,
1394                                                                                                                 &convexPolyhedronA, &convexShapes[shapeIndexB],
1395                                                                                                                 posA,ornA,
1396                                                                                                           posB,ornB,
1397                                                                                                           worldVertsB1,worldVertsB2,capacityWorldVerts,
1398                                                                                                                 minDist, maxDist,
1399                                                                                                                 &verticesA,&facesA,&indicesA,
1400                                                                                                                 vertices,faces,indices,
1401                                                                                                                 localContactsOut,localContactCapacity);
1402                                                                                                 
1403                 if (numLocalContactsOut>0)
1404                 {
1405                         float4 normal = -separatingNormals[i];
1406                         int nPoints = numLocalContactsOut;
1407                         float4* pointsIn = localContactsOut;
1408                         int contactIdx[4];// = {-1,-1,-1,-1};
1409
1410                         contactIdx[0] = -1;
1411                         contactIdx[1] = -1;
1412                         contactIdx[2] = -1;
1413                         contactIdx[3] = -1;
1414         
1415                         int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
1416         
1417                         int dstIdx;
1418                         AppendInc( nGlobalContactsOut, dstIdx );
1419                         if (dstIdx<contactCapacity)
1420                         {
1421                                 __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
1422                                 c->m_worldNormalOnB = -normal;
1423                                 c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
1424                                 c->m_batchIdx = pairIndex;
1425                                 int bodyA = concavePairsIn[pairIndex].x;
1426                                 int bodyB = concavePairsIn[pairIndex].y;
1427                                 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
1428                                 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
1429                                 c->m_childIndexA = childShapeIndexA;
1430                                 c->m_childIndexB = childShapeIndexB;
1431                                 for (int i=0;i<nReducedContacts;i++)
1432                                 {
1433                                         c->m_worldPosB[i] = pointsIn[contactIdx[i]];
1434                                 }
1435                                 GET_NPOINTS(*c) = nReducedContacts;
1436                         }
1437                                 
1438                 }//             if (numContactsOut>0)
1439         }//     if (i<numPairs)
1440 }
1441
1442
1443
1444
1445
1446
// Selects the two faces that take part in clipping for one pair and stores
// their world-space vertices in the per-pair slabs.
//
//  * On hullB the face whose world-space normal has the LARGEST dot product
//    with separatingNormal is chosen; its vertices are transformed by
//    (posB, ornB) into worldVertsB1[pairIndex*capacityWorldVerts ...].
//  * On hullA the face whose world-space normal has the SMALLEST dot product
//    with separatingNormal is chosen; its normal goes to
//    worldNormalsA1[pairIndex] and its vertices, transformed by (posA, ornA),
//    to worldVertsA1[pairIndex*capacityWorldVerts ...].
//  * clippingFaces[pairIndex] receives
//    (closestFaceA, closestFaceB, #vertices stored for A, #vertices stored for B).
//
// A face index stays -1 when no face could be selected (hull without faces,
// or every dot product compared false); in that case nothing is read through
// that index and the corresponding vertex count is 0.
// At most capacityWorldVerts vertices are stored per face so that a pair can
// never write into the slab that belongs to the next pair.
//
// minDist and maxDist are accepted for interface compatibility but not used.
// The return value is always 0.
int findClippingFaces(const float4 separatingNormal,
                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
                      __global float4* worldVertsA1,
                      __global float4* worldNormalsA1,
                      __global float4* worldVertsB1,
                      int capacityWorldVerts,
                      const float minDist, float maxDist,
                      __global const float4* vertices,
                      __global const b3GpuFace_t* faces,
                      __global const int* indices,
                      __global int4* clippingFaces, int pairIndex)
{
    int numContactsOut = 0;
    int numWorldVertsB1 = 0;
    int numVerticesA = 0;

    // --- hull B: face most aligned with the separating normal ---
    int closestFaceB = -1;
    {
        float dmax = -FLT_MAX;
        for (int face = 0; face < hullB->m_numFaces; face++)
        {
            const int faceIdx = hullB->m_faceOffset + face;
            const float4 Normal = make_float4(faces[faceIdx].m_plane.x,
                                              faces[faceIdx].m_plane.y,
                                              faces[faceIdx].m_plane.z, 0.f);
            const float4 WorldNormal = qtRotate(ornB, Normal);
            float d = dot3F4(WorldNormal, separatingNormal);
            if (d > dmax)
            {
                dmax = d;
                closestFaceB = face;
            }
        }
    }

    // Only touch faces[] when a face was actually selected.
    if (closestFaceB >= 0)
    {
        const b3GpuFace_t polyB = faces[hullB->m_faceOffset + closestFaceB];
        int numVertices = polyB.m_numIndices;
        // Do not overrun this pair's slab in worldVertsB1.
        if (numVertices > capacityWorldVerts)
            numVertices = capacityWorldVerts;
        for (int e0 = 0; e0 < numVertices; e0++)
        {
            const float4 b = vertices[hullB->m_vertexOffset + indices[polyB.m_indexOffset + e0]];
            worldVertsB1[pairIndex*capacityWorldVerts + numWorldVertsB1++] = transform(&b, &posB, &ornB);
        }
    }

    // --- hull A: face most opposed to the separating normal ---
    int closestFaceA = -1;
    {
        float dmin = FLT_MAX;
        float4 closestNormalWS = make_float4(0.f, 0.f, 0.f, 0.f);
        for (int face = 0; face < hullA->m_numFaces; face++)
        {
            const int faceIdx = hullA->m_faceOffset + face;
            const float4 Normal = make_float4(faces[faceIdx].m_plane.x,
                                              faces[faceIdx].m_plane.y,
                                              faces[faceIdx].m_plane.z,
                                              0.f);
            const float4 faceANormalWS = qtRotate(ornA, Normal);

            float d = dot3F4(faceANormalWS, separatingNormal);
            if (d < dmin)
            {
                dmin = d;
                closestFaceA = face;
                closestNormalWS = faceANormalWS;
            }
        }
        // Publish the winning normal once; left untouched when no face won.
        if (closestFaceA >= 0)
            worldNormalsA1[pairIndex] = closestNormalWS;
    }

    if (closestFaceA >= 0)
    {
        const int faceIdxA = hullA->m_faceOffset + closestFaceA;
        numVerticesA = faces[faceIdxA].m_numIndices;
        // Do not overrun this pair's slab in worldVertsA1.
        if (numVerticesA > capacityWorldVerts)
            numVerticesA = capacityWorldVerts;
        for (int e0 = 0; e0 < numVerticesA; e0++)
        {
            const float4 a = vertices[hullA->m_vertexOffset + indices[faces[faceIdxA].m_indexOffset + e0]];
            worldVertsA1[pairIndex*capacityWorldVerts + e0] = transform(&a, &posA, &ornA);
        }
    }

    clippingFaces[pairIndex].x = closestFaceA;
    clippingFaces[pairIndex].y = closestFaceB;
    clippingFaces[pairIndex].z = numVerticesA;
    clippingFaces[pairIndex].w = numWorldVertsB1;

    return numContactsOut;
}
1529
1530
1531
// Clips the stored face of hull B against the side planes of the stored face
// of hull A for one pair, then keeps the surviving points whose signed
// distance to A's face plane is at most maxDist.
//
// Inputs are the per-pair slabs (stride capacityWorldVertsB2) and the
// clippingFaces[pairIndex] record:
//   .x = face index on A (negative: nothing to clip), .z = vertex count of
//   A's face, .w = vertex count of B's face.
// Each edge of A's face defines a plane with normal -cross3(edge, faceNormalA);
// the polygon is clipped against every such plane via clipFaceGlobal, using
// the worldVertsB1 / worldVertsB2 slabs alternately as input and output.
// Surviving points are emitted as (x, y, z, depth), with depth clamped from
// below to minDist.
//
// On return clippingFaces[pairIndex].w holds the number of contact points
// (0 when .x was negative) and the points are present at the start of this
// pair's slab in BOTH worldVertsB1 and worldVertsB2, so the result location
// does not depend on how many edges A's face has. This matches what
// clipFacesAndFindContactsKernel does.
//
// Returns the number of contact points.
int clipFaces(__global float4* worldVertsA1,
              __global float4* worldNormalsA1,
              __global float4* worldVertsB1,
              __global float4* worldVertsB2, 
              int capacityWorldVertsB2,
              const float minDist, float maxDist,
              __global int4* clippingFaces,
              int pairIndex)
{
    int numContactsOut = 0;

    int closestFaceA = clippingFaces[pairIndex].x;
    int numVertsInA = clippingFaces[pairIndex].z;
    int numVertsInB = clippingFaces[pairIndex].w;

    int numVertsOut = 0;

    if (closestFaceA<0)
    {
        // .w still holds B's vertex count here; readers interpret it as the
        // contact count, so reset it before bailing out.
        clippingFaces[pairIndex].w = 0;
        return numContactsOut;
    }

    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];
    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];

    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
    for(int e0=0;e0<numVertsInA;e0++)
    {
        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];
        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];
        const float4 WorldEdge0 = aw - bw;
        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
        float4 planeNormalWS = -cross3(WorldEdge0,worldPlaneAnormal1);
        float planeEqWS = -dot3F4(aw,planeNormalWS);
        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);

        // the output of this pass is the input of the next one
        __global float4* tmp = pVtxOut;
        pVtxOut = pVtxIn;
        pVtxIn = tmp;
        numVertsInB = numVertsOut;
        numVertsOut = 0;
    }

    // only keep points that are close enough to (or behind) the witness face
    float4 planeNormalWS = worldNormalsA1[pairIndex];
    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);

    for (int i=0;i<numVertsInB;i++)
    {
        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
        if (depth <=minDist)
        {
            depth = minDist;
        }

        if (depth <=maxDist)
        {
            float4 pointInWorld = pVtxIn[i];
            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
        }
    }

    clippingFaces[pairIndex].w =numContactsOut;

    // pVtxOut is worldVertsB1 or worldVertsB2 depending on the parity of
    // numVertsInA; mirror the contacts so both slabs carry the result.
    for (int i=0;i<numContactsOut;i++)
    {
        pVtxIn[i] = pVtxOut[i];
    }

    return numContactsOut;

}
1618
1619
1620
1621
// One work-item per pair: for every pair flagged in hasSeparatingAxis, resolve
// the convex shapes of both bodies and let findClippingFaces() fill
// clippingFacesOut, worldVertsA1, worldNormalsA1 and worldVertsB1 for that pair.
// Pairs that are out of range or have no separating axis are left untouched.
// uniqueEdges is not used by this kernel.
__kernel void   findClippingFacesKernel(  __global const int4* pairs,
                                        __global const b3RigidBodyData_t* rigidBodies,
                                        __global const b3Collidable_t* collidables,
                                        __global const b3ConvexPolyhedronData_t* convexShapes,
                                        __global const float4* vertices,
                                        __global const float4* uniqueEdges,
                                        __global const b3GpuFace_t* faces,
                                        __global const int* indices,
                                        __global const float4* separatingNormals,
                                        __global const int* hasSeparatingAxis,
                                        __global int4* clippingFacesOut,
                                        __global float4* worldVertsA1,
                                        __global float4* worldNormalsA1,
                                        __global float4* worldVertsB1,
                                        int capacityWorldVerts,
                                        int numPairs
                                        )
{
    const int pairIndex = get_global_id(0);

    // distance window forwarded to findClippingFaces
    const float minDist = -1e30f;
    const float maxDist = 0.02f;

    // surplus work-items have nothing to do
    if (pairIndex >= numPairs)
        return;

    // pair was not flagged by the separating-axis stage
    if (!hasSeparatingAxis[pairIndex])
        return;

    const int bodyA = pairs[pairIndex].x;
    const int bodyB = pairs[pairIndex].y;

    // body -> collidable -> convex shape
    const int shapeA = collidables[rigidBodies[bodyA].m_collidableIdx].m_shapeIndex;
    const int shapeB = collidables[rigidBodies[bodyB].m_collidableIdx].m_shapeIndex;

    findClippingFaces(separatingNormals[pairIndex],
                      &convexShapes[shapeA], &convexShapes[shapeB],
                      rigidBodies[bodyA].m_pos, rigidBodies[bodyA].m_quat,
                      rigidBodies[bodyB].m_pos, rigidBodies[bodyB].m_quat,
                      worldVertsA1,
                      worldNormalsA1,
                      worldVertsB1, capacityWorldVerts,
                      minDist, maxDist,
                      vertices, faces, indices,
                      clippingFacesOut, pairIndex);
}
1681
1682
1683
1684
// One work-item per pair: clips the stored face of B against the side planes
// of the stored face of A and keeps the points within maxDist of A's face
// plane as contact candidates (x, y, z, depth).
//
// Reads clippingFacesOut[pair] as (.x face on A, .z vertex count of A's face,
// .w vertex count of B's face) and overwrites .w with the number of contact
// candidates (0 when .x is negative). The candidates end up at the start of
// the pair's slab in both worldVertsB1 and worldVertsB2.
// Slabs are addressed with stride vertexFaceCapacity.
// separatingNormals and debugMode are not used by this kernel.
__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,
                                                   __global const int* hasSeparatingAxis,
                                                   __global int4* clippingFacesOut,
                                                   __global float4* worldVertsA1,
                                                   __global float4* worldNormalsA1,
                                                   __global float4* worldVertsB1,
                                                   __global float4* worldVertsB2,
                                                    int vertexFaceCapacity,
                                                   int numPairs,
                                                   int debugMode
                                                   )
{
    const int pairIndex = get_global_id(0);

    const float minDist = -1e30f;
    const float maxDist = 0.02f;

    // out-of-range work-item, or pair without separating axis: nothing to do
    if (pairIndex >= numPairs || !hasSeparatingAxis[pairIndex])
        return;

    const int slabBase = pairIndex*vertexFaceCapacity;

    // ping-pong buffers for the successive clipping passes
    __global float4* src = &worldVertsB1[slabBase];
    __global float4* dst = &worldVertsB2[slabBase];

    int contactCount = 0;

    const int faceOnA    = clippingFacesOut[pairIndex].x;
    const int edgeCountA = clippingFacesOut[pairIndex].z;
    int polyCount        = clippingFacesOut[pairIndex].w;

    if (faceOnA >= 0)
    {
        // clip B's polygon against the plane through each edge of A's face
        for (int edge = 0; edge < edgeCountA; ++edge)
        {
            const float4 edgeStart = worldVertsA1[slabBase + edge];
            const float4 edgeEnd   = worldVertsA1[slabBase + ((edge+1)%edgeCountA)];
            const float4 faceNormalA = worldNormalsA1[pairIndex];

            const float4 sideNormal = -cross3(edgeStart - edgeEnd, faceNormalA);
            const float  sideEq     = -dot3F4(edgeStart, sideNormal);

            polyCount = clipFaceGlobal(src, polyCount, sideNormal, sideEq, dst);

            // what was just written becomes the input of the next pass
            __global float4* swap = dst;
            dst = src;
            src = swap;
        }

        // distance of every surviving point to the plane of A's face
        const float4 facePlaneNormal = worldNormalsA1[pairIndex];
        const float  facePlaneEq = -dot3F4(facePlaneNormal, worldVertsA1[slabBase]);

        for (int v = 0; v < polyCount; ++v)
        {
            const float4 p = src[v];
            float depth = dot3F4(facePlaneNormal, p) + facePlaneEq;

            // clamp very deep penetrations to minDist
            depth = (depth <= minDist) ? minDist : depth;

            if (depth <= maxDist)
                dst[contactCount++] = make_float4(p.x, p.y, p.z, depth);
        }
    }

    clippingFacesOut[pairIndex].w = contactCount;

    // mirror the result so it is available in both slabs, whichever one
    // happened to be the output of the last pass
    for (int k = 0; k < contactCount; ++k)
        src[k] = dst[k];
}
1789
1790
1791
1792
1793
// One work-item per pair: reduces the contact candidates stored in
// worldVertsB2 for the pair (count taken from clippingFaces[pair].w) to at
// most four points via extractManifoldSequentialGlobal and writes a
// b3Contact4Data record.
//
// Destination slot: pairs[pair].z when it is non-negative, otherwise a fresh
// slot obtained by atomically incrementing *nGlobalContactsOut. Nothing is
// written when the slot index is not below contactCapacity (the counter is
// still incremented in that case).
// NOTE(review): the local name mprContactIndex suggests pairs[].z refers to a
// contact produced by an earlier MPR stage - confirm against the host code.
// When such a pre-existing slot is reused, m_worldPosB[0] of that slot is
// deliberately left as it was.
// On a successful write pairs[pair].w is set to the slot index.
__kernel void   newContactReductionKernel( __global int4* pairs,
                                                   __global const b3RigidBodyData_t* rigidBodies,
                                                   __global const float4* separatingNormals,
                                                   __global const int* hasSeparatingAxis,
                                                   __global struct b3Contact4Data* globalContactsOut,
                                                   __global int4* clippingFaces,
                                                   __global float4* worldVertsB2,
                                                   volatile __global int* nGlobalContactsOut,
                                                   int vertexFaceCapacity,
                                                   int contactCapacity,
                                                   int numPairs
                                                   )
{
    const int pairIndex = get_global_id(0);

    // out-of-range work-item, or pair without separating axis
    if (pairIndex >= numPairs || !hasSeparatingAxis[pairIndex])
        return;

    const int nPoints = clippingFaces[pairIndex].w;
    if (nPoints <= 0)
        return;

    // indices of the points that survive the reduction
    int4 contactIdx;
    contactIdx = make_int4(0,1,2,3);

    __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];
    float4 normal = -separatingNormals[pairIndex];

    const int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);

    const int mprContactIndex = pairs[pairIndex].z;

    // reuse the existing slot if there is one, otherwise append
    int dstIdx = mprContactIndex;
    if (dstIdx < 0)
    {
        AppendInc( nGlobalContactsOut, dstIdx );
    }

    // no room left in the output buffer
    if (dstIdx >= contactCapacity)
        return;

    __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];

    c->m_worldNormalOnB = -normal;
    c->m_restituitionCoeffCmp = (0.f*0xffff);
    c->m_frictionCoeffCmp = (0.7f*0xffff);
    c->m_batchIdx = pairIndex;

    const int bodyA = pairs[pairIndex].x;
    const int bodyB = pairs[pairIndex].y;

    pairs[pairIndex].w = dstIdx;

    // a body whose m_invMass is 0 is stored with a negated index
    c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
    c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
    c->m_childIndexA =-1;
    c->m_childIndexB =-1;

    // copy the selected points; counts outside 1..4 copy nothing
    if (nReducedContacts >= 1 && nReducedContacts <= 4)
    {
        if (nReducedContacts == 4)
            c->m_worldPosB[3] = pointsIn[contactIdx.w];
        if (nReducedContacts >= 3)
            c->m_worldPosB[2] = pointsIn[contactIdx.z];
        if (nReducedContacts >= 2)
            c->m_worldPosB[1] = pointsIn[contactIdx.y];
        // point 0 is only written for freshly appended contacts
        if (mprContactIndex < 0)
            c->m_worldPosB[0] = pointsIn[contactIdx.x];
    }

    // the point count lives in m_worldNormalOnB.w, so this must follow the
    // assignment of m_worldNormalOnB above
    GET_NPOINTS(*c) = nReducedContacts;
}