[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / kernels / sat.cl
//keep this enum in sync with the CPU version (in btCollidable.h)
//written by Erwin Coumans

// Shape-type tags and related constants. The numeric values are shared with
// the host code, so they must not be changed here alone.
#define SHAPE_CONVEX_HULL              3
#define SHAPE_CONCAVE_TRIMESH          5
#define TRIANGLE_NUM_CONVEX_FACES      5
#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6

#define B3_MAX_STACK_DEPTH 256

// Short alias for unsigned int.
typedef unsigned int u32;
14
///keep this in sync with btCollidable.h
typedef struct
{
	// First word: two int names for the same storage.
	union
	{
		int m_numChildShapes;
		int m_bvhIndex;
	};
	// Second word: readable either as a float radius or as an int BVH index.
	union
	{
		float m_radius;
		int m_compoundBvhIndex;
	};

	// One of the SHAPE_* tags.
	int m_shapeType;
	// Index of the shape's data in the array matching m_shapeType.
	int m_shapeIndex;
} btCollidableGpu;
32
// Number of high bits of a leaf's index word that are masked off when the
// triangle index is extracted.
#define MAX_NUM_PARTS_IN_BITS 10

///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.
///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
typedef struct
{
	// 12 bytes: quantized AABB corners, three 16-bit values each.
	unsigned short int m_quantizedAabbMin[3];
	unsigned short int m_quantizedAabbMax[3];
	// 4 bytes: >= 0 for a leaf (triangle index), negative for an internal
	// node (negated escape index).
	int m_escapeIndexOrTriangleIndex;
} b3QuantizedBvhNode;
45
46 typedef struct
47 {
48         float4          m_aabbMin;
49         float4          m_aabbMax;
50         float4          m_quantization;
51         int                     m_numNodes;
52         int                     m_numSubTrees;
53         int                     m_nodeOffset;
54         int                     m_subTreeOffset;
55
56 } b3BvhInfo;
57
58
59 int     getTriangleIndex(const b3QuantizedBvhNode* rootNode)
60 {
61         unsigned int x=0;
62         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
63         // Get only the lower bits where the triangle index is stored
64         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
65 }
66
67 int     getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
68 {
69         unsigned int x=0;
70         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
71         // Get only the lower bits where the triangle index is stored
72         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
73 }
74
75 int isLeafNode(const b3QuantizedBvhNode* rootNode)
76 {
77         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
78         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
79 }
80
81 int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)
82 {
83         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
84         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
85 }
86         
87 int getEscapeIndex(const b3QuantizedBvhNode* rootNode)
88 {
89         return -rootNode->m_escapeIndexOrTriangleIndex;
90 }
91
92 int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
93 {
94         return -rootNode->m_escapeIndexOrTriangleIndex;
95 }
96
97
// Header describing one subtree of a quantized BVH.
typedef struct
{
	// 12 bytes: quantized AABB corners, three 16-bit values each.
	unsigned short int m_quantizedAabbMin[3];
	unsigned short int m_quantizedAabbMax[3];
	// 4 bytes, points to the root of the subtree
	int m_rootNodeIndex;
	// 4 bytes
	int m_subtreeSize;
	// 12 bytes of padding.
	int m_padding[3];
} b3BvhSubtreeInfo;
109
110
111
112
113
114
115
116 typedef struct
117 {
118         float4  m_childPosition;
119         float4  m_childOrientation;
120         int m_shapeIndex;
121         int m_unused0;
122         int m_unused1;
123         int m_unused2;
124 } btGpuChildShape;
125
126
127 typedef struct
128 {
129         float4 m_pos;
130         float4 m_quat;
131         float4 m_linVel;
132         float4 m_angVel;
133
134         u32 m_collidableIdx;
135         float m_invMass;
136         float m_restituitionCoeff;
137         float m_frictionCoeff;
138 } BodyData;
139
140
141 typedef struct  
142 {
143         float4          m_localCenter;
144         float4          m_extents;
145         float4          mC;
146         float4          mE;
147         
148         float                   m_radius;
149         int     m_faceOffset;
150         int m_numFaces;
151         int     m_numVertices;
152
153         int m_vertexOffset;
154         int     m_uniqueEdgesOffset;
155         int     m_numUniqueEdges;
156         int m_unused;
157 } ConvexPolyhedronCL;
158
159 typedef struct 
160 {
161         union
162         {
163                 float4  m_min;
164                 float   m_minElems[4];
165                 int                     m_minIndices[4];
166         };
167         union
168         {
169                 float4  m_max;
170                 float   m_maxElems[4];
171                 int                     m_maxIndices[4];
172         };
173 } btAabbCL;
174
175 #include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
176 #include "Bullet3Common/shared/b3Int2.h"
177
178
179
180 typedef struct
181 {
182         float4 m_plane;
183         int m_indexOffset;
184         int m_numIndices;
185 } btGpuFace;
186
// make_float4(a,b,c,d) expands to the OpenCL vector literal (float4)(a,b,c,d).
#define make_float4 (float4)
188
189
190 __inline
191 float4 cross3(float4 a, float4 b)
192 {
193         return cross(a,b);
194
195         
196 //      float4 a1 = make_float4(a.xyz,0.f);
197 //      float4 b1 = make_float4(b.xyz,0.f);
198
199 //      return cross(a1,b1);
200
201 //float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);
202         
203         //      float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);
204         
205         //return c;
206 }
207
208 __inline
209 float dot3F4(float4 a, float4 b)
210 {
211         float4 a1 = make_float4(a.xyz,0.f);
212         float4 b1 = make_float4(b.xyz,0.f);
213         return dot(a1, b1);
214 }
215
216 __inline
217 float4 fastNormalize4(float4 v)
218 {
219         v = make_float4(v.xyz,0.f);
220         return fast_normalize(v);
221 }
222
223
224 ///////////////////////////////////////
225 //      Quaternion
226 ///////////////////////////////////////
227
228 typedef float4 Quaternion;
229
230 __inline
231 Quaternion qtMul(Quaternion a, Quaternion b);
232
233 __inline
234 Quaternion qtNormalize(Quaternion in);
235
236 __inline
237 float4 qtRotate(Quaternion q, float4 vec);
238
239 __inline
240 Quaternion qtInvert(Quaternion q);
241
242
243
244
245 __inline
246 Quaternion qtMul(Quaternion a, Quaternion b)
247 {
248         Quaternion ans;
249         ans = cross3( a, b );
250         ans += a.w*b+b.w*a;
251 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
252         ans.w = a.w*b.w - dot3F4(a, b);
253         return ans;
254 }
255
256 __inline
257 Quaternion qtNormalize(Quaternion in)
258 {
259         return fastNormalize4(in);
260 //      in /= length( in );
261 //      return in;
262 }
263 __inline
264 float4 qtRotate(Quaternion q, float4 vec)
265 {
266         Quaternion qInv = qtInvert( q );
267         float4 vcpy = vec;
268         vcpy.w = 0.f;
269         float4 out = qtMul(qtMul(q,vcpy),qInv);
270         return out;
271 }
272
273 __inline
274 Quaternion qtInvert(Quaternion q)
275 {
276         return (Quaternion)(-q.xyz, q.w);
277 }
278
279 __inline
280 float4 qtInvRotate(const Quaternion q, float4 vec)
281 {
282         return qtRotate( qtInvert( q ), vec );
283 }
284
285 __inline
286 float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
287 {
288         return qtRotate( *orientation, *p ) + (*translation);
289 }
290
291
292
293 __inline
294 float4 normalize3(const float4 a)
295 {
296         float4 n = make_float4(a.x, a.y, a.z, 0.f);
297         return fastNormalize4( n );
298 }
299
300 inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, 
301 const float4* dir, const float4* vertices, float* min, float* max)
302 {
303         min[0] = FLT_MAX;
304         max[0] = -FLT_MAX;
305         int numVerts = hull->m_numVertices;
306
307         const float4 localDir = qtInvRotate(orn,*dir);
308         float offset = dot(pos,*dir);
309         for(int i=0;i<numVerts;i++)
310         {
311                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
312                 if(dp < min[0]) 
313                         min[0] = dp;
314                 if(dp > max[0]) 
315                         max[0] = dp;
316         }
317         if(min[0]>max[0])
318         {
319                 float tmp = min[0];
320                 min[0] = max[0];
321                 max[0] = tmp;
322         }
323         min[0] += offset;
324         max[0] += offset;
325 }
326
327 inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, 
328 const float4* dir, __global const float4* vertices, float* min, float* max)
329 {
330         min[0] = FLT_MAX;
331         max[0] = -FLT_MAX;
332         int numVerts = hull->m_numVertices;
333
334         const float4 localDir = qtInvRotate(orn,*dir);
335         float offset = dot(pos,*dir);
336         for(int i=0;i<numVerts;i++)
337         {
338                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
339                 if(dp < min[0]) 
340                         min[0] = dp;
341                 if(dp > max[0]) 
342                         max[0] = dp;
343         }
344         if(min[0]>max[0])
345         {
346                 float tmp = min[0];
347                 min[0] = max[0];
348                 max[0] = tmp;
349         }
350         min[0] += offset;
351         max[0] += offset;
352 }
353
354 inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
355         const float4 posA,const float4 ornA,
356         const float4 posB,const float4 ornB,
357         float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)
358 {
359         float Min0,Max0;
360         float Min1,Max1;
361         projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);
362         project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);
363
364         if(Max0<Min1 || Max1<Min0)
365                 return false;
366
367         float d0 = Max0 - Min1;
368         float d1 = Max1 - Min0;
369         *depth = d0<d1 ? d0:d1;
370         return true;
371 }
372
373
374
375
376 inline bool IsAlmostZero(const float4 v)
377 {
378         if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)
379                 return false;
380         return true;
381 }
382
383
384
385 bool findSeparatingAxisLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
386         const float4 posA1,
387         const float4 ornA,
388         const float4 posB1,
389         const float4 ornB,
390         const float4 DeltaC2,
391         
392         const float4* verticesA, 
393         const float4* uniqueEdgesA, 
394         const btGpuFace* facesA,
395         const int*  indicesA,
396
397         __global const float4* verticesB, 
398         __global const float4* uniqueEdgesB, 
399         __global const btGpuFace* facesB,
400         __global const int*  indicesB,
401         float4* sep,
402         float* dmin)
403 {
404         
405
406         float4 posA = posA1;
407         posA.w = 0.f;
408         float4 posB = posB1;
409         posB.w = 0.f;
410         int curPlaneTests=0;
411         {
412                 int numFacesA = hullA->m_numFaces;
413                 // Test normals from hullA
414                 for(int i=0;i<numFacesA;i++)
415                 {
416                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
417                         float4 faceANormalWS = qtRotate(ornA,normal);
418                         if (dot3F4(DeltaC2,faceANormalWS)<0)
419                                 faceANormalWS*=-1.f;
420                         curPlaneTests++;
421                         float d;
422                         if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))
423                                 return false;
424                         if(d<*dmin)
425                         {
426                                 *dmin = d;
427                                 *sep = faceANormalWS;
428                         }
429                 }
430         }
431         if((dot3F4(-DeltaC2,*sep))>0.0f)
432         {
433                 *sep = -(*sep);
434         }
435         return true;
436 }
437
438 bool findSeparatingAxisLocalB(  __global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, 
439         const float4 posA1,
440         const float4 ornA,
441         const float4 posB1,
442         const float4 ornB,
443         const float4 DeltaC2,
444         __global const float4* verticesA, 
445         __global const float4* uniqueEdgesA, 
446         __global const btGpuFace* facesA,
447         __global const int*  indicesA,
448         const float4* verticesB,
449         const float4* uniqueEdgesB, 
450         const btGpuFace* facesB,
451         const int*  indicesB,
452         float4* sep,
453         float* dmin)
454 {
455
456
457         float4 posA = posA1;
458         posA.w = 0.f;
459         float4 posB = posB1;
460         posB.w = 0.f;
461         int curPlaneTests=0;
462         {
463                 int numFacesA = hullA->m_numFaces;
464                 // Test normals from hullA
465                 for(int i=0;i<numFacesA;i++)
466                 {
467                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
468                         float4 faceANormalWS = qtRotate(ornA,normal);
469                         if (dot3F4(DeltaC2,faceANormalWS)<0)
470                                 faceANormalWS *= -1.f;
471                         curPlaneTests++;
472                         float d;
473                         if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))
474                                 return false;
475                         if(d<*dmin)
476                         {
477                                 *dmin = d;
478                                 *sep = faceANormalWS;
479                         }
480                 }
481         }
482         if((dot3F4(-DeltaC2,*sep))>0.0f)
483         {
484                 *sep = -(*sep);
485         }
486         return true;
487 }
488
489
490
491 bool findSeparatingAxisEdgeEdgeLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
492         const float4 posA1,
493         const float4 ornA,
494         const float4 posB1,
495         const float4 ornB,
496         const float4 DeltaC2,
497         const float4* verticesA, 
498         const float4* uniqueEdgesA, 
499         const btGpuFace* facesA,
500         const int*  indicesA,
501         __global const float4* verticesB, 
502         __global const float4* uniqueEdgesB, 
503         __global const btGpuFace* facesB,
504         __global const int*  indicesB,
505                 float4* sep,
506         float* dmin)
507 {
508
509
510         float4 posA = posA1;
511         posA.w = 0.f;
512         float4 posB = posB1;
513         posB.w = 0.f;
514
515         int curPlaneTests=0;
516
517         int curEdgeEdge = 0;
518         // Test edges
519         for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
520         {
521                 const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
522                 float4 edge0World = qtRotate(ornA,edge0);
523
524                 for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
525                 {
526                         const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
527                         float4 edge1World = qtRotate(ornB,edge1);
528
529
530                         float4 crossje = cross3(edge0World,edge1World);
531
532                         curEdgeEdge++;
533                         if(!IsAlmostZero(crossje))
534                         {
535                                 crossje = normalize3(crossje);
536                                 if (dot3F4(DeltaC2,crossje)<0)
537                                         crossje *= -1.f;
538
539                                 float dist;
540                                 bool result = true;
541                                 {
542                                         float Min0,Max0;
543                                         float Min1,Max1;
544                                         projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
545                                         project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
546                                 
547                                         if(Max0<Min1 || Max1<Min0)
548                                                 result = false;
549                                 
550                                         float d0 = Max0 - Min1;
551                                         float d1 = Max1 - Min0;
552                                         dist = d0<d1 ? d0:d1;
553                                         result = true;
554
555                                 }
556                                 
557
558                                 if(dist<*dmin)
559                                 {
560                                         *dmin = dist;
561                                         *sep = crossje;
562                                 }
563                         }
564                 }
565
566         }
567
568         
569         if((dot3F4(-DeltaC2,*sep))>0.0f)
570         {
571                 *sep = -(*sep);
572         }
573         return true;
574 }
575
576
577 inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
578         const float4 posA,const float4 ornA,
579         const float4 posB,const float4 ornB,
580         float4* sep_axis, __global const float4* vertices,float* depth)
581 {
582         float Min0,Max0;
583         float Min1,Max1;
584         project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);
585         project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);
586
587         if(Max0<Min1 || Max1<Min0)
588                 return false;
589
590         float d0 = Max0 - Min1;
591         float d1 = Max1 - Min0;
592         *depth = d0<d1 ? d0:d1;
593         return true;
594 }
595
596
597 bool findSeparatingAxis(        __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
598         const float4 posA1,
599         const float4 ornA,
600         const float4 posB1,
601         const float4 ornB,
602         const float4 DeltaC2,
603         __global const float4* vertices, 
604         __global const float4* uniqueEdges, 
605         __global const btGpuFace* faces,
606         __global const int*  indices,
607         float4* sep,
608         float* dmin)
609 {
610         
611
612         float4 posA = posA1;
613         posA.w = 0.f;
614         float4 posB = posB1;
615         posB.w = 0.f;
616         
617         int curPlaneTests=0;
618
619         {
620                 int numFacesA = hullA->m_numFaces;
621                 // Test normals from hullA
622                 for(int i=0;i<numFacesA;i++)
623                 {
624                         const float4 normal = faces[hullA->m_faceOffset+i].m_plane;
625                         float4 faceANormalWS = qtRotate(ornA,normal);
626         
627                         if (dot3F4(DeltaC2,faceANormalWS)<0)
628                                 faceANormalWS*=-1.f;
629                                 
630                         curPlaneTests++;
631         
632                         float d;
633                         if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))
634                                 return false;
635         
636                         if(d<*dmin)
637                         {
638                                 *dmin = d;
639                                 *sep = faceANormalWS;
640                         }
641                 }
642         }
643
644
645                 if((dot3F4(-DeltaC2,*sep))>0.0f)
646                 {
647                         *sep = -(*sep);
648                 }
649         
650         return true;
651 }
652
653
654
655
656 bool findSeparatingAxisUnitSphere(      __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
657         const float4 posA1,
658         const float4 ornA,
659         const float4 posB1,
660         const float4 ornB,
661         const float4 DeltaC2,
662         __global const float4* vertices,
663         __global const float4* unitSphereDirections,
664         int numUnitSphereDirections,
665         float4* sep,
666         float* dmin)
667 {
668         
669         float4 posA = posA1;
670         posA.w = 0.f;
671         float4 posB = posB1;
672         posB.w = 0.f;
673
674         int curPlaneTests=0;
675
676         int curEdgeEdge = 0;
677         // Test unit sphere directions
678         for (int i=0;i<numUnitSphereDirections;i++)
679         {
680
681                 float4 crossje;
682                 crossje = unitSphereDirections[i];      
683
684                 if (dot3F4(DeltaC2,crossje)>0)
685                         crossje *= -1.f;
686                 {
687                         float dist;
688                         bool result = true;
689                         float Min0,Max0;
690                         float Min1,Max1;
691                         project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);
692                         project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);
693                 
694                         if(Max0<Min1 || Max1<Min0)
695                                 return false;
696                 
697                         float d0 = Max0 - Min1;
698                         float d1 = Max1 - Min0;
699                         dist = d0<d1 ? d0:d1;
700                         result = true;
701         
702                         if(dist<*dmin)
703                         {
704                                 *dmin = dist;
705                                 *sep = crossje;
706                         }
707                 }
708         }
709
710         
711         if((dot3F4(-DeltaC2,*sep))>0.0f)
712         {
713                 *sep = -(*sep);
714         }
715         return true;
716 }
717
718
719 bool findSeparatingAxisEdgeEdge(        __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
720         const float4 posA1,
721         const float4 ornA,
722         const float4 posB1,
723         const float4 ornB,
724         const float4 DeltaC2,
725         __global const float4* vertices, 
726         __global const float4* uniqueEdges, 
727         __global const btGpuFace* faces,
728         __global const int*  indices,
729         float4* sep,
730         float* dmin)
731 {
732         
733
734         float4 posA = posA1;
735         posA.w = 0.f;
736         float4 posB = posB1;
737         posB.w = 0.f;
738
739         int curPlaneTests=0;
740
741         int curEdgeEdge = 0;
742         // Test edges
743         for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
744         {
745                 const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];
746                 float4 edge0World = qtRotate(ornA,edge0);
747
748                 for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
749                 {
750                         const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];
751                         float4 edge1World = qtRotate(ornB,edge1);
752
753
754                         float4 crossje = cross3(edge0World,edge1World);
755
756                         curEdgeEdge++;
757                         if(!IsAlmostZero(crossje))
758                         {
759                                 crossje = normalize3(crossje);
760                                 if (dot3F4(DeltaC2,crossje)<0)
761                                         crossje*=-1.f;
762                                         
763                                 float dist;
764                                 bool result = true;
765                                 {
766                                         float Min0,Max0;
767                                         float Min1,Max1;
768                                         project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);
769                                         project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);
770                                 
771                                         if(Max0<Min1 || Max1<Min0)
772                                                 return false;
773                                 
774                                         float d0 = Max0 - Min1;
775                                         float d1 = Max1 - Min0;
776                                         dist = d0<d1 ? d0:d1;
777                                         result = true;
778
779                                 }
780                                 
781
782                                 if(dist<*dmin)
783                                 {
784                                         *dmin = dist;
785                                         *sep = crossje;
786                                 }
787                         }
788                 }
789
790         }
791
792         
793         if((dot3F4(-DeltaC2,*sep))>0.0f)
794         {
795                 *sep = -(*sep);
796         }
797         return true;
798 }
799
800
801 // work-in-progress
802 __kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,
803                                                                                                                                                                         __global const BodyData* rigidBodies, 
804                                                                                                                                                                         __global const btCollidableGpu* collidables,
805                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes, 
806                                                                                                                                                                         __global const float4* vertices,
807                                                                                                                                                                         __global const float4* uniqueEdges,
808                                                                                                                                                                         __global const btGpuFace* faces,
809                                                                                                                                                                         __global const int* indices,
810                                                                                                                                                                         __global btAabbCL* aabbs,
811                                                                                                                                                                         __global const btGpuChildShape* gpuChildShapes,
812                                                                                                                                                                         __global volatile float4* gpuCompoundSepNormalsOut,
813                                                                                                                                                                         __global volatile int* gpuHasCompoundSepNormalsOut,
814                                                                                                                                                                         int numCompoundPairs
815                                                                                                                                                                         )
816 {
817
818         int i = get_global_id(0);
819         if (i<numCompoundPairs)
820         {
821                 int bodyIndexA = gpuCompoundPairs[i].x;
822                 int bodyIndexB = gpuCompoundPairs[i].y;
823
824                 int childShapeIndexA = gpuCompoundPairs[i].z;
825                 int childShapeIndexB = gpuCompoundPairs[i].w;
826                 
827                 int collidableIndexA = -1;
828                 int collidableIndexB = -1;
829                 
830                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
831                 float4 posA = rigidBodies[bodyIndexA].m_pos;
832                 
833                 float4 ornB = rigidBodies[bodyIndexB].m_quat;
834                 float4 posB = rigidBodies[bodyIndexB].m_pos;
835                                                         
836                 if (childShapeIndexA >= 0)
837                 {
838                         collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
839                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
840                         float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
841                         float4 newPosA = qtRotate(ornA,childPosA)+posA;
842                         float4 newOrnA = qtMul(ornA,childOrnA);
843                         posA = newPosA;
844                         ornA = newOrnA;
845                 } else
846                 {
847                         collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
848                 }
849                 
850                 if (childShapeIndexB>=0)
851                 {
852                         collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
853                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
854                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
855                         float4 newPosB = transform(&childPosB,&posB,&ornB);
856                         float4 newOrnB = qtMul(ornB,childOrnB);
857                         posB = newPosB;
858                         ornB = newOrnB;
859                 } else
860                 {
861                         collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;     
862                 }
863         
864                 gpuHasCompoundSepNormalsOut[i] = 0;
865         
866                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
867                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
868         
869                 int shapeTypeA = collidables[collidableIndexA].m_shapeType;
870                 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
871         
872
873                 if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
874                 {
875                         return;
876                 }
877
878                 int hasSeparatingAxis = 5;
879                                                         
880                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
881                 float dmin = FLT_MAX;
882                 posA.w = 0.f;
883                 posB.w = 0.f;
884                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
885                 float4 c0 = transform(&c0local, &posA, &ornA);
886                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
887                 float4 c1 = transform(&c1local,&posB,&ornB);
888                 const float4 DeltaC2 = c0 - c1;
889                 float4 sepNormal = make_float4(1,0,0,0);
890                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
891                 hasSeparatingAxis = 4;
892                 if (!sepA)
893                 {
894                         hasSeparatingAxis = 0;
895                 } else
896                 {
897                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
898
899                         if (!sepB)
900                         {
901                                 hasSeparatingAxis = 0;
902                         } else//(!sepB)
903                         {
904                                 bool sepEE = findSeparatingAxisEdgeEdge(        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
905                                 if (sepEE)
906                                 {
907                                                 gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);
908                                                 gpuHasCompoundSepNormalsOut[i] = 1;
909                                 }//sepEE
910                         }//(!sepB)
911                 }//(!sepA)
912                 
913                 
914         }
915                 
916 }
917
918
919 inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
920 {
921                 b3Float4 vecOut;
922                 vecOut = b3MakeFloat4(
923                         (float)(vecIn[0]) / (quantization.x),
924                         (float)(vecIn[1]) / (quantization.y),
925                         (float)(vecIn[2]) / (quantization.z),
926                         0.f);
927
928                 vecOut += bvhAabbMin;
929                 return vecOut;
930 }
931
932 inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
933 {
934                 b3Float4 vecOut;
935                 vecOut = b3MakeFloat4(
936                         (float)(vecIn[0]) / (quantization.x),
937                         (float)(vecIn[1]) / (quantization.y),
938                         (float)(vecIn[2]) / (quantization.z),
939                         0.f);
940
941                 vecOut += bvhAabbMin;
942                 return vecOut;
943 }
944
945
946 // work-in-progress
947 __kernel void   findCompoundPairsKernel( __global const int4* pairs, 
948         __global const BodyData* rigidBodies, 
949         __global const btCollidableGpu* collidables,
950         __global const ConvexPolyhedronCL* convexShapes, 
951         __global const float4* vertices,
952         __global const float4* uniqueEdges,
953         __global const btGpuFace* faces,
954         __global const int* indices,
955         __global b3Aabb_t* aabbLocalSpace,
956         __global const btGpuChildShape* gpuChildShapes,
957         __global volatile int4* gpuCompoundPairsOut,
958         __global volatile int* numCompoundPairsOut,
959         __global const b3BvhSubtreeInfo* subtrees,
960         __global const b3QuantizedBvhNode* quantizedNodes,
961         __global const b3BvhInfo* bvhInfos,
962         int numPairs,
963         int maxNumCompoundPairsCapacity
964         )
965 {
966
967         int i = get_global_id(0);
968
969         if (i<numPairs)
970         {
971                 int bodyIndexA = pairs[i].x;
972                 int bodyIndexB = pairs[i].y;
973
974                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
975                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
976
977                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
978                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
979
980
981                 //once the broadphase avoids static-static pairs, we can remove this test
982                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
983                 {
984                         return;
985                 }
986
987                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
988                 {
989                         int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
990                         int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
991                         int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;
992                         int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;
993                         int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;
994
995
996                         int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;
997                         
998                         float4 posA = rigidBodies[bodyIndexA].m_pos;
999                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1000
1001                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1002                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1003
1004                         
1005                         for (int p=0;p<numSubTreesA;p++)
1006                         {
1007                                 b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];
1008                                 //bvhInfos[bvhA].m_quantization
1009                                 b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1010                                 b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1011
1012                                 b3Float4 aabbAMinOut,aabbAMaxOut;
1013                                 float margin=0.f;
1014                                 b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1015                                 
1016                                 for (int q=0;q<numSubTreesB;q++)
1017                                 {
1018                                         b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];
1019
1020                                         b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1021                                         b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1022
1023                                         b3Float4 aabbBMinOut,aabbBMaxOut;
1024                                         float margin=0.f;
1025                                         b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1026
1027                                         
1028                                         
1029                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1030                                         if (aabbOverlap)
1031                                         {
1032                                                 
1033                                                 int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;
1034                                                 int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1035
1036                                                 int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;
1037                                                 int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1038
1039
1040                                                 b3Int2 nodeStack[B3_MAX_STACK_DEPTH];
1041                                                 b3Int2 node0;
1042                                                 node0.x = startNodeIndexA;
1043                                                 node0.y = startNodeIndexB;
1044                                                 int maxStackDepth = B3_MAX_STACK_DEPTH;
1045                                                 int depth=0;
1046                                                 nodeStack[depth++]=node0;
1047
1048                                                 do
1049                                                 {
1050                                                         b3Int2 node = nodeStack[--depth];
1051
1052                                                         b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1053                                                         b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1054
1055                                                         b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1056                                                         b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1057
1058                                                         float margin=0.f;
1059                                                         b3Float4 aabbAMinOut,aabbAMaxOut;
1060                                                         b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1061
1062                                                         b3Float4 aabbBMinOut,aabbBMaxOut;
1063                                                         b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1064
1065                                                         
1066                                                         bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1067                                                         if (nodeOverlap)
1068                                                         {
1069                                                                 bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);
1070                                                                 bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);
1071                                                                 bool isInternalA = !isLeafA;
1072                                                                 bool isInternalB = !isLeafB;
1073
1074                                                                 //fail, even though it might hit two leaf nodes
1075                                                                 if (depth+4>maxStackDepth && !(isLeafA && isLeafB))
1076                                                                 {
1077                                                                         //printf("Error: traversal exceeded maxStackDepth");
1078                                                                         continue;
1079                                                                 }
1080
1081                                                                 if(isInternalA)
1082                                                                 {
1083                                                                         int nodeAleftChild = node.x+1;
1084                                                                         bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);
1085                                                                         int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);
1086
1087                                                                         if(isInternalB)
1088                                                                         {                                       
1089                                                                                 int nodeBleftChild = node.y+1;
1090                                                                                 bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1091                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1092
1093                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1094                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1095                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1096                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1097                                                                         }
1098                                                                         else
1099                                                                         {
1100                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);
1101                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);
1102                                                                         }
1103                                                                 }
1104                                                                 else
1105                                                                 {
1106                                                                         if(isInternalB)
1107                                                                         {
1108                                                                                 int nodeBleftChild = node.y+1;
1109                                                                                 bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1110                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1111                                                                                 nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);
1112                                                                                 nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);
1113                                                                         }
1114                                                                         else
1115                                                                         {
1116                                                                                 int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1117                                                                                 if (compoundPairIdx<maxNumCompoundPairsCapacity)
1118                                                                                 {
1119                                                                                         int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);
1120                                                                                         int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);
1121                                                                                         gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1122                                                                                 }
1123                                                                         }
1124                                                                 }
1125                                                         }
1126                                                 } while (depth);
1127                                         }
1128                                 }
1129                         }
1130                         
1131                         return;
1132                 }
1133
1134
1135
1136
1137
1138                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1139                 {
1140
1141                         if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) 
1142                         {
1143
1144                                 int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1145                                 for (int c=0;c<numChildrenA;c++)
1146                                 {
1147                                         int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;
1148                                         int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1149
1150                                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1151                                         float4 ornA = rigidBodies[bodyIndexA].m_quat;
1152                                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1153                                         float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1154                                         float4 newPosA = qtRotate(ornA,childPosA)+posA;
1155                                         float4 newOrnA = qtMul(ornA,childOrnA);
1156
1157                                         int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
1158                                         b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];
1159                                         float margin = 0.f;
1160                                         
1161                                         b3Float4 aabbAMinWS;
1162                                         b3Float4 aabbAMaxWS;
1163                                         
1164                                         b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,
1165                                                 newPosA,
1166                                                 newOrnA,
1167                                                 &aabbAMinWS,&aabbAMaxWS);
1168                                                 
1169                                         
1170                                         if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1171                                         {
1172                                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1173                                                 for (int b=0;b<numChildrenB;b++)
1174                                                 {
1175                                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1176                                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1177                                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1178                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1179                                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1180                                                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1181                                                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1182                                                         float4 newOrnB = qtMul(ornB,childOrnB);
1183
1184                                                         int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1185                                                         b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];
1186                                                         
1187                                                         b3Float4 aabbBMinWS;
1188                                                         b3Float4 aabbBMaxWS;
1189                                                         
1190                                                         b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,
1191                                                                 newPosB,
1192                                                                 newOrnB,
1193                                                                 &aabbBMinWS,&aabbBMaxWS);
1194                                                                 
1195                                                                 
1196                                                         
1197                                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);
1198                                                         if (aabbOverlap)
1199                                                         {
1200                                                                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1201                                                                 float dmin = FLT_MAX;
1202                                                                 float4 posA = newPosA;
1203                                                                 posA.w = 0.f;
1204                                                                 float4 posB = newPosB;
1205                                                                 posB.w = 0.f;
1206                                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1207                                                                 float4 ornA = newOrnA;
1208                                                                 float4 c0 = transform(&c0local, &posA, &ornA);
1209                                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1210                                                                 float4 ornB =newOrnB;
1211                                                                 float4 c1 = transform(&c1local,&posB,&ornB);
1212                                                                 const float4 DeltaC2 = c0 - c1;
1213
1214                                                                 {//
1215                                                                         int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1216                                                                         if (compoundPairIdx<maxNumCompoundPairsCapacity)
1217                                                                         {
1218                                                                                 gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1219                                                                         }
1220                                                                 }//
1221                                                         }//fi(1)
1222                                                 } //for (int b=0
1223                                         }//if (collidables[collidableIndexB].
1224                                         else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1225                                         {
1226                                                 if (1)
1227                                                 {
1228                                                         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1229                                                         float dmin = FLT_MAX;
1230                                                         float4 posA = newPosA;
1231                                                         posA.w = 0.f;
1232                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1233                                                         posB.w = 0.f;
1234                                                         float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1235                                                         float4 ornA = newOrnA;
1236                                                         float4 c0 = transform(&c0local, &posA, &ornA);
1237                                                         float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1238                                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1239                                                         float4 c1 = transform(&c1local,&posB,&ornB);
1240                                                         const float4 DeltaC2 = c0 - c1;
1241
1242                                                         {
1243                                                                 int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1244                                                                 if (compoundPairIdx<maxNumCompoundPairsCapacity)
1245                                                                 {
1246                                                                         gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);
1247                                                                 }//if (compoundPairIdx<maxNumCompoundPairsCapacity)
1248                                                         }//
1249                                                 }//fi (1)
1250                                         }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1251                                 }//for (int b=0;b<numChildrenB;b++)     
1252                                 return;
1253                         }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1254                         if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) 
1255                                 && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1256                         {
1257                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1258                                 for (int b=0;b<numChildrenB;b++)
1259                                 {
1260                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1261                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1262                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1263                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1264                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1265                                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1266                                         float4 newPosB = qtRotate(ornB,childPosB)+posB;
1267                                         float4 newOrnB = qtMul(ornB,childOrnB);
1268
1269                                         int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1270
1271
1272                                         //////////////////////////////////////
1273
1274                                         if (1)
1275                                         {
1276                                                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1277                                                 float dmin = FLT_MAX;
1278                                                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1279                                                 posA.w = 0.f;
1280                                                 float4 posB = newPosB;
1281                                                 posB.w = 0.f;
1282                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1283                                                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1284                                                 float4 c0 = transform(&c0local, &posA, &ornA);
1285                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1286                                                 float4 ornB =newOrnB;
1287                                                 float4 c1 = transform(&c1local,&posB,&ornB);
1288                                                 const float4 DeltaC2 = c0 - c1;
1289                                                 {//
1290                                                         int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1291                                                         if (compoundPairIdx<maxNumCompoundPairsCapacity)
1292                                                         {
1293                                                                 gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);
1294                                                         }//fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1295                                                 }//
1296                                         }//fi (1)       
1297                                 }//for (int b=0;b<numChildrenB;b++)
1298                                 return;
1299                         }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1300                         return;
1301                 }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1302         }//i<numPairs
1303 }
1304
1305 // work-in-progress
// findSeparatingAxisKernel: combined SAT pass, one work-item per entry of `pairs`
// (work-items whose get_global_id(0) is >= numPairs do nothing).
// For pair i it calls, in order, findSeparatingAxis(A,B), findSeparatingAxis(B,A) and
// findSeparatingAxisEdgeEdge(A,B), and stops at the first call that returns false.
//   hasSeparatingAxis[i] : written 0 when both bodies have m_invMass==0, when either
//                          collidable is not SHAPE_CONVEX_HULL, or when any of the three
//                          calls returns false; written 1 when all three return true.
//   separatingNormals[i] : written only in the "1" case, with the sepNormal the helpers filled in.
// `aabbs` is not referenced anywhere in this kernel body.
// NOTE(review): despite the output's name, 1 here means "none of the tests rejected the pair";
// confirm the helpers' return convention at their definitions (outside this view).
1306 __kernel void   findSeparatingAxisKernel( __global const int4* pairs, 
1307                                                                                                                                                                         __global const BodyData* rigidBodies, 
1308                                                                                                                                                                         __global const btCollidableGpu* collidables,
1309                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes, 
1310                                                                                                                                                                         __global const float4* vertices,
1311                                                                                                                                                                         __global const float4* uniqueEdges,
1312                                                                                                                                                                         __global const btGpuFace* faces,
1313                                                                                                                                                                         __global const int* indices,
1314                                                                                                                                                                         __global btAabbCL* aabbs,
1315                                                                                                                                                                         __global volatile float4* separatingNormals,
1316                                                                                                                                                                         __global volatile int* hasSeparatingAxis,
1317                                                                                                                                                                         int numPairs
1318                                                                                                                                                                         )
1319 {
1320
1321         int i = get_global_id(0);
1322         
1323         if (i<numPairs)
1324         {
1325
1326         
                     // pair -> body -> collidable -> shape index lookups
1327                 int bodyIndexA = pairs[i].x;
1328                 int bodyIndexB = pairs[i].y;
1329
1330                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1331                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1332         
1333                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1334                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1335                 
1336                 
1337                 //once the broadphase avoids static-static pairs, we can remove this test
1338                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1339                 {
1340                         hasSeparatingAxis[i] = 0;
1341                         return;
1342                 }
1343                 
1344
                     // only convex-hull vs convex-hull pairs are tested here; anything else is flagged 0
1345                 if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1346                 {
1347                         hasSeparatingAxis[i] = 0;
1348                         return;
1349                 }
1350                         
                     // NOTE(review): unreachable - the test above already returned for every A whose
                     // m_shapeType is not SHAPE_CONVEX_HULL, which includes SHAPE_CONCAVE_TRIMESH.
1351                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))
1352                 {
1353                         hasSeparatingAxis[i] = 0;
1354                         return;
1355                 }
1356
1357                 int numFacesA = convexShapes[shapeIndexA].m_numFaces; // NOTE(review): not read anywhere below
1358
                     // dmin and sepNormal are handed by address to all three helper calls below
1359                 float dmin = FLT_MAX;
1360
1361                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1362                 posA.w = 0.f; // w is cleared before the position is used
1363                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1364                 posB.w = 0.f;
                     // c0/c1: each hull's m_localCenter passed through transform() with its body's position and orientation
1365                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1366                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1367                 float4 c0 = transform(&c0local, &posA, &ornA);
1368                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1369                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1370                 float4 c1 = transform(&c1local,&posB,&ornB);
1371                 const float4 DeltaC2 = c0 - c1;
1372                 float4 sepNormal;
1373                 
                     // test 1: hull A first, hull B second
1374                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1375                                                                                                                                                                                                 posB,ornB,
1376                                                                                                                                                                                                 DeltaC2,
1377                                                                                                                                                                                                 vertices,uniqueEdges,faces,
1378                                                                                                                                                                                                 indices,&sepNormal,&dmin);
1379                 hasSeparatingAxis[i] = 4; // transient marker; every path below overwrites it with 0 or 1
1380                 if (!sepA)
1381                 {
1382                         hasSeparatingAxis[i] = 0;
1383                 } else
1384                 {
                             // test 2: same helper with the roles of A and B swapped (DeltaC2 is passed unchanged)
1385                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1386                                                                                                                                                                                                         posA,ornA,
1387                                                                                                                                                                                                         DeltaC2,
1388                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1389                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1390
1391                         if (!sepB)
1392                         {
1393                                 hasSeparatingAxis[i] = 0;
1394                         } else
1395                         {
                                     // test 3: edge-edge helper, A first
1396                                 bool sepEE = findSeparatingAxisEdgeEdge(        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1397                                                                                                                                                                                                         posB,ornB,
1398                                                                                                                                                                                                         DeltaC2,
1399                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1400                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1401                                 if (!sepEE)
1402                                 {
1403                                         hasSeparatingAxis[i] = 0;
1404                                 } else
1405                                 {
                                             // all three tests returned true: publish the flag and the normal
1406                                         hasSeparatingAxis[i] = 1;
1407                                         separatingNormals[i] = sepNormal;
1408                                 }
1409                         }
1410                 }
1411                 
1412         }
1413
1414 }
1415
1416
// findSeparatingAxisVertexFaceKernel: face-normal stage of the SAT test, one work-item
// per entry of `pairs` (work-items whose get_global_id(0) is >= numPairs do nothing).
//   hasSeparatingAxis[i] : 0 unless findSeparatingAxis(A,B) and findSeparatingAxis(B,A)
//                          both return true, in which case it is 1.
//   dmins[i]             : set to FLT_MAX once the pair passes the static-static and shape-type
//                          filters, then replaced by the helpers' dmin when the flag becomes 1.
//                          Not written for pairs rejected by those filters.
//   separatingNormals[i] : written only when the flag becomes 1.
// findSeparatingAxisEdgeEdgeKernel reads hasSeparatingAxis[i], dmins[i] and separatingNormals[i]
// and treats ANY non-zero flag as "keep processing this pair", so the transient marker 4
// used below must never be left in hasSeparatingAxis[i] on exit.
// `aabbs` is not referenced anywhere in this kernel body.
1417 __kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs, 
1418                                                                                                                                                                         __global const BodyData* rigidBodies, 
1419                                                                                                                                                                         __global const btCollidableGpu* collidables,
1420                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes, 
1421                                                                                                                                                                         __global const float4* vertices,
1422                                                                                                                                                                         __global const float4* uniqueEdges,
1423                                                                                                                                                                         __global const btGpuFace* faces,
1424                                                                                                                                                                         __global const int* indices,
1425                                                                                                                                                                         __global btAabbCL* aabbs,
1426                                                                                                                                                                         __global volatile float4* separatingNormals,
1427                                                                                                                                                                         __global volatile int* hasSeparatingAxis,
1428                                                                                                                                                                         __global  float* dmins,
1429                                                                                                                                                                         int numPairs
1430                                                                                                                                                                         )
1431 {
1432
1433         int i = get_global_id(0);
1434         
1435         if (i<numPairs)
1436         {
1437
1438         
                     // pair -> body -> collidable -> shape index lookups
1439                 int bodyIndexA = pairs[i].x;
1440                 int bodyIndexB = pairs[i].y;
1441
1442                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1443                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1444         
1445                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1446                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1447         
1448                 hasSeparatingAxis[i] = 0;       // default result; the early returns below rely on it
1449                 
1450                 //once the broadphase avoids static-static pairs, we can remove this test
1451                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1452                 {
1453                         return;
1454                 }
1455                 
1456
                     // only convex-hull vs convex-hull pairs are tested here
1457                 if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1458                 {
1459                         return;
1460                 }
1461                         
1462
1463                 int numFacesA = convexShapes[shapeIndexA].m_numFaces; // NOTE(review): not read anywhere below
1464
1465                 float dmin = FLT_MAX;
1466
1467                 dmins[i] = dmin; // initial value; replaced below only when both face tests return true
1468                 
1469                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1470                 posA.w = 0.f; // w is cleared before the position is used
1471                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1472                 posB.w = 0.f;
                     // c0/c1: each hull's m_localCenter passed through transform() with its body's position and orientation
1473                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1474                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1475                 float4 c0 = transform(&c0local, &posA, &ornA);
1476                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1477                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1478                 float4 c1 = transform(&c1local,&posB,&ornB);
1479                 const float4 DeltaC2 = c0 - c1;
1480                 float4 sepNormal;
1481                 
                     // test 1: hull A first, hull B second
1482                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1483                                                                                                                                                                                                 posB,ornB,
1484                                                                                                                                                                                                 DeltaC2,
1485                                                                                                                                                                                                 vertices,uniqueEdges,faces,
1486                                                                                                                                                                                                 indices,&sepNormal,&dmin);
1487                 hasSeparatingAxis[i] = 4; // transient marker; every path below overwrites it with 0 or 1
1488                 if (!sepA)
1489                 {
1490                         hasSeparatingAxis[i] = 0;
1491                 } else
1492                 {
                             // test 2: same helper with the roles of A and B swapped (DeltaC2 is passed unchanged)
1493                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1494                                                                                                                                                                                                         posA,ornA,
1495                                                                                                                                                                                                         DeltaC2,
1496                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1497                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1498
                             // Both face tests returned true: hand dmin and the normal on to the edge-edge stage.
                             // Otherwise the marker 4 is cleared to 0, the same result findSeparatingAxisKernel
                             // writes when its second face test returns false, so that the edge-edge kernel's
                             // `if (hasSeparatingAxis[i])` check skips this pair.
1499                         if (sepB)
1500                         {
1501                                 dmins[i] = dmin;
1502                                 hasSeparatingAxis[i] = 1;
1503                                 separatingNormals[i] = sepNormal;
1504                         } else { hasSeparatingAxis[i] = 0; }
1505                 }
1506                 
1507         }
1508
1509 }
1510
1511
// findSeparatingAxisEdgeEdgeKernel: edge-edge stage of the SAT test, one work-item per
// entry of `pairs` (work-items whose get_global_id(0) is >= numPairs do nothing).
// Only pairs whose hasSeparatingAxis[i] is already non-zero are processed; any non-zero
// value passes that check. The stage starts from dmins[i] and separatingNormals[i] as
// they were left by whatever ran before this kernel.
// findSeparatingAxisEdgeEdge is called only when
//   m_numUniqueEdges(A) * m_numUniqueEdges(B) <= numUnitSphereDirections;
// in that case hasSeparatingAxis[i] is rewritten to 0 or 1 and, on 1, separatingNormals[i]
// receives the helper's sepNormal. For larger edge products this kernel writes nothing;
// per the note kept in the commented-out block, that case is handled by a separate kernel.
// dmins[] is only read here. `aabbs` is not referenced, and `unitSphereDirections`
// appears only inside the commented-out block.
1512 __kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, 
1513                                                                                                                                                                         __global const BodyData* rigidBodies, 
1514                                                                                                                                                                         __global const btCollidableGpu* collidables,
1515                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes, 
1516                                                                                                                                                                         __global const float4* vertices,
1517                                                                                                                                                                         __global const float4* uniqueEdges,
1518                                                                                                                                                                         __global const btGpuFace* faces,
1519                                                                                                                                                                         __global const int* indices,
1520                                                                                                                                                                         __global btAabbCL* aabbs,
1521                                                                                                                                                                         __global  float4* separatingNormals,
1522                                                                                                                                                                         __global  int* hasSeparatingAxis,
1523                                                                                                                                                                         __global  float* dmins,
1524                                                                                                                                                                         __global const float4* unitSphereDirections,
1525                                                                                                                                                                         int numUnitSphereDirections,
1526                                                                                                                                                                         int numPairs
1527                                                                                                                                                                         )
1528 {
1529
1530         int i = get_global_id(0);
1531         
1532         if (i<numPairs)
1533         {
1534
1535                 if (hasSeparatingAxis[i]) // skip pairs already flagged 0
1536                 {
1537         
                             // pair -> body -> collidable -> shape index lookups
1538                         int bodyIndexA = pairs[i].x;
1539                         int bodyIndexB = pairs[i].y;
1540         
1541                         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1542                         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1543                 
1544                         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1545                         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1546                         
1547                         
1548                         int numFacesA = convexShapes[shapeIndexA].m_numFaces; // NOTE(review): not read anywhere below
1549         
1550                         float dmin = dmins[i]; // starting dmin comes from the per-pair buffer
1551         
1552                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1553                         posA.w = 0.f; // w is cleared before the position is used
1554                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1555                         posB.w = 0.f;
                             // c0/c1: each hull's m_localCenter passed through transform() with its body's position and orientation
1556                         float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1557                         float4 ornA = rigidBodies[bodyIndexA].m_quat;
1558                         float4 c0 = transform(&c0local, &posA, &ornA);
1559                         float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1560                         float4 ornB =rigidBodies[bodyIndexB].m_quat;
1561                         float4 c1 = transform(&c1local,&posB,&ornB);
1562                         const float4 DeltaC2 = c0 - c1;
1563                         float4 sepNormal = separatingNormals[i]; // starting normal comes from the per-pair buffer
1564                         
1565                         
1566                         
1567                         bool sepEE = false;
                             // the edge-edge helper is used only while the product of unique-edge counts fits the unit-sphere direction budget
1568                         int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;
1569                         if (numEdgeEdgeDirections<=numUnitSphereDirections)
1570                         {
1571                                 sepEE = findSeparatingAxisEdgeEdge(     &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1572                                                                                                                                                                                                         posB,ornB,
1573                                                                                                                                                                                                         DeltaC2,
1574                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1575                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1576                                                                                                                                                                                                         
1577                                         if (!sepEE)
1578                                         {
1579                                                 hasSeparatingAxis[i] = 0;
1580                                         } else
1581                                         {
1582                                                 hasSeparatingAxis[i] = 1;
1583                                                 separatingNormals[i] = sepNormal;
1584                                         }
1585                         }
1586                         /*
1587                         ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy
1588                         else
1589                         {
1590                                 sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1591                                                                                                                                                                                                         posB,ornB,
1592                                                                                                                                                                                                         DeltaC2,
1593                                                                                                                                                                                                         vertices,unitSphereDirections,numUnitSphereDirections,
1594                                                                                                                                                                                                         &sepNormal,&dmin);
1595                                         if (!sepEE)
1596                                         {
1597                                                 hasSeparatingAxis[i] = 0;
1598                                         } else
1599                                         {
1600                                                 hasSeparatingAxis[i] = 1;
1601                                                 separatingNormals[i] = sepNormal;
1602                                         }
1603                         }
1604                         */
1605                 }               //if (hasSeparatingAxis[i])
1606         }//(i<numPairs)
1607 }
1608
1609
1610
1611
1612
// findClippingFaces: for one pair, choose one face on each hull relative to separatingNormal
// and store the transformed vertices of both faces for pair `pairIndex`.
//   hullB side : the face whose qtRotate(ornB, normal) has the LARGEST dot3F4 with separatingNormal;
//                its vertices, passed through transform() with posB/ornB, go to
//                worldVertsB1[pairIndex*capacityWorldVerts + k].
//   hullA side : the face whose qtRotate(ornA, normal) has the SMALLEST dot3F4 with separatingNormal;
//                its vertices go to worldVertsA1[pairIndex*capacityWorldVerts + k], and the rotated
//                normal of that face ends up in worldNormalsA1[pairIndex].
//   clippingFaces[pairIndex] = (closestFaceA, closestFaceB, numVerticesA, numWorldVertsB1).
// At most capacityWorldVerts vertices are stored per face. Both comparisons are strict, so on
// equal dot products the lowest face index is kept.
// hullA, verticesA, facesA and indicesA are taken through pointers without the __global
// qualifier, whereas hull B's data is read through __global pointers.
// Returns numContactsOut, which is never modified and is therefore always 0.
// minDist and maxDist are not read in this function.
1613 inline int      findClippingFaces(const float4 separatingNormal,
1614                       const ConvexPolyhedronCL* hullA, 
1615                                           __global const ConvexPolyhedronCL* hullB,
1616                       const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
1617                        __global float4* worldVertsA1,
1618                       __global float4* worldNormalsA1,
1619                       __global float4* worldVertsB1,
1620                       int capacityWorldVerts,
1621                       const float minDist, float maxDist,
1622                                           const float4* verticesA,
1623                       const btGpuFace* facesA,
1624                       const int* indicesA,
1625                                           __global const float4* verticesB,
1626                       __global const btGpuFace* facesB,
1627                       __global const int* indicesB,
1628                       __global int4* clippingFaces, int pairIndex)
1629 {
1630         int numContactsOut = 0;
1631         int numWorldVertsB1= 0;
1632     
1633     
1634         int closestFaceB=0;
1635         float dmax = -FLT_MAX;
1636     
             // pick the face of hull B with the largest dot product against separatingNormal
1637         {
1638                 for(int face=0;face<hullB->m_numFaces;face++)
1639                 {
1640                         const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
1641                                               facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
1642                         const float4 WorldNormal = qtRotate(ornB, Normal);
1643                         float d = dot3F4(WorldNormal,separatingNormal);
1644                         if (d > dmax)
1645                         {
1646                                 dmax = d;
1647                                 closestFaceB = face;
1648                         }
1649                 }
1650         }
1651     
             // store that face's vertices (clamped to capacityWorldVerts) in this pair's slice of worldVertsB1
1652         {
1653                 const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];
1654                 int numVertices = polyB.m_numIndices;
1655         if (numVertices>capacityWorldVerts)
1656             numVertices = capacityWorldVerts;
1657         
1658                 for(int e0=0;e0<numVertices;e0++)
1659                 {
1660             if (e0<capacityWorldVerts) // always true here: numVertices was clamped just above
1661             {
1662                 const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
1663                 worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
1664             }
1665                 }
1666         }
1667     
         // pick the face of hull A with the smallest dot product against separatingNormal
1668     int closestFaceA=0;
1669         {
1670                 float dmin = FLT_MAX;
1671                 for(int face=0;face<hullA->m_numFaces;face++)
1672                 {
1673                         const float4 Normal = make_float4(
1674                                               facesA[hullA->m_faceOffset+face].m_plane.x,
1675                                               facesA[hullA->m_faceOffset+face].m_plane.y,
1676                                               facesA[hullA->m_faceOffset+face].m_plane.z,
1677                                               0.f);
1678                         const float4 faceANormalWS = qtRotate(ornA,Normal);
1679             
1680                         float d = dot3F4(faceANormalWS,separatingNormal);
1681                         if (d < dmin)
1682                         {
1683                                 dmin = d;
1684                                 closestFaceA = face;
                     // rewritten on every new minimum, so the last write belongs to closestFaceA;
                     // nothing is written if the loop never finds a face
1685                 worldNormalsA1[pairIndex] = faceANormalWS;
1686                         }
1687                 }
1688         }
1689     
         // store that face's vertices (clamped to capacityWorldVerts) in this pair's slice of worldVertsA1
1690     int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
1691     if (numVerticesA>capacityWorldVerts)
1692        numVerticesA = capacityWorldVerts;
1693     
1694         for(int e0=0;e0<numVerticesA;e0++)
1695         {
1696         if (e0<capacityWorldVerts) // always true here: numVerticesA was clamped just above
1697         {
1698             const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
1699             worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
1700         }
1701     }
1702     
         // per-pair summary: x = face index on A, y = face index on B, z / w = stored vertex counts for A / B
1703     clippingFaces[pairIndex].x = closestFaceA;
1704     clippingFaces[pairIndex].y = closestFaceB;
1705     clippingFaces[pairIndex].z = numVerticesA;
1706     clippingFaces[pairIndex].w = numWorldVertsB1;
1707     
1708     
1709         return numContactsOut;
1710 }
1711
1712
1713
1714
1715 // work-in-progress
1716 __kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,
1717                                                                                                                                                                         __global const BodyData* rigidBodies,
1718                                                                                                                                                                         __global const btCollidableGpu* collidables,
1719                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes, 
1720                                                                                                                                                                         __global const float4* vertices,
1721                                                                                                                                                                         __global const float4* uniqueEdges,
1722                                                                                                                                                                         __global const btGpuFace* faces,
1723                                                                                                                                                                         __global const int* indices,
1724                                                                                                                                                                         __global const btGpuChildShape* gpuChildShapes,
1725                                                                                                                                                                         __global btAabbCL* aabbs,
1726                                                                                                                                                                         __global float4* concaveSeparatingNormalsOut,
1727                                                                                                                                                                         __global int* concaveHasSeparatingNormals,
1728                                                                                                                                                                         __global int4* clippingFacesOut,
1729                                                                                                                                                                         __global float4* worldVertsA1GPU,
1730                                                                                                                                                                         __global float4*  worldNormalsAGPU,
1731                                                                                                                                                                         __global float4* worldVertsB1GPU,
1732                                                                                                                                                                         int vertexFaceCapacity,
1733                                                                                                                                                                         int numConcavePairs
1734                                                                                                                                                                         )
1735 {
1736
1737         int i = get_global_id(0);
1738         if (i>=numConcavePairs)
1739                 return;
1740
1741         concaveHasSeparatingNormals[i] = 0;
1742
1743         int pairIdx = i;
1744
1745         int bodyIndexA = concavePairs[i].x;
1746         int bodyIndexB = concavePairs[i].y;
1747
1748         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1749         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1750
1751         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1752         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1753
1754         if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
1755                 collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
1756         {
1757                 concavePairs[pairIdx].w = -1;
1758                 return;
1759         }
1760
1761
1762
1763         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1764         int numActualConcaveConvexTests = 0;
1765         
1766         int f = concavePairs[i].z;
1767         
1768         bool overlap = false;
1769         
1770         ConvexPolyhedronCL convexPolyhedronA;
1771
1772         //add 3 vertices of the triangle
1773         convexPolyhedronA.m_numVertices = 3;
1774         convexPolyhedronA.m_vertexOffset = 0;
1775         float4  localCenter = make_float4(0.f,0.f,0.f,0.f);
1776
1777         btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
1778         float4 triMinAabb, triMaxAabb;
1779         btAabbCL triAabb;
1780         triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);
1781         triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);
1782         
1783         float4 verticesA[3];
1784         for (int i=0;i<3;i++)
1785         {
1786                 int index = indices[face.m_indexOffset+i];
1787                 float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1788                 verticesA[i] = vert;
1789                 localCenter += vert;
1790                         
1791                 triAabb.m_min = min(triAabb.m_min,vert);                
1792                 triAabb.m_max = max(triAabb.m_max,vert);                
1793
1794         }
1795
1796         overlap = true;
1797         overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;
1798         overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;
1799         overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;
1800                 
1801         if (overlap)
1802         {
1803                 float dmin = FLT_MAX;
1804                 int hasSeparatingAxis=5;
1805                 float4 sepAxis=make_float4(1,2,3,4);
1806
1807                 int localCC=0;
1808                 numActualConcaveConvexTests++;
1809
1810                 //a triangle has 3 unique edges
1811                 convexPolyhedronA.m_numUniqueEdges = 3;
1812                 convexPolyhedronA.m_uniqueEdgesOffset = 0;
1813                 float4 uniqueEdgesA[3];
1814                 
1815                 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1816                 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1817                 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1818
1819
1820                 convexPolyhedronA.m_faceOffset = 0;
1821                                   
1822                 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1823                              
1824                 btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];
1825                 int indicesA[3+3+2+2+2];
1826                 int curUsedIndices=0;
1827                 int fidx=0;
1828
1829                 //front side of triangle
1830                 {
1831                         facesA[fidx].m_indexOffset=curUsedIndices;
1832                         indicesA[0] = 0;
1833                         indicesA[1] = 1;
1834                         indicesA[2] = 2;
1835                         curUsedIndices+=3;
1836                         float c = face.m_plane.w;
1837                         facesA[fidx].m_plane.x = normal.x;
1838                         facesA[fidx].m_plane.y = normal.y;
1839                         facesA[fidx].m_plane.z = normal.z;
1840                         facesA[fidx].m_plane.w = c;
1841                         facesA[fidx].m_numIndices=3;
1842                 }
1843                 fidx++;
1844                 //back side of triangle
1845                 {
1846                         facesA[fidx].m_indexOffset=curUsedIndices;
1847                         indicesA[3]=2;
1848                         indicesA[4]=1;
1849                         indicesA[5]=0;
1850                         curUsedIndices+=3;
1851                         float c = dot(normal,verticesA[0]);
1852                         float c1 = -face.m_plane.w;
1853                         facesA[fidx].m_plane.x = -normal.x;
1854                         facesA[fidx].m_plane.y = -normal.y;
1855                         facesA[fidx].m_plane.z = -normal.z;
1856                         facesA[fidx].m_plane.w = c;
1857                         facesA[fidx].m_numIndices=3;
1858                 }
1859                 fidx++;
1860
1861                 bool addEdgePlanes = true;
1862                 if (addEdgePlanes)
1863                 {
1864                         int numVertices=3;
1865                         int prevVertex = numVertices-1;
1866                         for (int i=0;i<numVertices;i++)
1867                         {
1868                                 float4 v0 = verticesA[i];
1869                                 float4 v1 = verticesA[prevVertex];
1870                                             
1871                                 float4 edgeNormal = normalize(cross(normal,v1-v0));
1872                                 float c = -dot(edgeNormal,v0);
1873
1874                                 facesA[fidx].m_numIndices = 2;
1875                                 facesA[fidx].m_indexOffset=curUsedIndices;
1876                                 indicesA[curUsedIndices++]=i;
1877                                 indicesA[curUsedIndices++]=prevVertex;
1878                                             
1879                                 facesA[fidx].m_plane.x = edgeNormal.x;
1880                                 facesA[fidx].m_plane.y = edgeNormal.y;
1881                                 facesA[fidx].m_plane.z = edgeNormal.z;
1882                                 facesA[fidx].m_plane.w = c;
1883                                 fidx++;
1884                                 prevVertex = i;
1885                         }
1886                 }
1887                 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1888                 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1889
1890
1891                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1892                 posA.w = 0.f;
1893                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1894                 posB.w = 0.f;
1895
1896                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1897                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1898
1899                 
1900
1901
1902                 ///////////////////
1903                 ///compound shape support
1904
1905                 if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1906                 {
1907                         int compoundChild = concavePairs[pairIdx].w;
1908                         int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
1909                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1910                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1911                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1912                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1913                         float4 newOrnB = qtMul(ornB,childOrnB);
1914                         posB = newPosB;
1915                         ornB = newOrnB;
1916                         shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1917                 }
1918                 //////////////////
1919
1920                 float4 c0local = convexPolyhedronA.m_localCenter;
1921                 float4 c0 = transform(&c0local, &posA, &ornA);
1922                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1923                 float4 c1 = transform(&c1local,&posB,&ornB);
1924                 const float4 DeltaC2 = c0 - c1;
1925
1926
1927                 bool sepA = findSeparatingAxisLocalA(   &convexPolyhedronA, &convexShapes[shapeIndexB],
1928                                                                                                 posA,ornA,
1929                                                                                                 posB,ornB,
1930                                                                                                 DeltaC2,
1931                                                                                                 verticesA,uniqueEdgesA,facesA,indicesA,
1932                                                                                                 vertices,uniqueEdges,faces,indices,
1933                                                                                                 &sepAxis,&dmin);
1934                 hasSeparatingAxis = 4;
1935                 if (!sepA)
1936                 {
1937                         hasSeparatingAxis = 0;
1938                 } else
1939                 {
1940                         bool sepB = findSeparatingAxisLocalB(   &convexShapes[shapeIndexB],&convexPolyhedronA,
1941                                                                                                 posB,ornB,
1942                                                                                                 posA,ornA,
1943                                                                                                 DeltaC2,
1944                                                                                                 vertices,uniqueEdges,faces,indices,
1945                                                                                                 verticesA,uniqueEdgesA,facesA,indicesA,
1946                                                                                                 &sepAxis,&dmin);
1947
1948                         if (!sepB)
1949                         {
1950                                 hasSeparatingAxis = 0;
1951                         } else
1952                         {
1953                                 bool sepEE = findSeparatingAxisEdgeEdgeLocalA(  &convexPolyhedronA, &convexShapes[shapeIndexB],
1954                                                                                                                         posA,ornA,
1955                                                                                                                         posB,ornB,
1956                                                                                                                         DeltaC2,
1957                                                                                                                         verticesA,uniqueEdgesA,facesA,indicesA,
1958                                                                                                                         vertices,uniqueEdges,faces,indices,
1959                                                                                                                         &sepAxis,&dmin);
1960         
1961                                 if (!sepEE)
1962                                 {
1963                                         hasSeparatingAxis = 0;
1964                                 } else
1965                                 {
1966                                         hasSeparatingAxis = 1;
1967                                 }
1968                         }
1969                 }       
1970                 
1971                 if (hasSeparatingAxis)
1972                 {
1973                         sepAxis.w = dmin;
1974                         concaveSeparatingNormalsOut[pairIdx]=sepAxis;
1975                         concaveHasSeparatingNormals[i]=1;
1976
1977
1978                         float minDist = -1e30f;
1979                         float maxDist = 0.02f;
1980
1981                 
1982
1983                         findClippingFaces(sepAxis,
1984                      &convexPolyhedronA,
1985                                          &convexShapes[shapeIndexB],
1986                                          posA,ornA,
1987                                          posB,ornB,
1988                       worldVertsA1GPU,
1989                       worldNormalsAGPU,
1990                       worldVertsB1GPU,
1991                                           vertexFaceCapacity,
1992                       minDist, maxDist,
1993                       verticesA,
1994                       facesA,
1995                       indicesA,
1996                                           vertices,
1997                       faces,
1998                       indices,
1999                       clippingFacesOut, pairIdx);
2000
2001
2002                 } else
2003                 {       
2004                         //mark this pair as in-active
2005                         concavePairs[pairIdx].w = -1;
2006                 }
2007         }
2008         else
2009         {       
2010                 //mark this pair as in-active
2011                 concavePairs[pairIdx].w = -1;
2012         }
2013         
2014         concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts
2015 }
2016
2017
2018