[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / NarrowphaseCollision / kernels / satConcave.cl
1
// Shape-type identifiers and shared limits for the concave SAT kernels.
// Keep the shape-type values in sync with the CPU version (in btCollidable.h).
// Written by Erwin Coumans.

// Values compared against btCollidableGpu::m_shapeType.
#define SHAPE_CONVEX_HULL 3
#define SHAPE_CONCAVE_TRIMESH 5
#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6

// Face count of the temporary convex polyhedron built around one triangle.
#define TRIANGLE_NUM_CONVEX_FACES 5

#define B3_MAX_STACK_DEPTH 256

typedef unsigned int u32;
15
/// GPU-side collidable record. Keep this layout in sync with btCollidable.h.
typedef struct
{
	// First word: two alternative interpretations share the same storage.
	union
	{
		int m_numChildShapes;
		int m_bvhIndex;
	};

	// Second word: likewise shared between two interpretations.
	union
	{
		float m_radius;
		int m_compoundBvhIndex;
	};

	int m_shapeType;   // compared against the SHAPE_* constants
	int m_shapeIndex;  // index into the per-type shape array
} btCollidableGpu;
33
#define MAX_NUM_PARTS_IN_BITS 10

/// b3QuantizedBvhNode is a compressed AABB node, 16 bytes in total.
/// The same layout serves leaf nodes and internal nodes: a non-negative
/// m_escapeIndexOrTriangleIndex marks a leaf (32-bit triangle index,
/// non-negative range), a negative value marks an internal node.
typedef struct
{
	unsigned short m_quantizedAabbMin[3];  // 6 bytes
	unsigned short m_quantizedAabbMax[3];  // 6 bytes
	int m_escapeIndexOrTriangleIndex;      // 4 bytes
} b3QuantizedBvhNode;
46
47 typedef struct
48 {
49         float4          m_aabbMin;
50         float4          m_aabbMax;
51         float4          m_quantization;
52         int                     m_numNodes;
53         int                     m_numSubTrees;
54         int                     m_nodeOffset;
55         int                     m_subTreeOffset;
56
57 } b3BvhInfo;
58
59
60 int     getTriangleIndex(const b3QuantizedBvhNode* rootNode)
61 {
62         unsigned int x=0;
63         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
64         // Get only the lower bits where the triangle index is stored
65         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
66 }
67
68 int     getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
69 {
70         unsigned int x=0;
71         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
72         // Get only the lower bits where the triangle index is stored
73         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
74 }
75
76 int isLeafNode(const b3QuantizedBvhNode* rootNode)
77 {
78         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
79         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
80 }
81
82 int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)
83 {
84         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
85         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
86 }
87         
88 int getEscapeIndex(const b3QuantizedBvhNode* rootNode)
89 {
90         return -rootNode->m_escapeIndexOrTriangleIndex;
91 }
92
93 int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
94 {
95         return -rootNode->m_escapeIndexOrTriangleIndex;
96 }
97
98
/// Header describing one subtree of a quantized BVH: its quantized bounds,
/// the index of its root node, and its size.
typedef struct
{
	// 12 bytes of quantized bounds
	unsigned short m_quantizedAabbMin[3];
	unsigned short m_quantizedAabbMax[3];

	int m_rootNodeIndex;  // 4 bytes, points to the root of the subtree
	int m_subtreeSize;    // 4 bytes
	int m_padding[3];     // explicit padding words
} b3BvhSubtreeInfo;
110
111
112
113
114
115
116
117 typedef struct
118 {
119         float4  m_childPosition;
120         float4  m_childOrientation;
121         int m_shapeIndex;
122         int m_unused0;
123         int m_unused1;
124         int m_unused2;
125 } btGpuChildShape;
126
127
128 typedef struct
129 {
130         float4 m_pos;
131         float4 m_quat;
132         float4 m_linVel;
133         float4 m_angVel;
134
135         u32 m_collidableIdx;
136         float m_invMass;
137         float m_restituitionCoeff;
138         float m_frictionCoeff;
139 } BodyData;
140
141
142 typedef struct  
143 {
144         float4          m_localCenter;
145         float4          m_extents;
146         float4          mC;
147         float4          mE;
148         
149         float                   m_radius;
150         int     m_faceOffset;
151         int m_numFaces;
152         int     m_numVertices;
153
154         int m_vertexOffset;
155         int     m_uniqueEdgesOffset;
156         int     m_numUniqueEdges;
157         int m_unused;
158 } ConvexPolyhedronCL;
159
160 typedef struct 
161 {
162         union
163         {
164                 float4  m_min;
165                 float   m_minElems[4];
166                 int                     m_minIndices[4];
167         };
168         union
169         {
170                 float4  m_max;
171                 float   m_maxElems[4];
172                 int                     m_maxIndices[4];
173         };
174 } btAabbCL;
175
176 #include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
177 #include "Bullet3Common/shared/b3Int2.h"
178
179
180
181 typedef struct
182 {
183         float4 m_plane;
184         int m_indexOffset;
185         int m_numIndices;
186 } btGpuFace;
187
188 #define make_float4 (float4)
189
190
191 __inline
192 float4 cross3(float4 a, float4 b)
193 {
194         return cross(a,b);
195
196         
197 //      float4 a1 = make_float4(a.xyz,0.f);
198 //      float4 b1 = make_float4(b.xyz,0.f);
199
200 //      return cross(a1,b1);
201
202 //float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);
203         
204         //      float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);
205         
206         //return c;
207 }
208
209 __inline
210 float dot3F4(float4 a, float4 b)
211 {
212         float4 a1 = make_float4(a.xyz,0.f);
213         float4 b1 = make_float4(b.xyz,0.f);
214         return dot(a1, b1);
215 }
216
217 __inline
218 float4 fastNormalize4(float4 v)
219 {
220         v = make_float4(v.xyz,0.f);
221         return fast_normalize(v);
222 }
223
224
225 ///////////////////////////////////////
226 //      Quaternion
227 ///////////////////////////////////////
228
229 typedef float4 Quaternion;
230
231 __inline
232 Quaternion qtMul(Quaternion a, Quaternion b);
233
234 __inline
235 Quaternion qtNormalize(Quaternion in);
236
237 __inline
238 float4 qtRotate(Quaternion q, float4 vec);
239
240 __inline
241 Quaternion qtInvert(Quaternion q);
242
243
244
245
246 __inline
247 Quaternion qtMul(Quaternion a, Quaternion b)
248 {
249         Quaternion ans;
250         ans = cross3( a, b );
251         ans += a.w*b+b.w*a;
252 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
253         ans.w = a.w*b.w - dot3F4(a, b);
254         return ans;
255 }
256
257 __inline
258 Quaternion qtNormalize(Quaternion in)
259 {
260         return fastNormalize4(in);
261 //      in /= length( in );
262 //      return in;
263 }
264 __inline
265 float4 qtRotate(Quaternion q, float4 vec)
266 {
267         Quaternion qInv = qtInvert( q );
268         float4 vcpy = vec;
269         vcpy.w = 0.f;
270         float4 out = qtMul(qtMul(q,vcpy),qInv);
271         return out;
272 }
273
274 __inline
275 Quaternion qtInvert(Quaternion q)
276 {
277         return (Quaternion)(-q.xyz, q.w);
278 }
279
280 __inline
281 float4 qtInvRotate(const Quaternion q, float4 vec)
282 {
283         return qtRotate( qtInvert( q ), vec );
284 }
285
286 __inline
287 float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
288 {
289         return qtRotate( *orientation, *p ) + (*translation);
290 }
291
292
293
294 __inline
295 float4 normalize3(const float4 a)
296 {
297         float4 n = make_float4(a.x, a.y, a.z, 0.f);
298         return fastNormalize4( n );
299 }
300
301 inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, 
302 const float4* dir, const float4* vertices, float* min, float* max)
303 {
304         min[0] = FLT_MAX;
305         max[0] = -FLT_MAX;
306         int numVerts = hull->m_numVertices;
307
308         const float4 localDir = qtInvRotate(orn,*dir);
309         float offset = dot(pos,*dir);
310         for(int i=0;i<numVerts;i++)
311         {
312                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
313                 if(dp < min[0]) 
314                         min[0] = dp;
315                 if(dp > max[0]) 
316                         max[0] = dp;
317         }
318         if(min[0]>max[0])
319         {
320                 float tmp = min[0];
321                 min[0] = max[0];
322                 max[0] = tmp;
323         }
324         min[0] += offset;
325         max[0] += offset;
326 }
327
328 inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, 
329 const float4* dir, __global const float4* vertices, float* min, float* max)
330 {
331         min[0] = FLT_MAX;
332         max[0] = -FLT_MAX;
333         int numVerts = hull->m_numVertices;
334
335         const float4 localDir = qtInvRotate(orn,*dir);
336         float offset = dot(pos,*dir);
337         for(int i=0;i<numVerts;i++)
338         {
339                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
340                 if(dp < min[0]) 
341                         min[0] = dp;
342                 if(dp > max[0]) 
343                         max[0] = dp;
344         }
345         if(min[0]>max[0])
346         {
347                 float tmp = min[0];
348                 min[0] = max[0];
349                 max[0] = tmp;
350         }
351         min[0] += offset;
352         max[0] += offset;
353 }
354
355 inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
356         const float4 posA,const float4 ornA,
357         const float4 posB,const float4 ornB,
358         float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)
359 {
360         float Min0,Max0;
361         float Min1,Max1;
362         projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);
363         project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);
364
365         if(Max0<Min1 || Max1<Min0)
366                 return false;
367
368         float d0 = Max0 - Min1;
369         float d1 = Max1 - Min0;
370         *depth = d0<d1 ? d0:d1;
371         return true;
372 }
373
374
375
376
377 inline bool IsAlmostZero(const float4 v)
378 {
379         if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)
380                 return false;
381         return true;
382 }
383
384
385
386 bool findSeparatingAxisLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
387         const float4 posA1,
388         const float4 ornA,
389         const float4 posB1,
390         const float4 ornB,
391         const float4 DeltaC2,
392         
393         const float4* verticesA, 
394         const float4* uniqueEdgesA, 
395         const btGpuFace* facesA,
396         const int*  indicesA,
397
398         __global const float4* verticesB, 
399         __global const float4* uniqueEdgesB, 
400         __global const btGpuFace* facesB,
401         __global const int*  indicesB,
402         float4* sep,
403         float* dmin)
404 {
405         
406
407         float4 posA = posA1;
408         posA.w = 0.f;
409         float4 posB = posB1;
410         posB.w = 0.f;
411         int curPlaneTests=0;
412         {
413                 int numFacesA = hullA->m_numFaces;
414                 // Test normals from hullA
415                 for(int i=0;i<numFacesA;i++)
416                 {
417                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
418                         float4 faceANormalWS = qtRotate(ornA,normal);
419                         if (dot3F4(DeltaC2,faceANormalWS)<0)
420                                 faceANormalWS*=-1.f;
421                         curPlaneTests++;
422                         float d;
423                         if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))
424                                 return false;
425                         if(d<*dmin)
426                         {
427                                 *dmin = d;
428                                 *sep = faceANormalWS;
429                         }
430                 }
431         }
432         if((dot3F4(-DeltaC2,*sep))>0.0f)
433         {
434                 *sep = -(*sep);
435         }
436         return true;
437 }
438
439 bool findSeparatingAxisLocalB(  __global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, 
440         const float4 posA1,
441         const float4 ornA,
442         const float4 posB1,
443         const float4 ornB,
444         const float4 DeltaC2,
445         __global const float4* verticesA, 
446         __global const float4* uniqueEdgesA, 
447         __global const btGpuFace* facesA,
448         __global const int*  indicesA,
449         const float4* verticesB,
450         const float4* uniqueEdgesB, 
451         const btGpuFace* facesB,
452         const int*  indicesB,
453         float4* sep,
454         float* dmin)
455 {
456
457
458         float4 posA = posA1;
459         posA.w = 0.f;
460         float4 posB = posB1;
461         posB.w = 0.f;
462         int curPlaneTests=0;
463         {
464                 int numFacesA = hullA->m_numFaces;
465                 // Test normals from hullA
466                 for(int i=0;i<numFacesA;i++)
467                 {
468                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
469                         float4 faceANormalWS = qtRotate(ornA,normal);
470                         if (dot3F4(DeltaC2,faceANormalWS)<0)
471                                 faceANormalWS *= -1.f;
472                         curPlaneTests++;
473                         float d;
474                         if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))
475                                 return false;
476                         if(d<*dmin)
477                         {
478                                 *dmin = d;
479                                 *sep = faceANormalWS;
480                         }
481                 }
482         }
483         if((dot3F4(-DeltaC2,*sep))>0.0f)
484         {
485                 *sep = -(*sep);
486         }
487         return true;
488 }
489
490
491
492 bool findSeparatingAxisEdgeEdgeLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, 
493         const float4 posA1,
494         const float4 ornA,
495         const float4 posB1,
496         const float4 ornB,
497         const float4 DeltaC2,
498         const float4* verticesA, 
499         const float4* uniqueEdgesA, 
500         const btGpuFace* facesA,
501         const int*  indicesA,
502         __global const float4* verticesB, 
503         __global const float4* uniqueEdgesB, 
504         __global const btGpuFace* facesB,
505         __global const int*  indicesB,
506                 float4* sep,
507         float* dmin)
508 {
509
510
511         float4 posA = posA1;
512         posA.w = 0.f;
513         float4 posB = posB1;
514         posB.w = 0.f;
515
516         int curPlaneTests=0;
517
518         int curEdgeEdge = 0;
519         // Test edges
520         for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
521         {
522                 const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
523                 float4 edge0World = qtRotate(ornA,edge0);
524
525                 for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
526                 {
527                         const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
528                         float4 edge1World = qtRotate(ornB,edge1);
529
530
531                         float4 crossje = cross3(edge0World,edge1World);
532
533                         curEdgeEdge++;
534                         if(!IsAlmostZero(crossje))
535                         {
536                                 crossje = normalize3(crossje);
537                                 if (dot3F4(DeltaC2,crossje)<0)
538                                         crossje *= -1.f;
539
540                                 float dist;
541                                 bool result = true;
542                                 {
543                                         float Min0,Max0;
544                                         float Min1,Max1;
545                                         projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
546                                         project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
547                                 
548                                         if(Max0<Min1 || Max1<Min0)
549                                                 result = false;
550                                 
551                                         float d0 = Max0 - Min1;
552                                         float d1 = Max1 - Min0;
553                                         dist = d0<d1 ? d0:d1;
554                                         result = true;
555
556                                 }
557                                 
558
559                                 if(dist<*dmin)
560                                 {
561                                         *dmin = dist;
562                                         *sep = crossje;
563                                 }
564                         }
565                 }
566
567         }
568
569         
570         if((dot3F4(-DeltaC2,*sep))>0.0f)
571         {
572                 *sep = -(*sep);
573         }
574         return true;
575 }
576
577
578
579 inline int      findClippingFaces(const float4 separatingNormal,
580                       const ConvexPolyhedronCL* hullA, 
581                                           __global const ConvexPolyhedronCL* hullB,
582                       const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
583                        __global float4* worldVertsA1,
584                       __global float4* worldNormalsA1,
585                       __global float4* worldVertsB1,
586                       int capacityWorldVerts,
587                       const float minDist, float maxDist,
588                                           const float4* verticesA,
589                       const btGpuFace* facesA,
590                       const int* indicesA,
591                                           __global const float4* verticesB,
592                       __global const btGpuFace* facesB,
593                       __global const int* indicesB,
594                       __global int4* clippingFaces, int pairIndex)
595 {
596         int numContactsOut = 0;
597         int numWorldVertsB1= 0;
598     
599     
600         int closestFaceB=0;
601         float dmax = -FLT_MAX;
602     
603         {
604                 for(int face=0;face<hullB->m_numFaces;face++)
605                 {
606                         const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
607                                               facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
608                         const float4 WorldNormal = qtRotate(ornB, Normal);
609                         float d = dot3F4(WorldNormal,separatingNormal);
610                         if (d > dmax)
611                         {
612                                 dmax = d;
613                                 closestFaceB = face;
614                         }
615                 }
616         }
617     
618         {
619                 const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];
620                 int numVertices = polyB.m_numIndices;
621         if (numVertices>capacityWorldVerts)
622             numVertices = capacityWorldVerts;
623         if (numVertices<0)
624             numVertices = 0;
625         
626                 for(int e0=0;e0<numVertices;e0++)
627                 {
628             if (e0<capacityWorldVerts)
629             {
630                 const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
631                 worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
632             }
633                 }
634         }
635     
636     int closestFaceA=0;
637         {
638                 float dmin = FLT_MAX;
639                 for(int face=0;face<hullA->m_numFaces;face++)
640                 {
641                         const float4 Normal = make_float4(
642                                               facesA[hullA->m_faceOffset+face].m_plane.x,
643                                               facesA[hullA->m_faceOffset+face].m_plane.y,
644                                               facesA[hullA->m_faceOffset+face].m_plane.z,
645                                               0.f);
646                         const float4 faceANormalWS = qtRotate(ornA,Normal);
647             
648                         float d = dot3F4(faceANormalWS,separatingNormal);
649                         if (d < dmin)
650                         {
651                                 dmin = d;
652                                 closestFaceA = face;
653                 worldNormalsA1[pairIndex] = faceANormalWS;
654                         }
655                 }
656         }
657     
658     int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
659     if (numVerticesA>capacityWorldVerts)
660        numVerticesA = capacityWorldVerts;
661     if (numVerticesA<0)
662         numVerticesA=0;
663     
664         for(int e0=0;e0<numVerticesA;e0++)
665         {
666         if (e0<capacityWorldVerts)
667         {
668             const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
669             worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
670         }
671     }
672     
673     clippingFaces[pairIndex].x = closestFaceA;
674     clippingFaces[pairIndex].y = closestFaceB;
675     clippingFaces[pairIndex].z = numVerticesA;
676     clippingFaces[pairIndex].w = numWorldVertsB1;
677     
678     
679         return numContactsOut;
680 }
681
682
683
684
685 // work-in-progress
686 __kernel void   findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,
687                                                 __global const BodyData* rigidBodies,
688                                                 __global const btCollidableGpu* collidables,
689                                                 __global const ConvexPolyhedronCL* convexShapes,
690                                                 __global const float4* vertices,
691                                                 __global const float4* uniqueEdges,
692                                                 __global const btGpuFace* faces,
693                                                 __global const int* indices,
694                                                 __global const btGpuChildShape* gpuChildShapes,
695                                                 __global btAabbCL* aabbs,
696                                                 __global float4* concaveSeparatingNormalsOut,
697                                                 __global int* concaveHasSeparatingNormals,
698                                                 __global int4* clippingFacesOut,
699                                                 __global float4* worldVertsA1GPU,
700                                                 __global float4*  worldNormalsAGPU,
701                                                 __global float4* worldVertsB1GPU,
702                                                 __global float* dmins,
703                                                 int vertexFaceCapacity,
704                                                 int numConcavePairs
705                                                 )
706 {
707     
708         int i = get_global_id(0);
709         if (i>=numConcavePairs)
710                 return;
711     
712         concaveHasSeparatingNormals[i] = 0;
713     
714         int pairIdx = i;
715     
716         int bodyIndexA = concavePairs[i].x;
717         int bodyIndexB = concavePairs[i].y;
718     
719         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
720         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
721     
722         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
723         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
724     
725         if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
726                 collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
727         {
728                 concavePairs[pairIdx].w = -1;
729                 return;
730         }
731     
732     
733     
734         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
735         int numActualConcaveConvexTests = 0;
736         
737         int f = concavePairs[i].z;
738         
739         bool overlap = false;
740         
741         ConvexPolyhedronCL convexPolyhedronA;
742     
743         //add 3 vertices of the triangle
744         convexPolyhedronA.m_numVertices = 3;
745         convexPolyhedronA.m_vertexOffset = 0;
746         float4  localCenter = make_float4(0.f,0.f,0.f,0.f);
747     
748         btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
749         float4 triMinAabb, triMaxAabb;
750         btAabbCL triAabb;
751         triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);
752         triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);
753         
754         float4 verticesA[3];
755         for (int i=0;i<3;i++)
756         {
757                 int index = indices[face.m_indexOffset+i];
758                 float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
759                 verticesA[i] = vert;
760                 localCenter += vert;
761         
762                 triAabb.m_min = min(triAabb.m_min,vert);
763                 triAabb.m_max = max(triAabb.m_max,vert);
764         
765         }
766     
767         overlap = true;
768         overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;
769         overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;
770         overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;
771     
772         if (overlap)
773         {
774                 float dmin = FLT_MAX;
775                 int hasSeparatingAxis=5;
776                 float4 sepAxis=make_float4(1,2,3,4);
777         
778                 int localCC=0;
779                 numActualConcaveConvexTests++;
780         
781                 //a triangle has 3 unique edges
782                 convexPolyhedronA.m_numUniqueEdges = 3;
783                 convexPolyhedronA.m_uniqueEdgesOffset = 0;
784                 float4 uniqueEdgesA[3];
785                 
786                 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
787                 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
788                 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
789         
790         
791                 convexPolyhedronA.m_faceOffset = 0;
792         
793                 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
794         
795                 btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];
796                 int indicesA[3+3+2+2+2];
797                 int curUsedIndices=0;
798                 int fidx=0;
799         
800                 //front size of triangle
801                 {
802                         facesA[fidx].m_indexOffset=curUsedIndices;
803                         indicesA[0] = 0;
804                         indicesA[1] = 1;
805                         indicesA[2] = 2;
806                         curUsedIndices+=3;
807                         float c = face.m_plane.w;
808                         facesA[fidx].m_plane.x = normal.x;
809                         facesA[fidx].m_plane.y = normal.y;
810                         facesA[fidx].m_plane.z = normal.z;
811                         facesA[fidx].m_plane.w = c;
812                         facesA[fidx].m_numIndices=3;
813                 }
814                 fidx++;
815                 //back size of triangle
816                 {
817                         facesA[fidx].m_indexOffset=curUsedIndices;
818                         indicesA[3]=2;
819                         indicesA[4]=1;
820                         indicesA[5]=0;
821                         curUsedIndices+=3;
822                         float c = dot(normal,verticesA[0]);
823                         float c1 = -face.m_plane.w;
824                         facesA[fidx].m_plane.x = -normal.x;
825                         facesA[fidx].m_plane.y = -normal.y;
826                         facesA[fidx].m_plane.z = -normal.z;
827                         facesA[fidx].m_plane.w = c;
828                         facesA[fidx].m_numIndices=3;
829                 }
830                 fidx++;
831         
832                 bool addEdgePlanes = true;
833                 if (addEdgePlanes)
834                 {
835                         int numVertices=3;
836                         int prevVertex = numVertices-1;
837                         for (int i=0;i<numVertices;i++)
838                         {
839                                 float4 v0 = verticesA[i];
840                                 float4 v1 = verticesA[prevVertex];
841                 
842                                 float4 edgeNormal = normalize(cross(normal,v1-v0));
843                                 float c = -dot(edgeNormal,v0);
844                 
845                                 facesA[fidx].m_numIndices = 2;
846                                 facesA[fidx].m_indexOffset=curUsedIndices;
847                                 indicesA[curUsedIndices++]=i;
848                                 indicesA[curUsedIndices++]=prevVertex;
849                 
850                                 facesA[fidx].m_plane.x = edgeNormal.x;
851                                 facesA[fidx].m_plane.y = edgeNormal.y;
852                                 facesA[fidx].m_plane.z = edgeNormal.z;
853                                 facesA[fidx].m_plane.w = c;
854                                 fidx++;
855                                 prevVertex = i;
856                         }
857                 }
858                 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
859                 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
860         
861         
862                 float4 posA = rigidBodies[bodyIndexA].m_pos;
863                 posA.w = 0.f;
864                 float4 posB = rigidBodies[bodyIndexB].m_pos;
865                 posB.w = 0.f;
866         
867                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
868                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
869         
870                 
871         
872         
873                 ///////////////////
874                 ///compound shape support
875         
876                 if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
877                 {
878                         int compoundChild = concavePairs[pairIdx].w;
879                         int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
880                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
881                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
882                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
883                         float4 newPosB = transform(&childPosB,&posB,&ornB);
884                         float4 newOrnB = qtMul(ornB,childOrnB);
885                         posB = newPosB;
886                         ornB = newOrnB;
887                         shapeIndexB = collidables[childColIndexB].m_shapeIndex;
888                 }
889                 //////////////////
890         
891                 float4 c0local = convexPolyhedronA.m_localCenter;
892                 float4 c0 = transform(&c0local, &posA, &ornA);
893                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
894                 float4 c1 = transform(&c1local,&posB,&ornB);
895                 const float4 DeltaC2 = c0 - c1;
896         
897         
898                 bool sepA = findSeparatingAxisLocalA(   &convexPolyhedronA, &convexShapes[shapeIndexB],
899                                              posA,ornA,
900                                              posB,ornB,
901                                              DeltaC2,
902                                              verticesA,uniqueEdgesA,facesA,indicesA,
903                                              vertices,uniqueEdges,faces,indices,
904                                              &sepAxis,&dmin);
905                 hasSeparatingAxis = 4;
906                 if (!sepA)
907                 {
908                         hasSeparatingAxis = 0;
909                 } else
910                 {
911                         bool sepB = findSeparatingAxisLocalB(   &convexShapes[shapeIndexB],&convexPolyhedronA,
912                                                  posB,ornB,
913                                                  posA,ornA,
914                                                  DeltaC2,
915                                                  vertices,uniqueEdges,faces,indices,
916                                                  verticesA,uniqueEdgesA,facesA,indicesA,
917                                                  &sepAxis,&dmin);
918             
919                         if (!sepB)
920                         {
921                                 hasSeparatingAxis = 0;
922                         } else
923                         {
924                                 hasSeparatingAxis = 1;
925                         }
926                 }       
927                 
928                 if (hasSeparatingAxis)
929                 {
930             dmins[i] = dmin;
931                         concaveSeparatingNormalsOut[pairIdx]=sepAxis;
932                         concaveHasSeparatingNormals[i]=1;
933             
934                 } else
935                 {       
936                         //mark this pair as in-active
937                         concavePairs[pairIdx].w = -1;
938                 }
939         }
940         else
941         {       
942                 //mark this pair as in-active
943                 concavePairs[pairIdx].w = -1;
944         }
945 }
946
947
948
949
950 // work-in-progress
// Edge-edge pass of the separating-axis search for (triangle of body A, convex shape of body B) pairs.
//
// Each work-item handles the pair at index get_global_id(0). It returns without
// writing anything when that index is >= numConcavePairs, or when
// concaveHasSeparatingNormals[i] is zero.
//
// For every other pair the kernel:
//   1. loads face concavePairs[i].z of body A's shape, copies its first three
//      vertices into verticesA and accumulates their bounding box and sum;
//   2. compares that bounding box with aabbs[bodyIndexB] on x, y and z;
//   3. on overlap, builds a temporary 5-face ConvexPolyhedronCL for the triangle
//      (front face, back face, three edge planes), applies the child transform
//      when B is SHAPE_COMPOUND_OF_CONVEX_HULLS, and calls
//      findSeparatingAxisEdgeEdgeLocalA starting from the axis/dmin already
//      stored for this pair;
//   4. if that call returns true, stores the axis (with w = dmin) and dmin and
//      calls findClippingFaces; otherwise sets concavePairs[pairIdx].w = -1.
//      A failed overlap test in step 2 also sets concavePairs[pairIdx].w = -1.
//   5. finally sets concavePairs[i].z = -1.
//
// Parameters:
//   concavePairs                 per pair: x = body A index, y = body B index, z = face index
//                                inside A's shape, w = index into gpuChildShapes (read only when
//                                B is a compound); z and w are overwritten as described above.
//   rigidBodies                  m_pos, m_quat and m_collidableIdx are read for A and B.
//   collidables                  m_shapeIndex and m_shapeType are read.
//   convexShapes                 indexed by shape index; m_faceOffset, m_vertexOffset, m_numFaces
//                                and m_localCenter are read here, and the B entry is passed on.
//   vertices/uniqueEdges/faces/indices
//                                shared geometry arrays; vertices, faces and indices are indexed
//                                here through the shape/face offsets, all four are passed to
//                                findSeparatingAxisEdgeEdgeLocalA.
//   gpuChildShapes               m_shapeIndex, m_childPosition, m_childOrientation of the child.
//   aabbs                        indexed by body index; only the entry of body B is read.
//   concaveSeparatingNormalsOut  per pair axis: read as the starting axis, overwritten on success.
//   concaveHasSeparatingNormals  per pair flag: gates the work-item, set to 1 on success.
//   clippingFacesOut, worldVertsA1GPU, worldNormalsAGPU, worldVertsB1GPU, vertexFaceCapacity
//                                not touched here, only forwarded to findClippingFaces.
//   dmins                        per pair value: read as the starting dmin, overwritten on success.
//   numConcavePairs              number of valid entries; work-items beyond it exit.
951 __kernel void   findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,
952                                                           __global const BodyData* rigidBodies,
953                                                           __global const btCollidableGpu* collidables,
954                                                           __global const ConvexPolyhedronCL* convexShapes,
955                                                           __global const float4* vertices,
956                                                           __global const float4* uniqueEdges,
957                                                           __global const btGpuFace* faces,
958                                                           __global const int* indices,
959                                                           __global const btGpuChildShape* gpuChildShapes,
960                                                           __global btAabbCL* aabbs,
961                                                           __global float4* concaveSeparatingNormalsOut,
962                                                           __global int* concaveHasSeparatingNormals,
963                                                           __global int4* clippingFacesOut,
964                                                           __global float4* worldVertsA1GPU,
965                                                           __global float4*  worldNormalsAGPU,
966                                                           __global float4* worldVertsB1GPU,
967                                                           __global float* dmins,
968                                                           int vertexFaceCapacity,
969                                                           int numConcavePairs
970                                                           )
971 {
972     
973         int i = get_global_id(0);
974         if (i>=numConcavePairs)
975                 return;
976     
             //pairs whose flag is zero are skipped entirely (note: the final z = -1 write is skipped too)
977         if (!concaveHasSeparatingNormals[i])
978         return;
979     
980         int pairIdx = i;
981     
982         int bodyIndexA = concavePairs[i].x;
983         int bodyIndexB = concavePairs[i].y;
984     
985         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
986         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
987     
988         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
989         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
990     
991     
             //NOTE(review): numFacesA is never read in this kernel; numActualConcaveConvexTests is only incremented
992         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
993         int numActualConcaveConvexTests = 0;
994         
             //f = index of the face of shape A that this pair refers to
995         int f = concavePairs[i].z;
996         
997         bool overlap = false;
998         
             //temporary polyhedron describing the single triangle; all its offsets are 0
             //because it indexes the function-scope arrays verticesA/uniqueEdgesA/facesA/indicesA
999         ConvexPolyhedronCL convexPolyhedronA;
1000     
1001         //add 3 vertices of the triangle
1002         convexPolyhedronA.m_numVertices = 3;
1003         convexPolyhedronA.m_vertexOffset = 0;
1004         float4  localCenter = make_float4(0.f,0.f,0.f,0.f);
1005     
1006         btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
              //NOTE(review): triMinAabb/triMaxAabb are declared but never used
1007         float4 triMinAabb, triMaxAabb;
1008         btAabbCL triAabb;
1009         triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);
1010         triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);
1011         
1012         float4 verticesA[3];
              //this loop's 'i' shadows the work-item index 'i' declared above
              //only the first three indices of the face are read
1013         for (int i=0;i<3;i++)
1014         {
1015                 int index = indices[face.m_indexOffset+i];
1016                 float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1017                 verticesA[i] = vert;
1018                 localCenter += vert;
1019         
1020                 triAabb.m_min = min(triAabb.m_min,vert);
1021                 triAabb.m_max = max(triAabb.m_max,vert);
1022         
1023         }
1024     
              //per-axis interval test of the triangle box against the box stored for body B
              //NOTE(review): the triangle box is built from the vertices exactly as stored (posA/ornA are
              //not applied); confirm that aabbs[bodyIndexB] is expressed in the same frame
1025         overlap = true;
1026         overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;
1027         overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;
1028         overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;
1029     
1030         if (overlap)
1031         {
                      //start from the dmin and axis already stored for this pair
                      //the initial values 5 and (1,2,3,4) are placeholders: both are overwritten before being read
1032                 float dmin = dmins[i];
1033                 int hasSeparatingAxis=5;
1034                 float4 sepAxis=make_float4(1,2,3,4);
1035         sepAxis = concaveSeparatingNormalsOut[pairIdx];
1036         
                      //NOTE(review): localCC is never used
1037                 int localCC=0;
1038                 numActualConcaveConvexTests++;
1039         
1040                 //a triangle has 3 unique edges
1041                 convexPolyhedronA.m_numUniqueEdges = 3;
1042                 convexPolyhedronA.m_uniqueEdgesOffset = 0;
1043                 float4 uniqueEdgesA[3];
1044                 
1045                 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1046                 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1047                 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1048         
1049         
1050                 convexPolyhedronA.m_faceOffset = 0;
1051         
1052                 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1053         
                      //index budget: 3 (front) + 3 (back) + 2 per edge plane * 3 = 12 entries
1054                 btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];
1055                 int indicesA[3+3+2+2+2];
1056                 int curUsedIndices=0;
1057                 int fidx=0;
1058         
1059                 //front side of triangle: reuses the plane of the source face, vertices in order 0,1,2
1060                 {
1061                         facesA[fidx].m_indexOffset=curUsedIndices;
1062                         indicesA[0] = 0;
1063                         indicesA[1] = 1;
1064                         indicesA[2] = 2;
1065                         curUsedIndices+=3;
1066                         float c = face.m_plane.w;
1067                         facesA[fidx].m_plane.x = normal.x;
1068                         facesA[fidx].m_plane.y = normal.y;
1069                         facesA[fidx].m_plane.z = normal.z;
1070                         facesA[fidx].m_plane.w = c;
1071                         facesA[fidx].m_numIndices=3;
1072                 }
1073                 fidx++;
1074                 //back side of triangle: negated normal, vertices in reverse order 2,1,0
1075                 {
1076                         facesA[fidx].m_indexOffset=curUsedIndices;
1077                         indicesA[3]=2;
1078                         indicesA[4]=1;
1079                         indicesA[5]=0;
1080                         curUsedIndices+=3;
                              //plane.w is taken from c (dot of the normal with vertex 0); c1 is computed but never used
1081                         float c = dot(normal,verticesA[0]);
1082                         float c1 = -face.m_plane.w;
1083                         facesA[fidx].m_plane.x = -normal.x;
1084                         facesA[fidx].m_plane.y = -normal.y;
1085                         facesA[fidx].m_plane.z = -normal.z;
1086                         facesA[fidx].m_plane.w = c;
1087                         facesA[fidx].m_numIndices=3;
1088                 }
1089                 fidx++;
1090         
                      //always true here, so the three edge planes are always added
1091                 bool addEdgePlanes = true;
1092                 if (addEdgePlanes)
1093                 {
1094                         int numVertices=3;
1095                         int prevVertex = numVertices-1;
                              //one 2-index face per edge (prevVertex -> i); this 'i' also shadows the work-item index
1096                         for (int i=0;i<numVertices;i++)
1097                         {
1098                                 float4 v0 = verticesA[i];
1099                                 float4 v1 = verticesA[prevVertex];
1100                 
                                      //plane normal is perpendicular to both the triangle normal and the edge;
                                      //w is chosen so that v0 lies on the plane
1101                                 float4 edgeNormal = normalize(cross(normal,v1-v0));
1102                                 float c = -dot(edgeNormal,v0);
1103                 
1104                                 facesA[fidx].m_numIndices = 2;
1105                                 facesA[fidx].m_indexOffset=curUsedIndices;
1106                                 indicesA[curUsedIndices++]=i;
1107                                 indicesA[curUsedIndices++]=prevVertex;
1108                 
1109                                 facesA[fidx].m_plane.x = edgeNormal.x;
1110                                 facesA[fidx].m_plane.y = edgeNormal.y;
1111                                 facesA[fidx].m_plane.z = edgeNormal.z;
1112                                 facesA[fidx].m_plane.w = c;
1113                                 fidx++;
1114                                 prevVertex = i;
1115                         }
1116                 }
                      //local center = average of the three triangle vertices
1117                 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1118                 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1119         
1120         
                      //positions are used with w forced to 0
1121                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1122                 posA.w = 0.f;
1123                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1124                 posB.w = 0.f;
1125         
1126                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1127                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1128         
1129                 
1130         
1131         
1132                 ///////////////////
1133                 ///compound shape support
                      //when B is a compound, concavePairs[pairIdx].w is used directly as an index into
                      //gpuChildShapes; posB/ornB are replaced by the body transform combined with the child's
                      //transform, and shapeIndexB by the child's shape index
1134         
1135                 if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1136                 {
1137                         int compoundChild = concavePairs[pairIdx].w;
1138                         int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
1139                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1140                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1141                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1142                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1143                         float4 newOrnB = qtMul(ornB,childOrnB);
1144                         posB = newPosB;
1145                         ornB = newOrnB;
1146                         shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1147                 }
1148                 //////////////////
1149         
                      //DeltaC2 = transformed center of the triangle minus transformed center of shape B
1150                 float4 c0local = convexPolyhedronA.m_localCenter;
1151                 float4 c0 = transform(&c0local, &posA, &ornA);
1152                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1153                 float4 c1 = transform(&c1local,&posB,&ornB);
1154                 const float4 DeltaC2 = c0 - c1;
1155         
1156         
                      //sepAxis and dmin are passed by pointer, so the helper may update them in place
1157                 {
1158                         bool sepEE = findSeparatingAxisEdgeEdgeLocalA(  &convexPolyhedronA, &convexShapes[shapeIndexB],
1159                                                               posA,ornA,
1160                                                               posB,ornB,
1161                                                               DeltaC2,
1162                                                               verticesA,uniqueEdgesA,facesA,indicesA,
1163                                                               vertices,uniqueEdges,faces,indices,
1164                                                               &sepAxis,&dmin);
1165                 
1166                         if (!sepEE)
1167                         {
1168                                 hasSeparatingAxis = 0;
1169                         } else
1170                         {
1171                                 hasSeparatingAxis = 1;
1172                         }
1173                 }
1174                 
1175                 
1176                 if (hasSeparatingAxis)
1177                 {
                              //publish the result: dmin is stored both in sepAxis.w and in dmins[i];
                              //the flag was already non-zero on entry and is set to 1 here
1178                         sepAxis.w = dmin;
1179             dmins[i] = dmin;
1180                         concaveSeparatingNormalsOut[pairIdx]=sepAxis;
1181                         concaveHasSeparatingNormals[i]=1;
1182            
                  //distance bounds handed to findClippingFaces
1183         float minDist = -1e30f;
1184                         float maxDist = 0.02f;
1185
1186             
1187             findClippingFaces(sepAxis,
1188                               &convexPolyhedronA,
1189                               &convexShapes[shapeIndexB],
1190                               posA,ornA,
1191                               posB,ornB,
1192                               worldVertsA1GPU,
1193                               worldNormalsAGPU,
1194                               worldVertsB1GPU,
1195                               vertexFaceCapacity,
1196                               minDist, maxDist,
1197                               verticesA,
1198                               facesA,
1199                               indicesA,
1200                               vertices,
1201                               faces,
1202                               indices,
1203                               clippingFacesOut, pairIdx);
1204                    
1205             
1206                 } else
1207                 {       
1208                         //mark this pair as inactive
1209                         concavePairs[pairIdx].w = -1;
1210                 }
1211         }
1212         else
1213         {       
1214                 //mark this pair as inactive
1215                 concavePairs[pairIdx].w = -1;
1216         }
1217         
1218         concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points
1219 }
1220