dali-physics/third-party/bullet3/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl

   1 //keep this enum in sync with the CPU version (in btCollidable.h)
   2 //written by Erwin Coumans
   3
   4
   5 #define SHAPE_CONVEX_HULL 3
   6 #define SHAPE_CONCAVE_TRIMESH 5
   7 #define TRIANGLE_NUM_CONVEX_FACES 5
   8 #define SHAPE_COMPOUND_OF_CONVEX_HULLS 6
   9
  10 #define B3_MAX_STACK_DEPTH 256
  11
  12
  13 typedef unsigned int u32;
  14
  15 ///keep this in sync with btCollidable.h
  16 typedef struct
  17 {
  18         union {
  19                 int m_numChildShapes;
  20                 int m_bvhIndex;
  21         };
  22         union
  23         {
  24                 float m_radius;
  25                 int     m_compoundBvhIndex;
  26         };
  27
  28         int m_shapeType;
  29         int m_shapeIndex;
  30
  31 } btCollidableGpu;
  32
  33 #define MAX_NUM_PARTS_IN_BITS 10
  34
  35 ///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.
  36 ///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
  37 typedef struct
  38 {
  39         //12 bytes
  40         unsigned short int      m_quantizedAabbMin[3];
  41         unsigned short int      m_quantizedAabbMax[3];
  42         //4 bytes
  43         int     m_escapeIndexOrTriangleIndex;
  44 } b3QuantizedBvhNode;
  45
  46 typedef struct
  47 {
  48         float4          m_aabbMin;
  49         float4          m_aabbMax;
  50         float4          m_quantization;
  51         int                     m_numNodes;
  52         int                     m_numSubTrees;
  53         int                     m_nodeOffset;
  54         int                     m_subTreeOffset;
  55
  56 } b3BvhInfo;
  57
  58
  59 int     getTriangleIndex(const b3QuantizedBvhNode* rootNode)
  60 {
  61         unsigned int x=0;
  62         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
  63         // Get only the lower bits where the triangle index is stored
  64         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
  65 }
  66
  67 int     getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
  68 {
  69         unsigned int x=0;
  70         unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
  71         // Get only the lower bits where the triangle index is stored
  72         return (rootNode->m_escapeIndexOrTriangleIndex&~(y));
  73 }
  74
  75 int isLeafNode(const b3QuantizedBvhNode* rootNode)
  76 {
  77         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
  78         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
  79 }
  80
  81 int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)
  82 {
  83         //skipindex is negative (internal node), triangleindex >=0 (leafnode)
  84         return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;
  85 }
  86
  87 int getEscapeIndex(const b3QuantizedBvhNode* rootNode)
  88 {
  89         return -rootNode->m_escapeIndexOrTriangleIndex;
  90 }
  91
  92 int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
  93 {
  94         return -rootNode->m_escapeIndexOrTriangleIndex;
  95 }
  96
  97
  98 typedef struct
  99 {
 100         //12 bytes
 101         unsigned short int      m_quantizedAabbMin[3];
 102         unsigned short int      m_quantizedAabbMax[3];
 103         //4 bytes, points to the root of the subtree
 104         int                     m_rootNodeIndex;
 105         //4 bytes
 106         int                     m_subtreeSize;
 107         int                     m_padding[3];
 108 } b3BvhSubtreeInfo;
 109
 110
 111
 112
 113
 114
 115
 116 typedef struct
 117 {
 118         float4  m_childPosition;
 119         float4  m_childOrientation;
 120         int m_shapeIndex;
 121         int m_unused0;
 122         int m_unused1;
 123         int m_unused2;
 124 } btGpuChildShape;
 125
 126
 127 typedef struct
 128 {
 129         float4 m_pos;
 130         float4 m_quat;
 131         float4 m_linVel;
 132         float4 m_angVel;
 133
 134         u32 m_collidableIdx;
 135         float m_invMass;
 136         float m_restituitionCoeff;
 137         float m_frictionCoeff;
 138 } BodyData;
 139
 140
 141 typedef struct
 142 {
 143         float4          m_localCenter;
 144         float4          m_extents;
 145         float4          mC;
 146         float4          mE;
 147
 148         float                   m_radius;
 149         int     m_faceOffset;
 150         int m_numFaces;
 151         int     m_numVertices;
 152
 153         int m_vertexOffset;
 154         int     m_uniqueEdgesOffset;
 155         int     m_numUniqueEdges;
 156         int m_unused;
 157 } ConvexPolyhedronCL;
 158
 159 typedef struct
 160 {
 161         union
 162         {
 163                 float4  m_min;
 164                 float   m_minElems[4];
 165                 int                     m_minIndices[4];
 166         };
 167         union
 168         {
 169                 float4  m_max;
 170                 float   m_maxElems[4];
 171                 int                     m_maxIndices[4];
 172         };
 173 } btAabbCL;
 174
 175 #include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
 176 #include "Bullet3Common/shared/b3Int2.h"
 177
 178
 179
 180 typedef struct
 181 {
 182         float4 m_plane;
 183         int m_indexOffset;
 184         int m_numIndices;
 185 } btGpuFace;
 186
 187 #define make_float4 (float4)
 188
 189
 190 __inline
 191 float4 cross3(float4 a, float4 b)
 192 {
 193         return cross(a,b);
 194
 195
 196 //      float4 a1 = make_float4(a.xyz,0.f);
 197 //      float4 b1 = make_float4(b.xyz,0.f);
 198
 199 //      return cross(a1,b1);
 200
 201 //float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);
 202
 203         //      float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);
 204
 205         //return c;
 206 }
 207
 208 __inline
 209 float dot3F4(float4 a, float4 b)
 210 {
 211         float4 a1 = make_float4(a.xyz,0.f);
 212         float4 b1 = make_float4(b.xyz,0.f);
 213         return dot(a1, b1);
 214 }
 215
 216 __inline
 217 float4 fastNormalize4(float4 v)
 218 {
 219         v = make_float4(v.xyz,0.f);
 220         return fast_normalize(v);
 221 }
 222
 223
 224 ///////////////////////////////////////
 225 //      Quaternion
 226 ///////////////////////////////////////
 227
 228 typedef float4 Quaternion;
 229
 230 __inline
 231 Quaternion qtMul(Quaternion a, Quaternion b);
 232
 233 __inline
 234 Quaternion qtNormalize(Quaternion in);
 235
 236 __inline
 237 float4 qtRotate(Quaternion q, float4 vec);
 238
 239 __inline
 240 Quaternion qtInvert(Quaternion q);
 241
 242
 243
 244
 245 __inline
 246 Quaternion qtMul(Quaternion a, Quaternion b)
 247 {
 248         Quaternion ans;
 249         ans = cross3( a, b );
 250         ans += a.w*b+b.w*a;
 251 //      ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
 252         ans.w = a.w*b.w - dot3F4(a, b);
 253         return ans;
 254 }
 255
 256 __inline
 257 Quaternion qtNormalize(Quaternion in)
 258 {
 259         return fastNormalize4(in);
 260 //      in /= length( in );
 261 //      return in;
 262 }
 263 __inline
 264 float4 qtRotate(Quaternion q, float4 vec)
 265 {
 266         Quaternion qInv = qtInvert( q );
 267         float4 vcpy = vec;
 268         vcpy.w = 0.f;
 269         float4 out = qtMul(qtMul(q,vcpy),qInv);
 270         return out;
 271 }
 272
 273 __inline
 274 Quaternion qtInvert(Quaternion q)
 275 {
 276         return (Quaternion)(-q.xyz, q.w);
 277 }
 278
 279 __inline
 280 float4 qtInvRotate(const Quaternion q, float4 vec)
 281 {
 282         return qtRotate( qtInvert( q ), vec );
 283 }
 284
 285 __inline
 286 float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
 287 {
 288         return qtRotate( *orientation, *p ) + (*translation);
 289 }
 290
 291
 292
 293 __inline
 294 float4 normalize3(const float4 a)
 295 {
 296         float4 n = make_float4(a.x, a.y, a.z, 0.f);
 297         return fastNormalize4( n );
 298 }
 299
 300 inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn,
 301 const float4* dir, const float4* vertices, float* min, float* max)
 302 {
 303         min[0] = FLT_MAX;
 304         max[0] = -FLT_MAX;
 305         int numVerts = hull->m_numVertices;
 306
 307         const float4 localDir = qtInvRotate(orn,*dir);
 308         float offset = dot(pos,*dir);
 309         for(int i=0;i<numVerts;i++)
 310         {
 311                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
 312                 if(dp < min[0])
 313                         min[0] = dp;
 314                 if(dp > max[0])
 315                         max[0] = dp;
 316         }
 317         if(min[0]>max[0])
 318         {
 319                 float tmp = min[0];
 320                 min[0] = max[0];
 321                 max[0] = tmp;
 322         }
 323         min[0] += offset;
 324         max[0] += offset;
 325 }
 326
 327 inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn,
 328 const float4* dir, __global const float4* vertices, float* min, float* max)
 329 {
 330         min[0] = FLT_MAX;
 331         max[0] = -FLT_MAX;
 332         int numVerts = hull->m_numVertices;
 333
 334         const float4 localDir = qtInvRotate(orn,*dir);
 335         float offset = dot(pos,*dir);
 336         for(int i=0;i<numVerts;i++)
 337         {
 338                 float dp = dot(vertices[hull->m_vertexOffset+i],localDir);
 339                 if(dp < min[0])
 340                         min[0] = dp;
 341                 if(dp > max[0])
 342                         max[0] = dp;
 343         }
 344         if(min[0]>max[0])
 345         {
 346                 float tmp = min[0];
 347                 min[0] = max[0];
 348                 max[0] = tmp;
 349         }
 350         min[0] += offset;
 351         max[0] += offset;
 352 }
 353
 354 inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 355         const float4 posA,const float4 ornA,
 356         const float4 posB,const float4 ornB,
 357         float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)
 358 {
 359         float Min0,Max0;
 360         float Min1,Max1;
 361         projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);
 362         project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);
 363
 364         if(Max0<Min1 || Max1<Min0)
 365                 return false;
 366
 367         float d0 = Max0 - Min1;
 368         float d1 = Max1 - Min0;
 369         *depth = d0<d1 ? d0:d1;
 370         return true;
 371 }
 372
 373
 374
 375
 376 inline bool IsAlmostZero(const float4 v)
 377 {
 378         if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)
 379                 return false;
 380         return true;
 381 }
 382
 383
 384
 385 bool findSeparatingAxisLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 386         const float4 posA1,
 387         const float4 ornA,
 388         const float4 posB1,
 389         const float4 ornB,
 390         const float4 DeltaC2,
 391
 392         const float4* verticesA,
 393         const float4* uniqueEdgesA,
 394         const btGpuFace* facesA,
 395         const int*  indicesA,
 396
 397         __global const float4* verticesB,
 398         __global const float4* uniqueEdgesB,
 399         __global const btGpuFace* facesB,
 400         __global const int*  indicesB,
 401         float4* sep,
 402         float* dmin)
 403 {
 404
 405
 406         float4 posA = posA1;
 407         posA.w = 0.f;
 408         float4 posB = posB1;
 409         posB.w = 0.f;
 410         int curPlaneTests=0;
 411         {
 412                 int numFacesA = hullA->m_numFaces;
 413                 // Test normals from hullA
 414                 for(int i=0;i<numFacesA;i++)
 415                 {
 416                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
 417                         float4 faceANormalWS = qtRotate(ornA,normal);
 418                         if (dot3F4(DeltaC2,faceANormalWS)<0)
 419                                 faceANormalWS*=-1.f;
 420                         curPlaneTests++;
 421                         float d;
 422                         if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))
 423                                 return false;
 424                         if(d<*dmin)
 425                         {
 426                                 *dmin = d;
 427                                 *sep = faceANormalWS;
 428                         }
 429                 }
 430         }
 431         if((dot3F4(-DeltaC2,*sep))>0.0f)
 432         {
 433                 *sep = -(*sep);
 434         }
 435         return true;
 436 }
 437
 438 bool findSeparatingAxisLocalB(  __global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB,
 439         const float4 posA1,
 440         const float4 ornA,
 441         const float4 posB1,
 442         const float4 ornB,
 443         const float4 DeltaC2,
 444         __global const float4* verticesA,
 445         __global const float4* uniqueEdgesA,
 446         __global const btGpuFace* facesA,
 447         __global const int*  indicesA,
 448         const float4* verticesB,
 449         const float4* uniqueEdgesB,
 450         const btGpuFace* facesB,
 451         const int*  indicesB,
 452         float4* sep,
 453         float* dmin)
 454 {
 455
 456
 457         float4 posA = posA1;
 458         posA.w = 0.f;
 459         float4 posB = posB1;
 460         posB.w = 0.f;
 461         int curPlaneTests=0;
 462         {
 463                 int numFacesA = hullA->m_numFaces;
 464                 // Test normals from hullA
 465                 for(int i=0;i<numFacesA;i++)
 466                 {
 467                         const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
 468                         float4 faceANormalWS = qtRotate(ornA,normal);
 469                         if (dot3F4(DeltaC2,faceANormalWS)<0)
 470                                 faceANormalWS *= -1.f;
 471                         curPlaneTests++;
 472                         float d;
 473                         if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))
 474                                 return false;
 475                         if(d<*dmin)
 476                         {
 477                                 *dmin = d;
 478                                 *sep = faceANormalWS;
 479                         }
 480                 }
 481         }
 482         if((dot3F4(-DeltaC2,*sep))>0.0f)
 483         {
 484                 *sep = -(*sep);
 485         }
 486         return true;
 487 }
 488
 489
 490
 491 bool findSeparatingAxisEdgeEdgeLocalA(  const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 492         const float4 posA1,
 493         const float4 ornA,
 494         const float4 posB1,
 495         const float4 ornB,
 496         const float4 DeltaC2,
 497         const float4* verticesA,
 498         const float4* uniqueEdgesA,
 499         const btGpuFace* facesA,
 500         const int*  indicesA,
 501         __global const float4* verticesB,
 502         __global const float4* uniqueEdgesB,
 503         __global const btGpuFace* facesB,
 504         __global const int*  indicesB,
 505                 float4* sep,
 506         float* dmin)
 507 {
 508
 509
 510         float4 posA = posA1;
 511         posA.w = 0.f;
 512         float4 posB = posB1;
 513         posB.w = 0.f;
 514
 515         int curPlaneTests=0;
 516
 517         int curEdgeEdge = 0;
 518         // Test edges
 519         for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
 520         {
 521                 const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
 522                 float4 edge0World = qtRotate(ornA,edge0);
 523
 524                 for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
 525                 {
 526                         const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
 527                         float4 edge1World = qtRotate(ornB,edge1);
 528
 529
 530                         float4 crossje = cross3(edge0World,edge1World);
 531
 532                         curEdgeEdge++;
 533                         if(!IsAlmostZero(crossje))
 534                         {
 535                                 crossje = normalize3(crossje);
 536                                 if (dot3F4(DeltaC2,crossje)<0)
 537                                         crossje *= -1.f;
 538
 539                                 float dist;
 540                                 bool result = true;
 541                                 {
 542                                         float Min0,Max0;
 543                                         float Min1,Max1;
 544                                         projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
 545                                         project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
 546
 547                                         if(Max0<Min1 || Max1<Min0)
 548                                                 result = false;
 549
 550                                         float d0 = Max0 - Min1;
 551                                         float d1 = Max1 - Min0;
 552                                         dist = d0<d1 ? d0:d1;
 553                                         result = true;
 554
 555                                 }
 556
 557
 558                                 if(dist<*dmin)
 559                                 {
 560                                         *dmin = dist;
 561                                         *sep = crossje;
 562                                 }
 563                         }
 564                 }
 565
 566         }
 567
 568
 569         if((dot3F4(-DeltaC2,*sep))>0.0f)
 570         {
 571                 *sep = -(*sep);
 572         }
 573         return true;
 574 }
 575
 576
 577 inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 578         const float4 posA,const float4 ornA,
 579         const float4 posB,const float4 ornB,
 580         float4* sep_axis, __global const float4* vertices,float* depth)
 581 {
 582         float Min0,Max0;
 583         float Min1,Max1;
 584         project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);
 585         project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);
 586
 587         if(Max0<Min1 || Max1<Min0)
 588                 return false;
 589
 590         float d0 = Max0 - Min1;
 591         float d1 = Max1 - Min0;
 592         *depth = d0<d1 ? d0:d1;
 593         return true;
 594 }
 595
 596
 597 bool findSeparatingAxis(        __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 598         const float4 posA1,
 599         const float4 ornA,
 600         const float4 posB1,
 601         const float4 ornB,
 602         const float4 DeltaC2,
 603         __global const float4* vertices,
 604         __global const float4* uniqueEdges,
 605         __global const btGpuFace* faces,
 606         __global const int*  indices,
 607         float4* sep,
 608         float* dmin)
 609 {
 610
 611
 612         float4 posA = posA1;
 613         posA.w = 0.f;
 614         float4 posB = posB1;
 615         posB.w = 0.f;
 616
 617         int curPlaneTests=0;
 618
 619         {
 620                 int numFacesA = hullA->m_numFaces;
 621                 // Test normals from hullA
 622                 for(int i=0;i<numFacesA;i++)
 623                 {
 624                         const float4 normal = faces[hullA->m_faceOffset+i].m_plane;
 625                         float4 faceANormalWS = qtRotate(ornA,normal);
 626
 627                         if (dot3F4(DeltaC2,faceANormalWS)<0)
 628                                 faceANormalWS*=-1.f;
 629
 630                         curPlaneTests++;
 631
 632                         float d;
 633                         if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))
 634                                 return false;
 635
 636                         if(d<*dmin)
 637                         {
 638                                 *dmin = d;
 639                                 *sep = faceANormalWS;
 640                         }
 641                 }
 642         }
 643
 644
 645                 if((dot3F4(-DeltaC2,*sep))>0.0f)
 646                 {
 647                         *sep = -(*sep);
 648                 }
 649
 650         return true;
 651 }
 652
 653
 654
 655
 656 bool findSeparatingAxisUnitSphere(      __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 657         const float4 posA1,
 658         const float4 ornA,
 659         const float4 posB1,
 660         const float4 ornB,
 661         const float4 DeltaC2,
 662         __global const float4* vertices,
 663         __global const float4* unitSphereDirections,
 664         int numUnitSphereDirections,
 665         float4* sep,
 666         float* dmin)
 667 {
 668
 669         float4 posA = posA1;
 670         posA.w = 0.f;
 671         float4 posB = posB1;
 672         posB.w = 0.f;
 673
 674         int curPlaneTests=0;
 675
 676         int curEdgeEdge = 0;
 677         // Test unit sphere directions
 678         for (int i=0;i<numUnitSphereDirections;i++)
 679         {
 680
 681                 float4 crossje;
 682                 crossje = unitSphereDirections[i];
 683
 684                 if (dot3F4(DeltaC2,crossje)>0)
 685                         crossje *= -1.f;
 686                 {
 687                         float dist;
 688                         bool result = true;
 689                         float Min0,Max0;
 690                         float Min1,Max1;
 691                         project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);
 692                         project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);
 693
 694                         if(Max0<Min1 || Max1<Min0)
 695                                 return false;
 696
 697                         float d0 = Max0 - Min1;
 698                         float d1 = Max1 - Min0;
 699                         dist = d0<d1 ? d0:d1;
 700                         result = true;
 701
 702                         if(dist<*dmin)
 703                         {
 704                                 *dmin = dist;
 705                                 *sep = crossje;
 706                         }
 707                 }
 708         }
 709
 710
 711         if((dot3F4(-DeltaC2,*sep))>0.0f)
 712         {
 713                 *sep = -(*sep);
 714         }
 715         return true;
 716 }
 717
 718
 719 bool findSeparatingAxisEdgeEdge(        __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
 720         const float4 posA1,
 721         const float4 ornA,
 722         const float4 posB1,
 723         const float4 ornB,
 724         const float4 DeltaC2,
 725         __global const float4* vertices,
 726         __global const float4* uniqueEdges,
 727         __global const btGpuFace* faces,
 728         __global const int*  indices,
 729         float4* sep,
 730         float* dmin)
 731 {
 732
 733
 734         float4 posA = posA1;
 735         posA.w = 0.f;
 736         float4 posB = posB1;
 737         posB.w = 0.f;
 738
 739         int curPlaneTests=0;
 740
 741         int curEdgeEdge = 0;
 742         // Test edges
 743         for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
 744         {
 745                 const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];
 746                 float4 edge0World = qtRotate(ornA,edge0);
 747
 748                 for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
 749                 {
 750                         const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];
 751                         float4 edge1World = qtRotate(ornB,edge1);
 752
 753
 754                         float4 crossje = cross3(edge0World,edge1World);
 755
 756                         curEdgeEdge++;
 757                         if(!IsAlmostZero(crossje))
 758                         {
 759                                 crossje = normalize3(crossje);
 760                                 if (dot3F4(DeltaC2,crossje)<0)
 761                                         crossje*=-1.f;
 762
 763                                 float dist;
 764                                 bool result = true;
 765                                 {
 766                                         float Min0,Max0;
 767                                         float Min1,Max1;
 768                                         project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);
 769                                         project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);
 770
 771                                         if(Max0<Min1 || Max1<Min0)
 772                                                 return false;
 773
 774                                         float d0 = Max0 - Min1;
 775                                         float d1 = Max1 - Min0;
 776                                         dist = d0<d1 ? d0:d1;
 777                                         result = true;
 778
 779                                 }
 780
 781
 782                                 if(dist<*dmin)
 783                                 {
 784                                         *dmin = dist;
 785                                         *sep = crossje;
 786                                 }
 787                         }
 788                 }
 789
 790         }
 791
 792
 793         if((dot3F4(-DeltaC2,*sep))>0.0f)
 794         {
 795                 *sep = -(*sep);
 796         }
 797         return true;
 798 }
 799
 800
 801 // work-in-progress
 802 __kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,
 803                                                                                                                                                                         __global const BodyData* rigidBodies,
 804                                                                                                                                                                         __global const btCollidableGpu* collidables,
 805                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes,
 806                                                                                                                                                                         __global const float4* vertices,
 807                                                                                                                                                                         __global const float4* uniqueEdges,
 808                                                                                                                                                                         __global const btGpuFace* faces,
 809                                                                                                                                                                         __global const int* indices,
 810                                                                                                                                                                         __global btAabbCL* aabbs,
 811                                                                                                                                                                         __global const btGpuChildShape* gpuChildShapes,
 812                                                                                                                                                                         __global volatile float4* gpuCompoundSepNormalsOut,
 813                                                                                                                                                                         __global volatile int* gpuHasCompoundSepNormalsOut,
 814                                                                                                                                                                         int numCompoundPairs
 815                                                                                                                                                                         )
 816 {
 817
 818         int i = get_global_id(0);
 819         if (i<numCompoundPairs)
 820         {
 821                 int bodyIndexA = gpuCompoundPairs[i].x;
 822                 int bodyIndexB = gpuCompoundPairs[i].y;
 823
 824                 int childShapeIndexA = gpuCompoundPairs[i].z;
 825                 int childShapeIndexB = gpuCompoundPairs[i].w;
 826
 827                 int collidableIndexA = -1;
 828                 int collidableIndexB = -1;
 829
 830                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
 831                 float4 posA = rigidBodies[bodyIndexA].m_pos;
 832
 833                 float4 ornB = rigidBodies[bodyIndexB].m_quat;
 834                 float4 posB = rigidBodies[bodyIndexB].m_pos;
 835
 836                 if (childShapeIndexA >= 0)
 837                 {
 838                         collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
 839                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
 840                         float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
 841                         float4 newPosA = qtRotate(ornA,childPosA)+posA;
 842                         float4 newOrnA = qtMul(ornA,childOrnA);
 843                         posA = newPosA;
 844                         ornA = newOrnA;
 845                 } else
 846                 {
 847                         collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
 848                 }
 849
 850                 if (childShapeIndexB>=0)
 851                 {
 852                         collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
 853                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
 854                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
 855                         float4 newPosB = transform(&childPosB,&posB,&ornB);
 856                         float4 newOrnB = qtMul(ornB,childOrnB);
 857                         posB = newPosB;
 858                         ornB = newOrnB;
 859                 } else
 860                 {
 861                         collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
 862                 }
 863
 864                 gpuHasCompoundSepNormalsOut[i] = 0;
 865
 866                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
 867                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
 868
 869                 int shapeTypeA = collidables[collidableIndexA].m_shapeType;
 870                 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
 871
 872
 873                 if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
 874                 {
 875                         return;
 876                 }
 877
 878                 int hasSeparatingAxis = 5;
 879
 880                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
 881                 float dmin = FLT_MAX;
 882                 posA.w = 0.f;
 883                 posB.w = 0.f;
 884                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
 885                 float4 c0 = transform(&c0local, &posA, &ornA);
 886                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
 887                 float4 c1 = transform(&c1local,&posB,&ornB);
 888                 const float4 DeltaC2 = c0 - c1;
 889                 float4 sepNormal = make_float4(1,0,0,0);
 890                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
 891                 hasSeparatingAxis = 4;
 892                 if (!sepA)
 893                 {
 894                         hasSeparatingAxis = 0;
 895                 } else
 896                 {
 897                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
 898
 899                         if (!sepB)
 900                         {
 901                                 hasSeparatingAxis = 0;
 902                         } else//(!sepB)
 903                         {
 904                                 bool sepEE = findSeparatingAxisEdgeEdge(        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
 905                                 if (sepEE)
 906                                 {
 907                                                 gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);
 908                                                 gpuHasCompoundSepNormalsOut[i] = 1;
 909                                 }//sepEE
 910                         }//(!sepB)
 911                 }//(!sepA)
 912
 913
 914         }
 915
 916 }
 917
 918
 919 // Expands three quantized 16-bit values (read via a pointer with no address-space qualifier) into a b3Float4.
 920 // x, y and z are each divided by the matching component of 'quantization'; 'bvhAabbMin' is then added to the vector.
 921 inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
 922 {
 923                 const float scaledX = (float)(vecIn[0]) / (quantization.x);
 924                 const float scaledY = (float)(vecIn[1]) / (quantization.y);
 925                 const float scaledZ = (float)(vecIn[2]) / (quantization.z);
 926                 // w is seeded with 0 before the offset is added
 927                 b3Float4 corner = b3MakeFloat4(scaledX, scaledY, scaledZ, 0.f);
 928                 corner += bvhAabbMin;
 929                 return corner;
 930 }
 931
 932 // Same conversion as MyUnQuantize, for quantized values that are read through a __global pointer.
 933 // x, y and z are each divided by the matching component of 'quantization'; 'bvhAabbMin' is then added to the vector.
 934 inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
 935 {
 936                 const float scaledX = (float)(vecIn[0]) / (quantization.x);
 937                 const float scaledY = (float)(vecIn[1]) / (quantization.y);
 938                 const float scaledZ = (float)(vecIn[2]) / (quantization.z);
 939                 // w is seeded with 0 before the offset is added
 940                 b3Float4 corner = b3MakeFloat4(scaledX, scaledY, scaledZ, 0.f);
 941                 corner += bvhAabbMin;
 942                 return corner;
 943 }
 944
 945
 946 // work-in-progress: one work-item per input pair; pairs that involve a SHAPE_COMPOUND_OF_CONVEX_HULLS collidable are expanded into (bodyA,bodyB,childA,childB) entries in gpuCompoundPairsOut
 947 __kernel void   findCompoundPairsKernel( __global const int4* pairs, // .x/.y are read as the two body indices (A,B)
 948         __global const BodyData* rigidBodies, // indexed by body index; m_collidableIdx, m_invMass, m_pos and m_quat are read
 949         __global const btCollidableGpu* collidables, // indexed by collidable index; m_shapeType, m_shapeIndex, m_numChildShapes and m_compoundBvhIndex are read
 950         __global const ConvexPolyhedronCL* convexShapes, // only read into locals that are never used afterwards
 951         __global const float4* vertices, // not referenced in this kernel
 952         __global const float4* uniqueEdges, // not referenced in this kernel
 953         __global const btGpuFace* faces, // not referenced in this kernel
 954         __global const int* indices, // not referenced in this kernel
 955         __global b3Aabb_t* aabbLocalSpace, // AABBs (m_minVec/m_maxVec), indexed by a child collidable's m_shapeIndex
 956         __global const btGpuChildShape* gpuChildShapes, // compound children: m_shapeIndex (used as a collidable index), m_childPosition, m_childOrientation
 957         __global volatile int4* gpuCompoundPairsOut, // output array, written at the slots handed out by numCompoundPairsOut
 958         __global volatile int* numCompoundPairsOut, // atomic slot counter; incremented for every candidate, even past capacity
 959         __global const b3BvhSubtreeInfo* subtrees, // BVH subtree headers, addressed as m_subTreeOffset + subtree number
 960         __global const b3QuantizedBvhNode* quantizedNodes, // BVH nodes, addressed relative to m_nodeOffset
 961         __global const b3BvhInfo* bvhInfos, // per-BVH info, indexed by a collidable's m_compoundBvhIndex
 962         int numPairs, // number of entries in pairs; work-items with i >= numPairs do nothing
 963         int maxNumCompoundPairsCapacity // writes to gpuCompoundPairsOut are skipped for slots >= this value
 964         )
 965 {
 966         // Each work-item handles the pair whose index equals its global id.
 967         int i = get_global_id(0);
 968
 969         if (i<numPairs)
 970         {
 971                 int bodyIndexA = pairs[i].x;
 972                 int bodyIndexB = pairs[i].y;
 973
 974                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
 975                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
 976
 977                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
 978                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
 979
 980                 // Pairs in which both bodies have zero inverse mass produce no output.
 981                 //once the broadphase avoids static-static pairs, we can remove this test
 982                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
 983                 {
 984                         return;
 985                 }
 986                 // Case 1: both collidables are compounds -> their two BVHs are tested against each other.
 987                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
 988                 {
 989                         int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
 990                         int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
 991                         int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;
 992                         int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;
 993                         int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;
 994
 995
 996                         int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;
 997
 998                         float4 posA = rigidBodies[bodyIndexA].m_pos;
 999                         b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1000
1001                         b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1002                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1003
1004                         // First step: test every subtree of A against every subtree of B using their transformed bounds.
1005                         for (int p=0;p<numSubTreesA;p++)
1006                         {
1007                                 b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];
1008                                 //un-quantize the subtree bounds with bvhInfos[bvhA].m_quantization and bvhInfos[bvhA].m_aabbMin
1009                                 b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1010                                 b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1011
1012                                 b3Float4 aabbAMinOut,aabbAMaxOut;
1013                                 float margin=0.f;
1014                                 b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1015                                 // A's transformed bounds are computed once per p; B's are recomputed for every q.
1016                                 for (int q=0;q<numSubTreesB;q++)
1017                                 {
1018                                         b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];
1019
1020                                         b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1021                                         b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1022
1023                                         b3Float4 aabbBMinOut,aabbBMaxOut;
1024                                         float margin=0.f;
1025                                         b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1026
1027
1028
1029                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1030                                         if (aabbOverlap)
1031                                         {
1032                                                 // Second step: descend both subtrees together, starting from their root nodes.
1033                                                 int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;
1034                                                 int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; // not used below
1035
1036                                                 int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;
1037                                                 int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; // not used below
1038
1039                                                 // Explicit stack of (node in A, node in B) index pairs, seeded with the two roots.
1040                                                 b3Int2 nodeStack[B3_MAX_STACK_DEPTH];
1041                                                 b3Int2 node0;
1042                                                 node0.x = startNodeIndexA;
1043                                                 node0.y = startNodeIndexB;
1044                                                 int maxStackDepth = B3_MAX_STACK_DEPTH;
1045                                                 int depth=0;
1046                                                 nodeStack[depth++]=node0;
1047
1048                                                 do
1049                                                 {
1050                                                         b3Int2 node = nodeStack[--depth]; // pop the next node pair
1051
1052                                                         b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1053                                                         b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1054
1055                                                         b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1056                                                         b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1057
1058                                                         float margin=0.f;
1059                                                         b3Float4 aabbAMinOut,aabbAMaxOut;
1060                                                         b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1061
1062                                                         b3Float4 aabbBMinOut,aabbBMaxOut;
1063                                                         b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1064
1065
1066                                                         bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1067                                                         if (nodeOverlap)
1068                                                         {
1069                                                                 bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);
1070                                                                 bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);
1071                                                                 bool isInternalA = !isLeafA;
1072                                                                 bool isInternalB = !isLeafB;
1073
1074                                                                 //out of stack room (up to 4 pushes may follow): this node pair is dropped without descending; two leaves push nothing and are still handled
1075                                                                 if (depth+4>maxStackDepth && !(isLeafA && isLeafB))
1076                                                                 {
1077                                                                         //printf("Error: traversal exceeded maxStackDepth");
1078                                                                         continue; // goes to the while (depth) test
1079                                                                 }
1080                                                                 // Child indexing used below: left child = node+1; right child = node+2 if the left child is a leaf, else node+1 + the left child's escape index.
1081                                                                 if(isInternalA)
1082                                                                 {
1083                                                                         int nodeAleftChild = node.x+1;
1084                                                                         bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);
1085                                                                         int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);
1086
1087                                                                         if(isInternalB)
1088                                                                         {
1089                                                                                 int nodeBleftChild = node.y+1;
1090                                                                                 bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1091                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1092                                                                                 // both nodes are internal: push all four child combinations
1093                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1094                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1095                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1096                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1097                                                                         }
1098                                                                         else // B is a leaf: descend A only
1099                                                                         {
1100                                                                                 nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);
1101                                                                                 nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);
1102                                                                         }
1103                                                                 }
1104                                                                 else // A is a leaf
1105                                                                 {
1106                                                                         if(isInternalB)
1107                                                                         {
1108                                                                                 int nodeBleftChild = node.y+1;
1109                                                                                 bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1110                                                                                 int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1111                                                                                 nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);
1112                                                                                 nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);
1113                                                                         }
1114                                                                         else // both are leaves: emit one (childA,childB) candidate
1115                                                                         {
1116                                                                                 int compoundPairIdx = atomic_inc(numCompoundPairsOut); // the counter advances even when the slot is past capacity
1117                                                                                 if (compoundPairIdx<maxNumCompoundPairsCapacity)
1118                                                                                 {
1119                                                                                         int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);
1120                                                                                         int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);
1121                                                                                         gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1122                                                                                 }
1123                                                                         }
1124                                                                 }
1125                                                         }
1126                                                 } while (depth);
1127                                         }
1128                                 }
1129                         }
1130
1131                         return;
1132                 }
1133
1134
1135
1136
1137                 // Case 2: exactly one side is a compound (the both-compound case returned above); no BVH is used here.
1138                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1139                 {
1140
1141                         if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1142                         {
1143                                 // A is the compound: visit each of its children.
1144                                 int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1145                                 for (int c=0;c<numChildrenA;c++)
1146                                 {
1147                                         int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;
1148                                         int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1149                                         // combine body A's position/orientation with the child's position/orientation
1150                                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1151                                         float4 ornA = rigidBodies[bodyIndexA].m_quat;
1152                                         float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1153                                         float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1154                                         float4 newPosA = qtRotate(ornA,childPosA)+posA;
1155                                         float4 newOrnA = qtMul(ornA,childOrnA);
1156
1157                                         int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
1158                                         b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];
1159                                         float margin = 0.f;
1160
1161                                         b3Float4 aabbAMinWS;
1162                                         b3Float4 aabbAMaxWS;
1163
1164                                         b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,
1165                                                 newPosA,
1166                                                 newOrnA,
1167                                                 &aabbAMinWS,&aabbAMaxWS);
1168
1169                                         // NOTE(review): B cannot be a compound at this point (the both-compound case returns earlier), so this branch appears unreachable; it is the only user of aabbAMinWS/aabbAMaxWS.
1170                                         if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1171                                         {
1172                                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1173                                                 for (int b=0;b<numChildrenB;b++)
1174                                                 {
1175                                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1176                                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1177                                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1178                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1179                                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1180                                                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1181                                                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1182                                                         float4 newOrnB = qtMul(ornB,childOrnB);
1183
1184                                                         int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1185                                                         b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];
1186
1187                                                         b3Float4 aabbBMinWS;
1188                                                         b3Float4 aabbBMaxWS;
1189
1190                                                         b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,
1191                                                                 newPosB,
1192                                                                 newOrnB,
1193                                                                 &aabbBMinWS,&aabbBMaxWS);
1194
1195
1196
1197                                                         bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);
1198                                                         if (aabbOverlap)
1199                                                         {
1200                                                                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1201                                                                 float dmin = FLT_MAX;
1202                                                                 float4 posA = newPosA;
1203                                                                 posA.w = 0.f;
1204                                                                 float4 posB = newPosB;
1205                                                                 posB.w = 0.f;
1206                                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1207                                                                 float4 ornA = newOrnA;
1208                                                                 float4 c0 = transform(&c0local, &posA, &ornA);
1209                                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1210                                                                 float4 ornB =newOrnB;
1211                                                                 float4 c1 = transform(&c1local,&posB,&ornB);
1212                                                                 const float4 DeltaC2 = c0 - c1;
1213
1214                                                                 {//
1215                                                                         int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1216                                                                         if (compoundPairIdx<maxNumCompoundPairsCapacity)
1217                                                                         {
1218                                                                                 gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1219                                                                         }
1220                                                                 }//
1221                                                         }//if (aabbOverlap)
1222                                                 } //for (int b=0
1223                                         }//if (collidables[collidableIndexB].
1224                                         else//B is not a compound: every child of A is emitted, without any AABB test
1225                                         {
1226                                                 if (1)
1227                                                 {
1228                                                         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1229                                                         float dmin = FLT_MAX;
1230                                                         float4 posA = newPosA;
1231                                                         posA.w = 0.f;
1232                                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1233                                                         posB.w = 0.f;
1234                                                         float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1235                                                         float4 ornA = newOrnA;
1236                                                         float4 c0 = transform(&c0local, &posA, &ornA);
1237                                                         float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1238                                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1239                                                         float4 c1 = transform(&c1local,&posB,&ornB);
1240                                                         const float4 DeltaC2 = c0 - c1;
1241                                                         // NOTE: numFacesA, dmin and DeltaC2 computed above are not used by the code below.
1242                                                         {
1243                                                                 int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1244                                                                 if (compoundPairIdx<maxNumCompoundPairsCapacity)
1245                                                                 {
1246                                                                         gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1); // -1: no child index on the B side
1247                                                                 }//if (compoundPairIdx<maxNumCompoundPairsCapacity)
1248                                                         }//
1249                                                 }//fi (1)
1250                                         }//else (B is not a compound)
1251                                 }//for (int c=0;c<numChildrenA;c++)
1252                                 return;
1253                         }//if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1254                         if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)
1255                                 && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) // B is the compound; nothing is emitted when A is a concave trimesh
1256                         {
1257                                 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1258                                 for (int b=0;b<numChildrenB;b++)
1259                                 {
1260                                         int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1261                                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1262                                         float4 ornB = rigidBodies[bodyIndexB].m_quat;
1263                                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1264                                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1265                                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1266                                         float4 newPosB = qtRotate(ornB,childPosB)+posB;
1267                                         float4 newOrnB = qtMul(ornB,childOrnB);
1268
1269                                         int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1270
1271
1272                                         //////////////////////////////////////
1273                                         // Every child of B is emitted without an AABB test; numFacesA, dmin and DeltaC2 computed below are not used.
1274                                         if (1)
1275                                         {
1276                                                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1277                                                 float dmin = FLT_MAX;
1278                                                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1279                                                 posA.w = 0.f;
1280                                                 float4 posB = newPosB;
1281                                                 posB.w = 0.f;
1282                                                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1283                                                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1284                                                 float4 c0 = transform(&c0local, &posA, &ornA);
1285                                                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1286                                                 float4 ornB =newOrnB;
1287                                                 float4 c1 = transform(&c1local,&posB,&ornB);
1288                                                 const float4 DeltaC2 = c0 - c1;
1289                                                 {//
1290                                                         int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1291                                                         if (compoundPairIdx<maxNumCompoundPairsCapacity)
1292                                                         {
1293                                                                 gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB); // -1: no child index on the A side
1294                                                         }//fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1295                                                 }//
1296                                         }//fi (1)
1297                                 }//for (int b=0;b<numChildrenB;b++)
1298                                 return;
1299                         }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1300                         return;
1301                 }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1302         }//i<numPairs
1303 }
1304
1305 // work-in-progress
1306 __kernel void   findSeparatingAxisKernel( __global const int4* pairs,
1307                                                                                                                                                                         __global const BodyData* rigidBodies,
1308                                                                                                                                                                         __global const btCollidableGpu* collidables,
1309                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes,
1310                                                                                                                                                                         __global const float4* vertices,
1311                                                                                                                                                                         __global const float4* uniqueEdges,
1312                                                                                                                                                                         __global const btGpuFace* faces,
1313                                                                                                                                                                         __global const int* indices,
1314                                                                                                                                                                         __global btAabbCL* aabbs,
1315                                                                                                                                                                         __global volatile float4* separatingNormals,
1316                                                                                                                                                                         __global volatile int* hasSeparatingAxis,
1317                                                                                                                                                                         int numPairs
1318                                                                                                                                                                         )
1319 {
1320
1321         int i = get_global_id(0);
1322
1323         if (i<numPairs)
1324         {
1325
1326
1327                 int bodyIndexA = pairs[i].x;
1328                 int bodyIndexB = pairs[i].y;
1329
1330                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1331                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1332
1333                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1334                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1335
1336
1337                 //once the broadphase avoids static-static pairs, we can remove this test
1338                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1339                 {
1340                         hasSeparatingAxis[i] = 0;
1341                         return;
1342                 }
1343
1344
1345                 if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1346                 {
1347                         hasSeparatingAxis[i] = 0;
1348                         return;
1349                 }
1350
1351                 if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))
1352                 {
1353                         hasSeparatingAxis[i] = 0;
1354                         return;
1355                 }
1356
1357                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1358
1359                 float dmin = FLT_MAX;
1360
1361                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1362                 posA.w = 0.f;
1363                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1364                 posB.w = 0.f;
1365                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1366                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1367                 float4 c0 = transform(&c0local, &posA, &ornA);
1368                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1369                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1370                 float4 c1 = transform(&c1local,&posB,&ornB);
1371                 const float4 DeltaC2 = c0 - c1;
1372                 float4 sepNormal;
1373
1374                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1375                                                                                                                                                                                                 posB,ornB,
1376                                                                                                                                                                                                 DeltaC2,
1377                                                                                                                                                                                                 vertices,uniqueEdges,faces,
1378                                                                                                                                                                                                 indices,&sepNormal,&dmin);
1379                 hasSeparatingAxis[i] = 4;
1380                 if (!sepA)
1381                 {
1382                         hasSeparatingAxis[i] = 0;
1383                 } else
1384                 {
1385                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1386                                                                                                                                                                                                         posA,ornA,
1387                                                                                                                                                                                                         DeltaC2,
1388                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1389                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1390
1391                         if (!sepB)
1392                         {
1393                                 hasSeparatingAxis[i] = 0;
1394                         } else
1395                         {
1396                                 bool sepEE = findSeparatingAxisEdgeEdge(        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1397                                                                                                                                                                                                         posB,ornB,
1398                                                                                                                                                                                                         DeltaC2,
1399                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1400                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1401                                 if (!sepEE)
1402                                 {
1403                                         hasSeparatingAxis[i] = 0;
1404                                 } else
1405                                 {
1406                                         hasSeparatingAxis[i] = 1;
1407                                         separatingNormals[i] = sepNormal;
1408                                 }
1409                         }
1410                 }
1411
1412         }
1413
1414 }
1415
1416
1417 __kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs,
1418                                                                                                                                                                         __global const BodyData* rigidBodies,
1419                                                                                                                                                                         __global const btCollidableGpu* collidables,
1420                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes,
1421                                                                                                                                                                         __global const float4* vertices,
1422                                                                                                                                                                         __global const float4* uniqueEdges,
1423                                                                                                                                                                         __global const btGpuFace* faces,
1424                                                                                                                                                                         __global const int* indices,
1425                                                                                                                                                                         __global btAabbCL* aabbs,
1426                                                                                                                                                                         __global volatile float4* separatingNormals,
1427                                                                                                                                                                         __global volatile int* hasSeparatingAxis,
1428                                                                                                                                                                         __global  float* dmins,
1429                                                                                                                                                                         int numPairs
1430                                                                                                                                                                         )
1431 {
1432
1433         int i = get_global_id(0);
1434
1435         if (i<numPairs)
1436         {
1437
1438
1439                 int bodyIndexA = pairs[i].x;
1440                 int bodyIndexB = pairs[i].y;
1441
1442                 int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1443                 int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1444
1445                 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1446                 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1447
1448                 hasSeparatingAxis[i] = 0;
1449
1450                 //once the broadphase avoids static-static pairs, we can remove this test
1451                 if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1452                 {
1453                         return;
1454                 }
1455
1456
1457                 if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1458                 {
1459                         return;
1460                 }
1461
1462
1463                 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1464
1465                 float dmin = FLT_MAX;
1466
1467                 dmins[i] = dmin;
1468
1469                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1470                 posA.w = 0.f;
1471                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1472                 posB.w = 0.f;
1473                 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1474                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1475                 float4 c0 = transform(&c0local, &posA, &ornA);
1476                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1477                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1478                 float4 c1 = transform(&c1local,&posB,&ornB);
1479                 const float4 DeltaC2 = c0 - c1;
1480                 float4 sepNormal;
1481
1482                 bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1483                                                                                                                                                                                                 posB,ornB,
1484                                                                                                                                                                                                 DeltaC2,
1485                                                                                                                                                                                                 vertices,uniqueEdges,faces,
1486                                                                                                                                                                                                 indices,&sepNormal,&dmin);
1487                 hasSeparatingAxis[i] = 4;
1488                 if (!sepA)
1489                 {
1490                         hasSeparatingAxis[i] = 0;
1491                 } else
1492                 {
1493                         bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1494                                                                                                                                                                                                         posA,ornA,
1495                                                                                                                                                                                                         DeltaC2,
1496                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1497                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1498
1499                         if (sepB)
1500                         {
1501                                 dmins[i] = dmin;
1502                                 hasSeparatingAxis[i] = 1;
1503                                 separatingNormals[i] = sepNormal;
1504                         }
1505                 }
1506
1507         }
1508
1509 }
1510
1511
1512 __kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs,
1513                                                                                                                                                                         __global const BodyData* rigidBodies,
1514                                                                                                                                                                         __global const btCollidableGpu* collidables,
1515                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes,
1516                                                                                                                                                                         __global const float4* vertices,
1517                                                                                                                                                                         __global const float4* uniqueEdges,
1518                                                                                                                                                                         __global const btGpuFace* faces,
1519                                                                                                                                                                         __global const int* indices,
1520                                                                                                                                                                         __global btAabbCL* aabbs,
1521                                                                                                                                                                         __global  float4* separatingNormals,
1522                                                                                                                                                                         __global  int* hasSeparatingAxis,
1523                                                                                                                                                                         __global  float* dmins,
1524                                                                                                                                                                         __global const float4* unitSphereDirections,
1525                                                                                                                                                                         int numUnitSphereDirections,
1526                                                                                                                                                                         int numPairs
1527                                                                                                                                                                         )
1528 {
1529
1530         int i = get_global_id(0);
1531
1532         if (i<numPairs)
1533         {
1534
1535                 if (hasSeparatingAxis[i])
1536                 {
1537
1538                         int bodyIndexA = pairs[i].x;
1539                         int bodyIndexB = pairs[i].y;
1540
1541                         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1542                         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1543
1544                         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1545                         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1546
1547
1548                         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1549
1550                         float dmin = dmins[i];
1551
1552                         float4 posA = rigidBodies[bodyIndexA].m_pos;
1553                         posA.w = 0.f;
1554                         float4 posB = rigidBodies[bodyIndexB].m_pos;
1555                         posB.w = 0.f;
1556                         float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1557                         float4 ornA = rigidBodies[bodyIndexA].m_quat;
1558                         float4 c0 = transform(&c0local, &posA, &ornA);
1559                         float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1560                         float4 ornB =rigidBodies[bodyIndexB].m_quat;
1561                         float4 c1 = transform(&c1local,&posB,&ornB);
1562                         const float4 DeltaC2 = c0 - c1;
1563                         float4 sepNormal = separatingNormals[i];
1564
1565
1566
1567                         bool sepEE = false;
1568                         int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;
1569                         if (numEdgeEdgeDirections<=numUnitSphereDirections)
1570                         {
1571                                 sepEE = findSeparatingAxisEdgeEdge(     &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1572                                                                                                                                                                                                         posB,ornB,
1573                                                                                                                                                                                                         DeltaC2,
1574                                                                                                                                                                                                         vertices,uniqueEdges,faces,
1575                                                                                                                                                                                                         indices,&sepNormal,&dmin);
1576
1577                                         if (!sepEE)
1578                                         {
1579                                                 hasSeparatingAxis[i] = 0;
1580                                         } else
1581                                         {
1582                                                 hasSeparatingAxis[i] = 1;
1583                                                 separatingNormals[i] = sepNormal;
1584                                         }
1585                         }
1586                         /*
1587                         ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy
1588                         else
1589                         {
1590                                 sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1591                                                                                                                                                                                                         posB,ornB,
1592                                                                                                                                                                                                         DeltaC2,
1593                                                                                                                                                                                                         vertices,unitSphereDirections,numUnitSphereDirections,
1594                                                                                                                                                                                                         &sepNormal,&dmin);
1595                                         if (!sepEE)
1596                                         {
1597                                                 hasSeparatingAxis[i] = 0;
1598                                         } else
1599                                         {
1600                                                 hasSeparatingAxis[i] = 1;
1601                                                 separatingNormals[i] = sepNormal;
1602                                         }
1603                         }
1604                         */
1605                 }               //if (hasSeparatingAxis[i])
1606         }//(i<numPairs)
1607 }
1608
1609
1610
1611
1612
1613 inline int      findClippingFaces(const float4 separatingNormal,
1614                       const ConvexPolyhedronCL* hullA,
1615                                           __global const ConvexPolyhedronCL* hullB,
1616                       const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
1617                        __global float4* worldVertsA1,
1618                       __global float4* worldNormalsA1,
1619                       __global float4* worldVertsB1,
1620                       int capacityWorldVerts,
1621                       const float minDist, float maxDist,
1622                                           const float4* verticesA,
1623                       const btGpuFace* facesA,
1624                       const int* indicesA,
1625                                           __global const float4* verticesB,
1626                       __global const btGpuFace* facesB,
1627                       __global const int* indicesB,
1628                       __global int4* clippingFaces, int pairIndex)
1629 {
1630         int numContactsOut = 0;
1631         int numWorldVertsB1= 0;
1632
1633
1634         int closestFaceB=0;
1635         float dmax = -FLT_MAX;
1636
1637         {
1638                 for(int face=0;face<hullB->m_numFaces;face++)
1639                 {
1640                         const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
1641                                               facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
1642                         const float4 WorldNormal = qtRotate(ornB, Normal);
1643                         float d = dot3F4(WorldNormal,separatingNormal);
1644                         if (d > dmax)
1645                         {
1646                                 dmax = d;
1647                                 closestFaceB = face;
1648                         }
1649                 }
1650         }
1651
1652         {
1653                 const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];
1654                 int numVertices = polyB.m_numIndices;
1655         if (numVertices>capacityWorldVerts)
1656             numVertices = capacityWorldVerts;
1657
1658                 for(int e0=0;e0<numVertices;e0++)
1659                 {
1660             if (e0<capacityWorldVerts)
1661             {
1662                 const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
1663                 worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
1664             }
1665                 }
1666         }
1667
1668     int closestFaceA=0;
1669         {
1670                 float dmin = FLT_MAX;
1671                 for(int face=0;face<hullA->m_numFaces;face++)
1672                 {
1673                         const float4 Normal = make_float4(
1674                                               facesA[hullA->m_faceOffset+face].m_plane.x,
1675                                               facesA[hullA->m_faceOffset+face].m_plane.y,
1676                                               facesA[hullA->m_faceOffset+face].m_plane.z,
1677                                               0.f);
1678                         const float4 faceANormalWS = qtRotate(ornA,Normal);
1679
1680                         float d = dot3F4(faceANormalWS,separatingNormal);
1681                         if (d < dmin)
1682                         {
1683                                 dmin = d;
1684                                 closestFaceA = face;
1685                 worldNormalsA1[pairIndex] = faceANormalWS;
1686                         }
1687                 }
1688         }
1689
1690     int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
1691     if (numVerticesA>capacityWorldVerts)
1692        numVerticesA = capacityWorldVerts;
1693
1694         for(int e0=0;e0<numVerticesA;e0++)
1695         {
1696         if (e0<capacityWorldVerts)
1697         {
1698             const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
1699             worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
1700         }
1701     }
1702
1703     clippingFaces[pairIndex].x = closestFaceA;
1704     clippingFaces[pairIndex].y = closestFaceB;
1705     clippingFaces[pairIndex].z = numVerticesA;
1706     clippingFaces[pairIndex].w = numWorldVertsB1;
1707
1708
1709         return numContactsOut;
1710 }
1711
1712
1713
1714
1715 // work-in-progress
1716 __kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,
1717                                                                                                                                                                         __global const BodyData* rigidBodies,
1718                                                                                                                                                                         __global const btCollidableGpu* collidables,
1719                                                                                                                                                                         __global const ConvexPolyhedronCL* convexShapes,
1720                                                                                                                                                                         __global const float4* vertices,
1721                                                                                                                                                                         __global const float4* uniqueEdges,
1722                                                                                                                                                                         __global const btGpuFace* faces,
1723                                                                                                                                                                         __global const int* indices,
1724                                                                                                                                                                         __global const btGpuChildShape* gpuChildShapes,
1725                                                                                                                                                                         __global btAabbCL* aabbs,
1726                                                                                                                                                                         __global float4* concaveSeparatingNormalsOut,
1727                                                                                                                                                                         __global int* concaveHasSeparatingNormals,
1728                                                                                                                                                                         __global int4* clippingFacesOut,
1729                                                                                                                                                                         __global float4* worldVertsA1GPU,
1730                                                                                                                                                                         __global float4*  worldNormalsAGPU,
1731                                                                                                                                                                         __global float4* worldVertsB1GPU,
1732                                                                                                                                                                         int vertexFaceCapacity,
1733                                                                                                                                                                         int numConcavePairs
1734                                                                                                                                                                         )
1735 {
1736
1737         int i = get_global_id(0);
1738         if (i>=numConcavePairs)
1739                 return;
1740
1741         concaveHasSeparatingNormals[i] = 0;
1742
1743         int pairIdx = i;
1744
1745         int bodyIndexA = concavePairs[i].x;
1746         int bodyIndexB = concavePairs[i].y;
1747
1748         int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1749         int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1750
1751         int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1752         int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1753
1754         if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
1755                 collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
1756         {
1757                 concavePairs[pairIdx].w = -1;
1758                 return;
1759         }
1760
1761
1762
1763         int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1764         int numActualConcaveConvexTests = 0;
1765
1766         int f = concavePairs[i].z;
1767
1768         bool overlap = false;
1769
1770         ConvexPolyhedronCL convexPolyhedronA;
1771
1772         //add 3 vertices of the triangle
1773         convexPolyhedronA.m_numVertices = 3;
1774         convexPolyhedronA.m_vertexOffset = 0;
1775         float4  localCenter = make_float4(0.f,0.f,0.f,0.f);
1776
1777         btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
1778         float4 triMinAabb, triMaxAabb;
1779         btAabbCL triAabb;
1780         triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);
1781         triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);
1782
1783         float4 verticesA[3];
1784         for (int i=0;i<3;i++)
1785         {
1786                 int index = indices[face.m_indexOffset+i];
1787                 float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1788                 verticesA[i] = vert;
1789                 localCenter += vert;
1790
1791                 triAabb.m_min = min(triAabb.m_min,vert);
1792                 triAabb.m_max = max(triAabb.m_max,vert);
1793
1794         }
1795
1796         overlap = true;
1797         overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;
1798         overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;
1799         overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;
1800
1801         if (overlap)
1802         {
1803                 float dmin = FLT_MAX;
1804                 int hasSeparatingAxis=5;
1805                 float4 sepAxis=make_float4(1,2,3,4);
1806
1807                 int localCC=0;
1808                 numActualConcaveConvexTests++;
1809
1810                 //a triangle has 3 unique edges
1811                 convexPolyhedronA.m_numUniqueEdges = 3;
1812                 convexPolyhedronA.m_uniqueEdgesOffset = 0;
1813                 float4 uniqueEdgesA[3];
1814
1815                 uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1816                 uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1817                 uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1818
1819
1820                 convexPolyhedronA.m_faceOffset = 0;
1821
1822                 float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1823
1824                 btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];
1825                 int indicesA[3+3+2+2+2];
1826                 int curUsedIndices=0;
1827                 int fidx=0;
1828
1829                 //front side of triangle
1830                 {
1831                         facesA[fidx].m_indexOffset=curUsedIndices;
1832                         indicesA[0] = 0;
1833                         indicesA[1] = 1;
1834                         indicesA[2] = 2;
1835                         curUsedIndices+=3;
1836                         float c = face.m_plane.w;
1837                         facesA[fidx].m_plane.x = normal.x;
1838                         facesA[fidx].m_plane.y = normal.y;
1839                         facesA[fidx].m_plane.z = normal.z;
1840                         facesA[fidx].m_plane.w = c;
1841                         facesA[fidx].m_numIndices=3;
1842                 }
1843                 fidx++;
1844                 //back side of triangle
1845                 {
1846                         facesA[fidx].m_indexOffset=curUsedIndices;
1847                         indicesA[3]=2;
1848                         indicesA[4]=1;
1849                         indicesA[5]=0;
1850                         curUsedIndices+=3;
1851                         float c = dot(normal,verticesA[0]);
1852                         float c1 = -face.m_plane.w;
1853                         facesA[fidx].m_plane.x = -normal.x;
1854                         facesA[fidx].m_plane.y = -normal.y;
1855                         facesA[fidx].m_plane.z = -normal.z;
1856                         facesA[fidx].m_plane.w = c;
1857                         facesA[fidx].m_numIndices=3;
1858                 }
1859                 fidx++;
1860
1861                 bool addEdgePlanes = true;
1862                 if (addEdgePlanes)
1863                 {
1864                         int numVertices=3;
1865                         int prevVertex = numVertices-1;
1866                         for (int i=0;i<numVertices;i++)
1867                         {
1868                                 float4 v0 = verticesA[i];
1869                                 float4 v1 = verticesA[prevVertex];
1870
1871                                 float4 edgeNormal = normalize(cross(normal,v1-v0));
1872                                 float c = -dot(edgeNormal,v0);
1873
1874                                 facesA[fidx].m_numIndices = 2;
1875                                 facesA[fidx].m_indexOffset=curUsedIndices;
1876                                 indicesA[curUsedIndices++]=i;
1877                                 indicesA[curUsedIndices++]=prevVertex;
1878
1879                                 facesA[fidx].m_plane.x = edgeNormal.x;
1880                                 facesA[fidx].m_plane.y = edgeNormal.y;
1881                                 facesA[fidx].m_plane.z = edgeNormal.z;
1882                                 facesA[fidx].m_plane.w = c;
1883                                 fidx++;
1884                                 prevVertex = i;
1885                         }
1886                 }
1887                 convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1888                 convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1889
1890
1891                 float4 posA = rigidBodies[bodyIndexA].m_pos;
1892                 posA.w = 0.f;
1893                 float4 posB = rigidBodies[bodyIndexB].m_pos;
1894                 posB.w = 0.f;
1895
1896                 float4 ornA = rigidBodies[bodyIndexA].m_quat;
1897                 float4 ornB =rigidBodies[bodyIndexB].m_quat;
1898
1899
1900
1901
1902                 ///////////////////
1903                 ///compound shape support
1904
1905                 if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1906                 {
1907                         int compoundChild = concavePairs[pairIdx].w;
1908                         int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
1909                         int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1910                         float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1911                         float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1912                         float4 newPosB = transform(&childPosB,&posB,&ornB);
1913                         float4 newOrnB = qtMul(ornB,childOrnB);
1914                         posB = newPosB;
1915                         ornB = newOrnB;
1916                         shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1917                 }
1918                 //////////////////
1919
1920                 float4 c0local = convexPolyhedronA.m_localCenter;
1921                 float4 c0 = transform(&c0local, &posA, &ornA);
1922                 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1923                 float4 c1 = transform(&c1local,&posB,&ornB);
1924                 const float4 DeltaC2 = c0 - c1;
1925
1926
1927                 bool sepA = findSeparatingAxisLocalA(   &convexPolyhedronA, &convexShapes[shapeIndexB],
1928                                                                                                 posA,ornA,
1929                                                                                                 posB,ornB,
1930                                                                                                 DeltaC2,
1931                                                                                                 verticesA,uniqueEdgesA,facesA,indicesA,
1932                                                                                                 vertices,uniqueEdges,faces,indices,
1933                                                                                                 &sepAxis,&dmin);
1934                 hasSeparatingAxis = 4;
1935                 if (!sepA)
1936                 {
1937                         hasSeparatingAxis = 0;
1938                 } else
1939                 {
1940                         bool sepB = findSeparatingAxisLocalB(   &convexShapes[shapeIndexB],&convexPolyhedronA,
1941                                                                                                 posB,ornB,
1942                                                                                                 posA,ornA,
1943                                                                                                 DeltaC2,
1944                                                                                                 vertices,uniqueEdges,faces,indices,
1945                                                                                                 verticesA,uniqueEdgesA,facesA,indicesA,
1946                                                                                                 &sepAxis,&dmin);
1947
1948                         if (!sepB)
1949                         {
1950                                 hasSeparatingAxis = 0;
1951                         } else
1952                         {
1953                                 bool sepEE = findSeparatingAxisEdgeEdgeLocalA(  &convexPolyhedronA, &convexShapes[shapeIndexB],
1954                                                                                                                         posA,ornA,
1955                                                                                                                         posB,ornB,
1956                                                                                                                         DeltaC2,
1957                                                                                                                         verticesA,uniqueEdgesA,facesA,indicesA,
1958                                                                                                                         vertices,uniqueEdges,faces,indices,
1959                                                                                                                         &sepAxis,&dmin);
1960
1961                                 if (!sepEE)
1962                                 {
1963                                         hasSeparatingAxis = 0;
1964                                 } else
1965                                 {
1966                                         hasSeparatingAxis = 1;
1967                                 }
1968                         }
1969                 }
1970
1971                 if (hasSeparatingAxis)
1972                 {
1973                         sepAxis.w = dmin;
1974                         concaveSeparatingNormalsOut[pairIdx]=sepAxis;
1975                         concaveHasSeparatingNormals[i]=1;
1976
1977
1978                         float minDist = -1e30f;
1979                         float maxDist = 0.02f;
1980
1981
1982
1983                         findClippingFaces(sepAxis,
1984                      &convexPolyhedronA,
1985                                          &convexShapes[shapeIndexB],
1986                                          posA,ornA,
1987                                          posB,ornB,
1988                       worldVertsA1GPU,
1989                       worldNormalsAGPU,
1990                       worldVertsB1GPU,
1991                                           vertexFaceCapacity,
1992                       minDist, maxDist,
1993                       verticesA,
1994                       facesA,
1995                       indicesA,
1996                                           vertices,
1997                       faces,
1998                       indices,
1999                       clippingFacesOut, pairIdx);
2000
2001
2002                 } else
2003                 {
2004                         //mark this pair as in-active
2005                         concavePairs[pairIdx].w = -1;
2006                 }
2007         }
2008         else
2009         {
2010                 //mark this pair as in-active
2011                 concavePairs[pairIdx].w = -1;
2012         }
2013
2014         concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts
2015 }
2016
2017
2018