dali-physics/third-party/bullet3/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl

   1 /*
   2 Copyright (c) 2012 Advanced Micro Devices, Inc.
   3
   4 This software is provided 'as-is', without any express or implied warranty.
   5 In no event will the authors be held liable for any damages arising from the use of this software.
   6 Permission is granted to anyone to use this software for any purpose,
   7 including commercial applications, and to alter it and redistribute it freely,
   8 subject to the following restrictions:
   9
  10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  12 3. This notice may not be removed or altered from any source distribution.
  13 */
  14 //Originally written by Takahiro Harada
  15
  16
  17 //#pragma OPENCL EXTENSION cl_amd_printf : enable
  18 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
  19 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
  20 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
  21 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
  22
  23
  24 #ifdef cl_ext_atomic_counters_32
  25 #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
  26 #else
  27 #define counter32_t volatile global int*
  28 #endif
  29
  30 typedef unsigned int u32;
  31 typedef unsigned short u16;
  32 typedef unsigned char u8;
  33
  34 #define GET_GROUP_IDX get_group_id(0)
  35 #define GET_LOCAL_IDX get_local_id(0)
  36 #define GET_GLOBAL_IDX get_global_id(0)
  37 #define GET_GROUP_SIZE get_local_size(0)
  38 #define GET_NUM_GROUPS get_num_groups(0)
  39 #define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
  40 #define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
  41 #define AtomInc(x) atom_inc(&(x))
  42 #define AtomInc1(x, out) out = atom_inc(&(x))
  43 #define AppendInc(x, out) out = atomic_inc(x)
  44 #define AtomAdd(x, value) atom_add(&(x), value)
  45 #define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
  46 #define AtomXhg(x, value) atom_xchg ( &(x), value )
  47
  48
  49 #define SELECT_UINT4( b, a, condition ) select( b,a,condition )
  50
  51 #define mymake_float4 (float4)
  52 //#define make_float2 (float2)
  53 //#define make_uint4 (uint4)
  54 //#define make_int4 (int4)
  55 //#define make_uint2 (uint2)
  56 //#define make_int2 (int2)
  57
  58
  59 #define max2 max
  60 #define min2 min
  61
  62
  63 ///////////////////////////////////////
  64 //      Vector
  65 ///////////////////////////////////////
  66
  67
  68
  69
  70 __inline
  71 float4 fastNormalize4(float4 v)
  72 {
  73         return fast_normalize(v);
  74 }
  75
  76
  77
  78 __inline
  79 float4 cross3(float4 a, float4 b)
  80 {
  81         return cross(a,b);
  82 }
  83
  84 __inline
  85 float dot3F4(float4 a, float4 b)
  86 {
  87         float4 a1 = mymake_float4(a.xyz,0.f);
  88         float4 b1 = mymake_float4(b.xyz,0.f);
  89         return dot(a1, b1);
  90 }
  91
  92
  93
  94
  95 __inline
  96 float4 normalize3(const float4 a)
  97 {
  98         float4 n = mymake_float4(a.x, a.y, a.z, 0.f);
  99         return fastNormalize4( n );
 100 //      float length = sqrtf(dot3F4(a, a));
 101 //      return 1.f/length * a;
 102 }
 103
 104
 105
 106
 107 ///////////////////////////////////////
 108 //      Matrix3x3
 109 ///////////////////////////////////////
 110
 111 typedef struct
 112 {
 113         float4 m_row[3];
 114 }Matrix3x3;
 115
 116
 117
 118
 119
 120
 121 __inline
 122 float4 mtMul1(Matrix3x3 a, float4 b);
 123
 124 __inline
 125 float4 mtMul3(float4 a, Matrix3x3 b);
 126
 127
 128
 129
 130 __inline
 131 float4 mtMul1(Matrix3x3 a, float4 b)
 132 {
 133         float4 ans;
 134         ans.x = dot3F4( a.m_row[0], b );
 135         ans.y = dot3F4( a.m_row[1], b );
 136         ans.z = dot3F4( a.m_row[2], b );
 137         ans.w = 0.f;
 138         return ans;
 139 }
 140
 141 __inline
 142 float4 mtMul3(float4 a, Matrix3x3 b)
 143 {
 144         float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
 145         float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
 146         float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
 147
 148         float4 ans;
 149         ans.x = dot3F4( a, colx );
 150         ans.y = dot3F4( a, coly );
 151         ans.z = dot3F4( a, colz );
 152         return ans;
 153 }
 154
 155 ///////////////////////////////////////
 156 //      Quaternion
 157 ///////////////////////////////////////
 158
 159 typedef float4 Quaternion;
 160
 161
 162
 163
 164
 165
 166
 167 #define WG_SIZE 64
 168
 169 typedef struct
 170 {
 171         float4 m_pos;
 172         Quaternion m_quat;
 173         float4 m_linVel;
 174         float4 m_angVel;
 175
 176         u32 m_shapeIdx;
 177         float m_invMass;
 178         float m_restituitionCoeff;
 179         float m_frictionCoeff;
 180 } Body;
 181
 182 typedef struct
 183 {
 184         Matrix3x3 m_invInertia;
 185         Matrix3x3 m_initInvInertia;
 186 } Shape;
 187
 188 typedef struct
 189 {
 190         float4 m_linear;
 191         float4 m_worldPos[4];
 192         float4 m_center;
 193         float m_jacCoeffInv[4];
 194         float m_b[4];
 195         float m_appliedRambdaDt[4];
 196
 197         float m_fJacCoeffInv[2];
 198         float m_fAppliedRambdaDt[2];
 199
 200         u32 m_bodyA;
 201         u32 m_bodyB;
 202
 203         int m_batchIdx;
 204         u32 m_paddings[1];
 205 } Constraint4;
 206
 207
 208
 209 typedef struct
 210 {
 211         int m_nConstraints;
 212         int m_start;
 213         int m_batchIdx;
 214         int m_nSplit;
 215 //      int m_paddings[1];
 216 } ConstBuffer;
 217
 218 typedef struct
 219 {
 220         int m_solveFriction;
 221         int m_maxBatch; //      long batch really kills the performance
 222         int m_batchIdx;
 223         int m_nSplit;
 224 //      int m_paddings[1];
 225 } ConstBufferBatchSolve;
 226
 227 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);
 228
 229 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
 230 {
 231         *linear = mymake_float4(-n.xyz,0.f);
 232         *angular0 = -cross3(r0, n);
 233         *angular1 = cross3(r1, n);
 234 }
 235
 236 float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );
 237
 238 float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )
 239 {
 240         return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
 241 }
 242
 243
 244 float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
 245                                    float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);
 246
 247 float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
 248                                         float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)
 249 {
 250         //      linear0,1 are normlized
 251         float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
 252         float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
 253         float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
 254         float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
 255         return -1.f/(jmj0+jmj1+jmj2+jmj3);
 256 }
 257
 258
 259 void solveContact(__global Constraint4* cs,
 260                                   float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
 261                                   float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);
 262
 263 void solveContact(__global Constraint4* cs,
 264                         float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
 265                         float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)
 266 {
 267         float minRambdaDt = 0;
 268         float maxRambdaDt = FLT_MAX;
 269
 270         for(int ic=0; ic<4; ic++)
 271         {
 272                 if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
 273
 274                 float4 angular0, angular1, linear;
 275                 float4 r0 = cs->m_worldPos[ic] - posA;
 276                 float4 r1 = cs->m_worldPos[ic] - posB;
 277                 setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );
 278
 279                 float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1,
 280                         *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];
 281                 rambdaDt *= cs->m_jacCoeffInv[ic];
 282
 283                 {
 284                         float prevSum = cs->m_appliedRambdaDt[ic];
 285                         float updated = prevSum;
 286                         updated += rambdaDt;
 287                         updated = max2( updated, minRambdaDt );
 288                         updated = min2( updated, maxRambdaDt );
 289                         rambdaDt = updated - prevSum;
 290                         cs->m_appliedRambdaDt[ic] = updated;
 291                 }
 292
 293                 float4 linImp0 = invMassA*linear*rambdaDt;
 294                 float4 linImp1 = invMassB*(-linear)*rambdaDt;
 295                 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
 296                 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
 297
 298                 *linVelA += linImp0;
 299                 *angVelA += angImp0;
 300                 *linVelB += linImp1;
 301                 *angVelB += angImp1;
 302         }
 303 }
 304
 305 void btPlaneSpace1 (const float4* n, float4* p, float4* q);
 306  void btPlaneSpace1 (const float4* n, float4* p, float4* q)
 307 {
 308   if (fabs(n[0].z) > 0.70710678f) {
 309     // choose p in y-z plane
 310     float a = n[0].y*n[0].y + n[0].z*n[0].z;
 311     float k = 1.f/sqrt(a);
 312     p[0].x = 0;
 313         p[0].y = -n[0].z*k;
 314         p[0].z = n[0].y*k;
 315     // set q = n x p
 316     q[0].x = a*k;
 317         q[0].y = -n[0].x*p[0].z;
 318         q[0].z = n[0].x*p[0].y;
 319   }
 320   else {
 321     // choose p in x-y plane
 322     float a = n[0].x*n[0].x + n[0].y*n[0].y;
 323     float k = 1.f/sqrt(a);
 324     p[0].x = -n[0].y*k;
 325         p[0].y = n[0].x*k;
 326         p[0].z = 0;
 327     // set q = n x p
 328     q[0].x = -n[0].z*p[0].y;
 329         q[0].y = n[0].z*p[0].x;
 330         q[0].z = a*k;
 331   }
 332 }
 333
 334 void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);
 335 void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)
 336 {
 337         //float frictionCoeff = ldsCs[0].m_linear.w;
 338         int aIdx = ldsCs[0].m_bodyA;
 339         int bIdx = ldsCs[0].m_bodyB;
 340
 341         float4 posA = gBodies[aIdx].m_pos;
 342         float4 linVelA = gBodies[aIdx].m_linVel;
 343         float4 angVelA = gBodies[aIdx].m_angVel;
 344         float invMassA = gBodies[aIdx].m_invMass;
 345         Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
 346
 347         float4 posB = gBodies[bIdx].m_pos;
 348         float4 linVelB = gBodies[bIdx].m_linVel;
 349         float4 angVelB = gBodies[bIdx].m_angVel;
 350         float invMassB = gBodies[bIdx].m_invMass;
 351         Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
 352
 353         solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
 354                         posB, &linVelB, &angVelB, invMassB, invInertiaB );
 355
 356   if (gBodies[aIdx].m_invMass)
 357   {
 358                 gBodies[aIdx].m_linVel = linVelA;
 359                 gBodies[aIdx].m_angVel = angVelA;
 360         } else
 361         {
 362                 gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);
 363                 gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);
 364
 365         }
 366         if (gBodies[bIdx].m_invMass)
 367   {
 368                 gBodies[bIdx].m_linVel = linVelB;
 369                 gBodies[bIdx].m_angVel = angVelB;
 370         } else
 371         {
 372                 gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);
 373                 gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);
 374
 375         }
 376
 377 }
 378
 379
 380
 381 typedef struct
 382 {
 383         int m_valInt0;
 384         int m_valInt1;
 385         int m_valInt2;
 386         int m_valInt3;
 387
 388         float m_val0;
 389         float m_val1;
 390         float m_val2;
 391         float m_val3;
 392 } SolverDebugInfo;
 393
 394
 395
 396
 397 __kernel
 398 __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
 399 void BatchSolveKernelContact(__global Body* gBodies,
 400                       __global Shape* gShapes,
 401                       __global Constraint4* gConstraints,
 402                       __global int* gN,
 403                       __global int* gOffsets,
 404                       __global  int* batchSizes,
 405                        int maxBatch1,
 406                        int cellBatch,
 407                        int4 nSplit
 408                       )
 409 {
 410         //__local int ldsBatchIdx[WG_SIZE+1];
 411         __local int ldsCurBatch;
 412         __local int ldsNextBatch;
 413         __local int ldsStart;
 414
 415         int lIdx = GET_LOCAL_IDX;
 416         int wgIdx = GET_GROUP_IDX;
 417
 418 //      int gIdx = GET_GLOBAL_IDX;
 419 //      debugInfo[gIdx].m_valInt0 = gIdx;
 420         //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
 421
 422
 423
 424
 425         int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);
 426         int remain= (wgIdx%((nSplit.x*nSplit.y)/4));
 427         int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);
 428         int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);
 429         int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);
 430
 431         //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
 432         //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
 433         //int cellIdx = xIdx+yIdx*nSplit;
 434
 435         if( gN[cellIdx] == 0 )
 436                 return;
 437
 438         int maxBatch = batchSizes[cellIdx];
 439
 440
 441         const int start = gOffsets[cellIdx];
 442         const int end = start + gN[cellIdx];
 443
 444
 445
 446
 447         if( lIdx == 0 )
 448         {
 449                 ldsCurBatch = 0;
 450                 ldsNextBatch = 0;
 451                 ldsStart = start;
 452         }
 453
 454
 455         GROUP_LDS_BARRIER;
 456
 457         int idx=ldsStart+lIdx;
 458         while (ldsCurBatch < maxBatch)
 459         {
 460                 for(; idx<end; )
 461                 {
 462                         if (gConstraints[idx].m_batchIdx == ldsCurBatch)
 463                         {
 464                                         solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
 465
 466                                  idx+=64;
 467                         } else
 468                         {
 469                                 break;
 470                         }
 471                 }
 472                 GROUP_LDS_BARRIER;
 473
 474                 if( lIdx == 0 )
 475                 {
 476                         ldsCurBatch++;
 477                 }
 478                 GROUP_LDS_BARRIER;
 479         }
 480
 481
 482 }
 483
 484
 485
 486 __kernel void solveSingleContactKernel(__global Body* gBodies,
 487                       __global Shape* gShapes,
 488                       __global Constraint4* gConstraints,
 489                        int cellIdx,
 490                        int batchOffset,
 491                        int numConstraintsInBatch
 492                       )
 493 {
 494
 495         int index = get_global_id(0);
 496         if (index < numConstraintsInBatch)
 497         {
 498                 int idx=batchOffset+index;
 499                 solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
 500         }
 501 }