2 Copyright (c) 2012 Advanced Micro Devices, Inc.
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
14 //Originally written by Takahiro Harada
17 //#pragma OPENCL EXTENSION cl_amd_printf : enable
18 #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
19 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
20 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
21 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
24 #ifdef cl_ext_atomic_counters_32
25 #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
27 #define counter32_t volatile global int*
30 typedef unsigned int u32;
31 typedef unsigned short u16;
32 typedef unsigned char u8;
34 #define GET_GROUP_IDX get_group_id(0)
35 #define GET_LOCAL_IDX get_local_id(0)
36 #define GET_GLOBAL_IDX get_global_id(0)
37 #define GET_GROUP_SIZE get_local_size(0)
38 #define GET_NUM_GROUPS get_num_groups(0)
39 #define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
40 #define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
41 #define AtomInc(x) atom_inc(&(x))
42 #define AtomInc1(x, out) out = atom_inc(&(x))
43 #define AppendInc(x, out) out = atomic_inc(x)
44 #define AtomAdd(x, value) atom_add(&(x), value)
45 #define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
46 #define AtomXhg(x, value) atom_xchg ( &(x), value )
49 #define SELECT_UINT4( b, a, condition ) select( b,a,condition )
51 #define mymake_float4 (float4)
52 //#define make_float2 (float2)
53 //#define make_uint4 (uint4)
54 //#define make_int4 (int4)
55 //#define make_uint2 (uint2)
56 //#define make_int2 (int2)
63 ///////////////////////////////////////
65 ///////////////////////////////////////
71 float4 fastNormalize4(float4 v)
73 return fast_normalize(v);
79 float4 cross3(float4 a, float4 b)
85 float dot3F4(float4 a, float4 b)
87 float4 a1 = mymake_float4(a.xyz,0.f);
88 float4 b1 = mymake_float4(b.xyz,0.f);
96 float4 normalize3(const float4 a)
98 float4 n = mymake_float4(a.x, a.y, a.z, 0.f);
99 return fastNormalize4( n );
100 // float length = sqrtf(dot3F4(a, a));
101 // return 1.f/length * a;
107 ///////////////////////////////////////
109 ///////////////////////////////////////
122 float4 mtMul1(Matrix3x3 a, float4 b);
125 float4 mtMul3(float4 a, Matrix3x3 b);
131 float4 mtMul1(Matrix3x3 a, float4 b)
134 ans.x = dot3F4( a.m_row[0], b );
135 ans.y = dot3F4( a.m_row[1], b );
136 ans.z = dot3F4( a.m_row[2], b );
142 float4 mtMul3(float4 a, Matrix3x3 b)
144 float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
145 float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
146 float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
149 ans.x = dot3F4( a, colx );
150 ans.y = dot3F4( a, coly );
151 ans.z = dot3F4( a, colz );
155 ///////////////////////////////////////
157 ///////////////////////////////////////
159 typedef float4 Quaternion;
178 float m_restituitionCoeff;
179 float m_frictionCoeff;
184 Matrix3x3 m_invInertia;
185 Matrix3x3 m_initInvInertia;
191 float4 m_worldPos[4];
193 float m_jacCoeffInv[4];
195 float m_appliedRambdaDt[4];
197 float m_fJacCoeffInv[2];
198 float m_fAppliedRambdaDt[2];
215 // int m_paddings[1];
221 int m_maxBatch; // long batch really kills the performance
224 // int m_paddings[1];
225 } ConstBufferBatchSolve;
227 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);
229 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
231 *linear = mymake_float4(-n.xyz,0.f);
232 *angular0 = -cross3(r0, n);
233 *angular1 = cross3(r1, n);
236 float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );
238 float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )
240 return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
244 float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
245 float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);
247 float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
248 float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)
250 // linear0,1 are normlized
251 float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
252 float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
253 float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
254 float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
255 return -1.f/(jmj0+jmj1+jmj2+jmj3);
259 void solveContact(__global Constraint4* cs,
260 float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
261 float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);
263 void solveContact(__global Constraint4* cs,
264 float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
265 float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)
267 float minRambdaDt = 0;
268 float maxRambdaDt = FLT_MAX;
270 for(int ic=0; ic<4; ic++)
272 if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
274 float4 angular0, angular1, linear;
275 float4 r0 = cs->m_worldPos[ic] - posA;
276 float4 r1 = cs->m_worldPos[ic] - posB;
277 setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );
279 float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1,
280 *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];
281 rambdaDt *= cs->m_jacCoeffInv[ic];
284 float prevSum = cs->m_appliedRambdaDt[ic];
285 float updated = prevSum;
287 updated = max2( updated, minRambdaDt );
288 updated = min2( updated, maxRambdaDt );
289 rambdaDt = updated - prevSum;
290 cs->m_appliedRambdaDt[ic] = updated;
293 float4 linImp0 = invMassA*linear*rambdaDt;
294 float4 linImp1 = invMassB*(-linear)*rambdaDt;
295 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
296 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
305 void btPlaneSpace1 (const float4* n, float4* p, float4* q);
306 void btPlaneSpace1 (const float4* n, float4* p, float4* q)
308 if (fabs(n[0].z) > 0.70710678f) {
309 // choose p in y-z plane
310 float a = n[0].y*n[0].y + n[0].z*n[0].z;
311 float k = 1.f/sqrt(a);
317 q[0].y = -n[0].x*p[0].z;
318 q[0].z = n[0].x*p[0].y;
321 // choose p in x-y plane
322 float a = n[0].x*n[0].x + n[0].y*n[0].y;
323 float k = 1.f/sqrt(a);
328 q[0].x = -n[0].z*p[0].y;
329 q[0].y = n[0].z*p[0].x;
334 void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);
335 void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)
337 //float frictionCoeff = ldsCs[0].m_linear.w;
338 int aIdx = ldsCs[0].m_bodyA;
339 int bIdx = ldsCs[0].m_bodyB;
341 float4 posA = gBodies[aIdx].m_pos;
342 float4 linVelA = gBodies[aIdx].m_linVel;
343 float4 angVelA = gBodies[aIdx].m_angVel;
344 float invMassA = gBodies[aIdx].m_invMass;
345 Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
347 float4 posB = gBodies[bIdx].m_pos;
348 float4 linVelB = gBodies[bIdx].m_linVel;
349 float4 angVelB = gBodies[bIdx].m_angVel;
350 float invMassB = gBodies[bIdx].m_invMass;
351 Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
353 solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
354 posB, &linVelB, &angVelB, invMassB, invInertiaB );
356 if (gBodies[aIdx].m_invMass)
358 gBodies[aIdx].m_linVel = linVelA;
359 gBodies[aIdx].m_angVel = angVelA;
362 gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);
363 gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);
366 if (gBodies[bIdx].m_invMass)
368 gBodies[bIdx].m_linVel = linVelB;
369 gBodies[bIdx].m_angVel = angVelB;
372 gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);
373 gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);
398 __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
399 void BatchSolveKernelContact(__global Body* gBodies,
400 __global Shape* gShapes,
401 __global Constraint4* gConstraints,
403 __global int* gOffsets,
404 __global int* batchSizes,
410 //__local int ldsBatchIdx[WG_SIZE+1];
411 __local int ldsCurBatch;
412 __local int ldsNextBatch;
413 __local int ldsStart;
415 int lIdx = GET_LOCAL_IDX;
416 int wgIdx = GET_GROUP_IDX;
418 // int gIdx = GET_GLOBAL_IDX;
419 // debugInfo[gIdx].m_valInt0 = gIdx;
420 //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
425 int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);
426 int remain= (wgIdx%((nSplit.x*nSplit.y)/4));
427 int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);
428 int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);
429 int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);
431 //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
432 //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
433 //int cellIdx = xIdx+yIdx*nSplit;
435 if( gN[cellIdx] == 0 )
438 int maxBatch = batchSizes[cellIdx];
441 const int start = gOffsets[cellIdx];
442 const int end = start + gN[cellIdx];
457 int idx=ldsStart+lIdx;
458 while (ldsCurBatch < maxBatch)
462 if (gConstraints[idx].m_batchIdx == ldsCurBatch)
464 solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
486 __kernel void solveSingleContactKernel(__global Body* gBodies,
487 __global Shape* gShapes,
488 __global Constraint4* gConstraints,
491 int numConstraintsInBatch
495 int index = get_global_id(0);
496 if (index < numConstraintsInBatch)
498 int idx=batchOffset+index;
499 solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );