4 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
\r
5 Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
\r
7 This software is provided 'as-is', without any express or implied warranty.
\r
8 In no event will the authors be held liable for any damages arising from the use of this software.
\r
9 Permission is granted to anyone to use this software for any purpose,
\r
10 including commercial applications, and to alter it and redistribute it freely,
\r
11 subject to the following restrictions:
\r
13 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
\r
14 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
\r
15 3. This notice may not be removed or altered from any source distribution.
\r
20 int4 getGridPos(float4 worldPos, __global float4* pParams)
\r
23 gridPos.x = (int)floor((worldPos.x - pParams[1].x) / pParams[3].x);
\r
24 gridPos.y = (int)floor((worldPos.y - pParams[1].y) / pParams[3].y);
\r
25 gridPos.z = (int)floor((worldPos.z - pParams[1].z) / pParams[3].z);
\r
29 unsigned int getPosHash(int4 gridPos, __global float4* pParams)
\r
31 int4 gridDim = *((__global int4*)(pParams + 4));
\r
32 if(gridPos.x < 0) gridPos.x = 0;
\r
33 if(gridPos.x >= gridDim.x) gridPos.x = gridDim.x - 1;
\r
34 if(gridPos.y < 0) gridPos.y = 0;
\r
35 if(gridPos.y >= gridDim.y) gridPos.y = gridDim.y - 1;
\r
36 if(gridPos.z < 0) gridPos.z = 0;
\r
37 if(gridPos.z >= gridDim.z) gridPos.z = gridDim.z - 1;
\r
38 unsigned int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;
\r
43 __kernel void kComputeCellId( int numParticles,
\r
44 __global float4* pPos,
\r
45 __global int2* pPosHash,
\r
46 __global float4* pParams GUID_ARG)
\r
48 int index = get_global_id(0);
\r
49 if(index >= numParticles)
\r
53 float4 pos = pPos[index];
\r
54 int4 gridPos = getGridPos(pos, pParams);
\r
55 unsigned int hash = getPosHash(gridPos, pParams);
\r
56 pPosHash[index].x = hash;
\r
57 pPosHash[index].y = index;
\r
60 __kernel void kClearCellStart( int numCells,
\r
61 __global int* pCellStart GUID_ARG)
\r
63 int index = get_global_id(0);
\r
64 if(index >= numCells)
\r
68 pCellStart[index] = -1;
\r
71 __kernel void kFindCellStart( int numParticles,
\r
72 __global int2* pHash,
\r
73 __global int* cellStart,
\r
74 __global float4* pPos,
\r
75 __global float4* pVel,
\r
76 __global float4* pSortedPos,
\r
77 __global float4* pSortedVel GUID_ARG)
\r
79 int index = get_global_id(0);
\r
80 __local int sharedHash[1025];//maximum workgroup size 1024
\r
83 if(index < numParticles)
\r
86 sortedData = pHash[index];
\r
87 // Load hash data into shared memory so that we can look
\r
88 // at neighboring body's hash value without loading
\r
89 // two hash values per thread
\r
90 sharedHash[get_local_id(0) + 1] = sortedData.x;
\r
91 if((index > 0) && (get_local_id(0) == 0))
\r
93 // first thread in block must load neighbor body hash
\r
94 sharedHash[0] = pHash[index-1].x;
\r
98 barrier(CLK_LOCAL_MEM_FENCE);
\r
100 if(index < numParticles)
\r
102 if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))
\r
104 cellStart[sortedData.x] = index;
\r
106 int unsortedIndex = sortedData.y;
\r
107 float4 pos = pPos[unsortedIndex];
\r
108 float4 vel = pVel[unsortedIndex];
\r
109 pSortedPos[index] = pos;
\r
110 pSortedVel[index] = vel;
\r
114 __kernel void kIntegrateMotion( int numParticles,
\r
115 __global float4* pPos,
\r
116 __global float4* pVel,
\r
117 __global float4* pParams,
\r
118 float timeStep GUID_ARG)
\r
120 int index = get_global_id(0);
\r
121 if(index >= numParticles)
\r
125 float4 pos = pPos[index];
\r
126 float4 vel = pVel[index];
\r
130 float4 gravity = *((__global float4*)(pParams + 0));
\r
131 float particleRad = pParams[5].x;
\r
132 float globalDamping = pParams[5].y;
\r
133 float boundaryDamping = pParams[5].z;
\r
134 vel += gravity * timeStep;
\r
135 vel *= globalDamping;
\r
136 // integrate position
\r
137 pos += vel * timeStep;
\r
138 // collide with world boundaries
\r
139 float4 worldMin = *((__global float4*)(pParams + 1));
\r
140 float4 worldMax = *((__global float4*)(pParams + 2));
\r
143 if(pos.x < (worldMin.x + 2*particleRad))
\r
145 pos.x = worldMin.x + 2*particleRad;
\r
146 vel.x *= boundaryDamping;
\r
148 if(pos.x > (worldMax.x - 2*particleRad))
\r
150 pos.x = worldMax.x - 2*particleRad;
\r
151 vel.x *= boundaryDamping;
\r
153 if(pos.y < (worldMin.y + 2*particleRad))
\r
155 pos.y = worldMin.y + 2*particleRad;
\r
156 vel.y *= boundaryDamping;
\r
158 if(pos.y > (worldMax.y - 2*particleRad))
\r
160 pos.y = worldMax.y - 2*particleRad;
\r
161 vel.y *= boundaryDamping;
\r
163 if(pos.z < (worldMin.z + 2*particleRad))
\r
165 pos.z = worldMin.z + 2*particleRad;
\r
166 vel.z *= boundaryDamping;
\r
168 if(pos.z > (worldMax.z - 2*particleRad))
\r
170 pos.z = worldMax.z - 2*particleRad;
\r
171 vel.z *= boundaryDamping;
\r
173 // write back position and velocity
\r
179 float4 collideTwoParticles(
\r
192 //Calculate relative position
\r
193 float4 relPos = posB - posA; relPos.w = 0.f;
\r
194 float dist = sqrt(relPos.x * relPos.x + relPos.y * relPos.y + relPos.z * relPos.z);
\r
195 float collideDist = radiusA + radiusB;
\r
197 float4 force = (float4)0.f;
\r
198 if(dist < collideDist){
\r
199 float4 norm = relPos * (1.f / dist); norm.w = 0.f;
\r
201 //Relative velocity
\r
202 float4 relVel = velB - velA; relVel.w = 0.f;
\r
204 //Relative tangential velocity
\r
205 float relVelDotNorm = relVel.x * norm.x + relVel.y * norm.y + relVel.z * norm.z;
\r
206 float4 tanVel = relVel - norm * relVelDotNorm; tanVel.w = 0.f;
\r
208 //Spring force (potential)
\r
209 float springFactor = -spring * (collideDist - dist);
\r
210 force = springFactor * norm + damping * relVel + shear * tanVel + attraction * relPos;
\r
217 __kernel void kCollideParticles(int numParticles,
\r
218 __global float4* pVel, //output: new velocity
\r
219 __global const float4* pSortedPos, //input: reordered positions
\r
220 __global const float4* pSortedVel, //input: reordered velocities
\r
221 __global const int2 *pPosHash, //input: reordered particle indices
\r
222 __global const int *pCellStart, //input: cell boundaries
\r
223 __global float4* pParams GUID_ARG)
\r
225 int index = get_global_id(0);
\r
226 if(index >= numParticles)
\r
231 float4 posA = pSortedPos[index];
\r
232 float4 velA = pSortedVel[index];
\r
233 float4 force = (float4)0.f;
\r
234 float particleRad = pParams[5].x;
\r
235 float collisionDamping = pParams[5].w;
\r
236 float spring = pParams[6].x;
\r
237 float shear = pParams[6].y;
\r
238 float attraction = pParams[6].z;
\r
239 int unsortedIndex = pPosHash[index].y;
\r
241 //Get address in grid
\r
242 int4 gridPosA = getGridPos(posA, pParams);
\r
244 //Accumulate surrounding cells
\r
246 for(int z = -1; z <= 1; z++)
\r
248 gridPosB.z = gridPosA.z + z;
\r
249 for(int y = -1; y <= 1; y++)
\r
251 gridPosB.y = gridPosA.y + y;
\r
252 for(int x = -1; x <= 1; x++)
\r
254 gridPosB.x = gridPosA.x + x;
\r
255 //Get start particle index for this cell
\r
256 uint hashB = getPosHash(gridPosB, pParams);
\r
257 int startI = pCellStart[hashB];
\r
263 //Iterate over particles in this cell
\r
264 int endI = startI + 32;
\r
265 if(endI >= numParticles)
\r
266 endI = numParticles ;
\r
268 for(int j = startI; j < endI; j++)
\r
270 uint hashC = pPosHash[j].x;
\r
279 float4 posB = pSortedPos[j];
\r
280 float4 velB = pSortedVel[j];
\r
281 //Collide two spheres
\r
282 force += collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad,
\r
283 spring, collisionDamping, shear, attraction);
\r
288 //Write new velocity back to original unsorted location
\r
289 pVel[unsortedIndex] = velA + force;
\r
298 * Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
\r
300 * NVIDIA Corporation and its licensors retain all intellectual property and
\r
301 * proprietary rights in and to this software and related documentation.
\r
302 * Any use, reproduction, disclosure, or distribution of this software
\r
303 * and related documentation without an express license agreement from
\r
304 * NVIDIA Corporation is strictly prohibited.
\r
306 * Please refer to the applicable NVIDIA end user license agreement (EULA)
\r
307 * associated with this source code for terms and conditions that govern
\r
308 * your use of this NVIDIA software.
\r
314 inline void ComparatorPrivate(int2* keyA, int2* keyB, uint dir)
\r
316 if((keyA[0].x > keyB[0].x) == dir)
\r
324 inline void ComparatorLocal(__local int2* keyA, __local int2* keyB, uint dir)
\r
326 if((keyA[0].x > keyB[0].x) == dir)
\r
334 ////////////////////////////////////////////////////////////////////////////////
\r
335 // Monolithic bitonic sort kernel for short arrays fitting into local memory
\r
336 ////////////////////////////////////////////////////////////////////////////////
\r
337 __kernel void kBitonicSortCellIdLocal(__global int2* pKey, uint arrayLength, uint dir GUID_ARG)
\r
339 __local int2 l_key[LOCAL_SIZE_MAX];
\r
340 int localSizeLimit = get_local_size(0) * 2;
\r
342 //Offset to the beginning of subbatch and load data
\r
343 pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
\r
344 l_key[get_local_id(0) + 0] = pKey[ 0];
\r
345 l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
\r
347 for(uint size = 2; size < arrayLength; size <<= 1)
\r
350 uint ddd = dir ^ ( (get_local_id(0) & (size / 2)) != 0 );
\r
351 for(uint stride = size / 2; stride > 0; stride >>= 1)
\r
353 barrier(CLK_LOCAL_MEM_FENCE);
\r
354 uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
\r
355 ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
\r
359 //ddd == dir for the last bitonic merge step
\r
361 for(uint stride = arrayLength / 2; stride > 0; stride >>= 1)
\r
363 barrier(CLK_LOCAL_MEM_FENCE);
\r
364 uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
\r
365 ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], dir);
\r
369 barrier(CLK_LOCAL_MEM_FENCE);
\r
370 pKey[ 0] = l_key[get_local_id(0) + 0];
\r
371 pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
\r
374 ////////////////////////////////////////////////////////////////////////////////
\r
375 // Bitonic sort kernel for large arrays (not fitting into local memory)
\r
376 ////////////////////////////////////////////////////////////////////////////////
\r
377 //Bottom-level bitonic sort
\r
378 //Almost the same as bitonicSortLocal with the only exception
\r
379 //of even / odd subarrays (of LOCAL_SIZE_LIMIT points) being
\r
380 //sorted in opposite directions
\r
381 __kernel void kBitonicSortCellIdLocal1(__global int2* pKey GUID_ARG)
\r
383 __local int2 l_key[LOCAL_SIZE_MAX];
\r
384 uint localSizeLimit = get_local_size(0) * 2;
\r
386 //Offset to the beginning of subarray and load data
\r
387 pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
\r
388 l_key[get_local_id(0) + 0] = pKey[ 0];
\r
389 l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
\r
391 uint comparatorI = get_global_id(0) & ((localSizeLimit / 2) - 1);
\r
393 for(uint size = 2; size < localSizeLimit; size <<= 1)
\r
396 uint ddd = (comparatorI & (size / 2)) != 0;
\r
397 for(uint stride = size / 2; stride > 0; stride >>= 1)
\r
399 barrier(CLK_LOCAL_MEM_FENCE);
\r
400 uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
\r
401 ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
\r
405 //Odd / even arrays of localSizeLimit elements
\r
406 //sorted in opposite directions
\r
408 uint ddd = (get_group_id(0) & 1);
\r
409 for(uint stride = localSizeLimit / 2; stride > 0; stride >>= 1)
\r
411 barrier(CLK_LOCAL_MEM_FENCE);
\r
412 uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
\r
413 ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
\r
417 barrier(CLK_LOCAL_MEM_FENCE);
\r
418 pKey[ 0] = l_key[get_local_id(0) + 0];
\r
419 pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
\r
422 //Bitonic merge iteration for 'stride' >= LOCAL_SIZE_LIMIT
\r
423 __kernel void kBitonicSortCellIdMergeGlobal(__global int2* pKey, uint arrayLength, uint size, uint stride, uint dir GUID_ARG)
\r
425 uint global_comparatorI = get_global_id(0);
\r
426 uint comparatorI = global_comparatorI & (arrayLength / 2 - 1);
\r
429 uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 );
\r
430 uint pos = 2 * global_comparatorI - (global_comparatorI & (stride - 1));
\r
432 int2 keyA = pKey[pos + 0];
\r
433 int2 keyB = pKey[pos + stride];
\r
435 ComparatorPrivate(&keyA, &keyB, ddd);
\r
437 pKey[pos + 0] = keyA;
\r
438 pKey[pos + stride] = keyB;
\r
441 //Combined bitonic merge steps for
\r
442 //'size' > LOCAL_SIZE_LIMIT and 'stride' = [1 .. LOCAL_SIZE_LIMIT / 2]
\r
443 __kernel void kBitonicSortCellIdMergeLocal(__global int2* pKey, uint arrayLength, uint stride, uint size, uint dir GUID_ARG)
\r
445 __local int2 l_key[LOCAL_SIZE_MAX];
\r
446 int localSizeLimit = get_local_size(0) * 2;
\r
448 pKey += get_group_id(0) * localSizeLimit + get_local_id(0);
\r
449 l_key[get_local_id(0) + 0] = pKey[ 0];
\r
450 l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)];
\r
453 uint comparatorI = get_global_id(0) & ((arrayLength / 2) - 1);
\r
454 uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 );
\r
455 for(; stride > 0; stride >>= 1)
\r
457 barrier(CLK_LOCAL_MEM_FENCE);
\r
458 uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1));
\r
459 ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd);
\r
462 barrier(CLK_LOCAL_MEM_FENCE);
\r
463 pKey[ 0] = l_key[get_local_id(0) + 0];
\r
464 pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)];
\r