2 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
3 Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
16 //----------------------------------------------------------------------------------------
18 //----------------------------------------------------------------------------------------
19 //----------------------------------------------------------------------------------------
20 //----------------------------------------------------------------------------------------
21 //----------------------------------------------------------------------------------------
22 // K E R N E L F U N C T I O N S
23 //----------------------------------------------------------------------------------------
24 //----------------------------------------------------------------------------------------
25 //----------------------------------------------------------------------------------------
26 //----------------------------------------------------------------------------------------
27 //----------------------------------------------------------------------------------------
28 //----------------------------------------------------------------------------------------
30 // calculate position in uniform grid
31 BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
34 gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
35 gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
36 gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
38 } // bt3DGrid_calcGridPos()
40 //----------------------------------------------------------------------------------------
42 // calculate address in grid from position (clamping to edges)
43 BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
45 gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
46 gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
47 gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
48 return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
49 } // bt3DGrid_calcGridHash()
51 //----------------------------------------------------------------------------------------
53 // calculate grid hash value for each body using its AABB
54 BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
56 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
57 if(index >= (int)numBodies)
61 bt3DGrid3F1U bbMin = pAABB[index*2];
62 bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
64 pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
65 pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
66 pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
67 // get address in grid
68 int3 gridPos = bt3DGrid_calcGridPos(pos);
69 uint gridHash = bt3DGrid_calcGridHash(gridPos);
70 // store grid hash and body index
71 pHash[index] = BT_GPU_make_uint2(gridHash, index);
74 //----------------------------------------------------------------------------------------
76 BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
78 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
79 if(index >= (int)numBodies)
83 uint2 sortedData = pHash[index];
84 // Load hash data into shared memory so that we can look
85 // at neighboring body's hash value without loading
86 // two hash values per thread
87 BT_GPU___shared__ uint sharedHash[257];
88 sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
89 if((index > 0) && (BT_GPU_threadIdx.x == 0))
91 // first thread in block must load neighbor body hash
92 volatile uint2 prevData = pHash[index-1];
93 sharedHash[0] = prevData.x;
95 BT_GPU___syncthreads();
96 if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
98 cellStart[sortedData.x] = index;
100 } // findCellStartD()
102 //----------------------------------------------------------------------------------------
104 BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
106 return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) &&
107 (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) &&
108 (min0.fz <= max1.fz)&& (min1.fz <= max0.fz);
109 } // cudaTestAABBOverlap()
111 //----------------------------------------------------------------------------------------
113 BT_GPU___device__ void findPairsInCell( int3 gridPos,
119 uint2* pPairBuffStartCurr,
122 if ( (gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
123 || (gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
124 || (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1))
128 uint gridHash = bt3DGrid_calcGridHash(gridPos);
129 // get start of bucket for this cell
130 uint bucketStart = pCellStart[gridHash];
131 if (bucketStart == 0xffffffff)
133 return; // cell empty
135 // iterate over bodies in this cell
136 uint2 sortedData = pHash[index];
137 uint unsorted_indx = sortedData.y;
138 bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
139 bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
140 uint handleIndex = min0.uw;
141 uint2 start_curr = pPairBuffStartCurr[handleIndex];
142 uint start = start_curr.x;
143 uint curr = start_curr.y;
144 uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
145 uint curr_max = start_curr_next.x - start - 1;
146 uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
147 bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
148 for(uint index2 = bucketStart; index2 < bucketEnd; index2++)
150 uint2 cellData = pHash[index2];
151 if (cellData.x != gridHash)
153 break; // no longer in same bucket
155 uint unsorted_indx2 = cellData.y;
156 if (unsorted_indx2 < unsorted_indx) // check not colliding with self
158 bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
159 bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
160 if(cudaTestAABBOverlap(min0, max0, min1, max1))
162 uint handleIndex2 = min1.uw;
164 for(k = 0; k < curr; k++)
166 uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
167 if(old_pair == handleIndex2)
169 pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
176 { // not a good solution, but let's avoid crash
179 pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
185 pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
187 } // findPairsInCell()
189 //----------------------------------------------------------------------------------------
191 BT_GPU___global__ void findOverlappingPairsD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
192 uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
194 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
195 if(index >= (int)numBodies)
199 uint2 sortedData = pHash[index];
200 uint unsorted_indx = sortedData.y;
201 bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
202 bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
204 pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
205 pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
206 pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
207 // get address in grid
208 int3 gridPos = bt3DGrid_calcGridPos(pos);
209 // examine only neighbouring cells
210 for(int z=-1; z<=1; z++) {
211 for(int y=-1; y<=1; y++) {
212 for(int x=-1; x<=1; x++) {
213 findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
217 } // findOverlappingPairsD()
219 //----------------------------------------------------------------------------------------
221 BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
222 uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
224 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
225 if(index >= (int)numBodies)
229 uint2 sortedData = pHash[index];
230 uint unsorted_indx = sortedData.y;
231 bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
232 bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
233 uint handleIndex = min0.uw;
234 uint2 start_curr = pPairBuffStartCurr[handleIndex];
235 uint start = start_curr.x;
236 uint curr = start_curr.y;
237 uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
238 uint curr_max = start_curr_next.x - start - 1;
239 for(uint i = 0; i < numLarge; i++)
241 uint indx2 = numBodies + i;
242 bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
243 bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
244 if(cudaTestAABBOverlap(min0, max0, min1, max1))
247 uint handleIndex2 = min1.uw;
248 for(k = 0; k < curr; k++)
250 uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
251 if(old_pair == handleIndex2)
253 pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
259 pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
261 { // not a good solution, but let's avoid crash
268 pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
270 } // findPairsLargeD()
272 //----------------------------------------------------------------------------------------
274 BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
275 uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
277 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
278 if(index >= (int)numBodies)
282 bt3DGrid3F1U bbMin = pAABB[index * 2];
283 uint handleIndex = bbMin.uw;
284 uint2 start_curr = pPairBuffStartCurr[handleIndex];
285 uint start = start_curr.x;
286 uint curr = start_curr.y;
287 uint *pInp = pPairBuff + start;
288 uint num_changes = 0;
289 for(uint k = 0; k < curr; k++, pInp++)
291 if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
296 pPairScan[index+1] = num_changes;
297 } // computePairCacheChangesD()
299 //----------------------------------------------------------------------------------------
301 BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
302 uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
304 int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
305 if(index >= (int)numBodies)
309 bt3DGrid3F1U bbMin = pAABB[index * 2];
310 uint handleIndex = bbMin.uw;
311 uint2 start_curr = pPairBuffStartCurr[handleIndex];
312 uint start = start_curr.x;
313 uint curr = start_curr.y;
314 uint* pInp = pPairBuff + start;
315 uint* pOut = pPairOut + pPairScan[index];
318 for(uint k = 0; k < curr; k++, pInp++)
320 if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
325 if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
327 *pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
332 pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);
333 } // squeezeOverlappingPairBuffD()
336 //----------------------------------------------------------------------------------------
337 //----------------------------------------------------------------------------------------
338 //----------------------------------------------------------------------------------------
339 //----------------------------------------------------------------------------------------
340 // E N D O F K E R N E L F U N C T I O N S
341 //----------------------------------------------------------------------------------------
342 //----------------------------------------------------------------------------------------
343 //----------------------------------------------------------------------------------------
344 //----------------------------------------------------------------------------------------
349 //----------------------------------------------------------------------------------------
351 void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
353 int numThreads, numBlocks;
354 BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
355 // execute the kernel
356 BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
357 // check if kernel invocation generated an error
358 BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
361 //----------------------------------------------------------------------------------------
363 void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
365 int numThreads, numBlocks;
366 BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
367 BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
368 BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
369 BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
372 //----------------------------------------------------------------------------------------
374 void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies))
377 BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
379 int numThreads, numBlocks;
380 BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
381 BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
382 BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
384 BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
386 } // findOverlappingPairs()
388 //----------------------------------------------------------------------------------------
390 void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
393 BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
395 int numThreads, numBlocks;
396 BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
397 BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
398 BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
400 BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
402 } // findPairsLarge()
404 //----------------------------------------------------------------------------------------
406 void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
408 int numThreads, numBlocks;
409 BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
410 BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
411 BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
412 } // computePairCacheChanges()
414 //----------------------------------------------------------------------------------------
416 void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
418 int numThreads, numBlocks;
419 BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
420 BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
421 BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
422 } // btCuda_squeezeOverlappingPairBuff()
424 //------------------------------------------------------------------------------------------------
428 //------------------------------------------------------------------------------------------------
429 //------------------------------------------------------------------------------------------------
430 //------------------------------------------------------------------------------------------------