4 int getPosHash(int4 gridPos, __global float4* pParams)
6 int4 gridDim = *((__global int4*)(pParams + 1));
7 gridPos.x &= gridDim.x - 1;
8 gridPos.y &= gridDim.y - 1;
9 gridPos.z &= gridDim.z - 1;
10 int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;
14 int4 getGridPos(float4 worldPos, __global float4* pParams)
17 int4 gridDim = *((__global int4*)(pParams + 1));
18 gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);
19 gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);
20 gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);
25 // calculate grid hash value for each body using its AABB
26 __kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG)
28 int index = get_global_id(0);
29 if(index >= numObjects)
33 float4 bbMin = pAABB[index*2];
34 float4 bbMax = pAABB[index*2 + 1];
36 pos.x = (bbMin.x + bbMax.x) * 0.5f;
37 pos.y = (bbMin.y + bbMax.y) * 0.5f;
38 pos.z = (bbMin.z + bbMax.z) * 0.5f;
40 // get address in grid
41 int4 gridPos = getGridPos(pos, pParams);
42 int gridHash = getPosHash(gridPos, pParams);
43 // store grid hash and body index
47 pHash[index] = hashVal;
50 __kernel void kClearCellStart( int numCells,
51 __global int* pCellStart GUID_ARG)
53 int index = get_global_id(0);
58 pCellStart[index] = -1;
61 __kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG)
63 __local int sharedHash[513];
64 int index = get_global_id(0);
66 if(index < numObjects)
68 sortedData = pHash[index];
69 // Load hash data into shared memory so that we can look
70 // at neighboring body's hash value without loading
71 // two hash values per thread
72 sharedHash[get_local_id(0) + 1] = sortedData.x;
73 if((index > 0) && (get_local_id(0) == 0))
75 // first thread in block must load neighbor body hash
76 sharedHash[0] = pHash[index-1].x;
79 barrier(CLK_LOCAL_MEM_FENCE);
80 if(index < numObjects)
82 if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))
84 cellStart[sortedData.x] = index;
89 int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
91 return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
92 (min0.y <= max1.y)&& (min1.y <= max0.y) &&
93 (min0.z <= max1.z)&& (min1.z <= max0.z);
100 void findPairsInCell( int numObjects,
103 __global int2* pHash,
104 __global int* pCellStart,
105 __global float4* pAABB,
106 __global int* pPairBuff,
107 __global int2* pPairBuffStartCurr,
108 __global float4* pParams)
110 int4 pGridDim = *((__global int4*)(pParams + 1));
111 int maxBodiesPerCell = pGridDim.w;
112 int gridHash = getPosHash(gridPos, pParams);
113 // get start of bucket for this cell
114 int bucketStart = pCellStart[gridHash];
115 if (bucketStart == -1)
117 return; // cell empty
119 // iterate over bodies in this cell
120 int2 sortedData = pHash[index];
121 int unsorted_indx = sortedData.y;
122 float4 min0 = pAABB[unsorted_indx*2 + 0];
123 float4 max0 = pAABB[unsorted_indx*2 + 1];
124 int handleIndex = as_int(min0.w);
125 int2 start_curr = pPairBuffStartCurr[handleIndex];
126 int start = start_curr.x;
127 int curr = start_curr.y;
128 int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
129 int curr_max = start_curr_next.x - start - 1;
130 int bucketEnd = bucketStart + maxBodiesPerCell;
131 bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;
132 for(int index2 = bucketStart; index2 < bucketEnd; index2++)
134 int2 cellData = pHash[index2];
135 if (cellData.x != gridHash)
137 break; // no longer in same bucket
139 int unsorted_indx2 = cellData.y;
140 if (unsorted_indx2 < unsorted_indx) // check not colliding with self
142 float4 min1 = pAABB[unsorted_indx2*2 + 0];
143 float4 max1 = pAABB[unsorted_indx2*2 + 1];
144 if(testAABBOverlap(min0, max0, min1, max1))
146 int handleIndex2 = as_int(min1.w);
148 for(k = 0; k < curr; k++)
150 int old_pair = pPairBuff[start+k] & (~0x60000000);
151 if(old_pair == handleIndex2)
153 pPairBuff[start+k] |= 0x40000000;
160 { // not a good solution, but let's avoid crash
163 pPairBuff[start+curr] = handleIndex2 | 0x20000000;
170 newStartCurr.x = start;
171 newStartCurr.y = curr;
172 pPairBuffStartCurr[handleIndex] = newStartCurr;
176 __kernel void kFindOverlappingPairs( int numObjects,
177 __global float4* pAABB,
178 __global int2* pHash,
179 __global int* pCellStart,
180 __global int* pPairBuff,
181 __global int2* pPairBuffStartCurr,
182 __global float4* pParams GUID_ARG)
185 int index = get_global_id(0);
186 if(index >= numObjects)
190 int2 sortedData = pHash[index];
191 int unsorted_indx = sortedData.y;
192 float4 bbMin = pAABB[unsorted_indx*2 + 0];
193 float4 bbMax = pAABB[unsorted_indx*2 + 1];
195 pos.x = (bbMin.x + bbMax.x) * 0.5f;
196 pos.y = (bbMin.y + bbMax.y) * 0.5f;
197 pos.z = (bbMin.z + bbMax.z) * 0.5f;
198 // get address in grid
199 int4 gridPosA = getGridPos(pos, pParams);
201 // examine only neighbouring cells
202 for(int z=-1; z<=1; z++)
204 gridPosB.z = gridPosA.z + z;
205 for(int y=-1; y<=1; y++)
207 gridPosB.y = gridPosA.y + y;
208 for(int x=-1; x<=1; x++)
210 gridPosB.x = gridPosA.x + x;
211 findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);
218 __kernel void kFindPairsLarge( int numObjects,
219 __global float4* pAABB,
220 __global int2* pHash,
221 __global int* pCellStart,
222 __global int* pPairBuff,
223 __global int2* pPairBuffStartCurr,
224 uint numLarge GUID_ARG)
226 int index = get_global_id(0);
227 if(index >= numObjects)
231 int2 sortedData = pHash[index];
232 int unsorted_indx = sortedData.y;
233 float4 min0 = pAABB[unsorted_indx*2 + 0];
234 float4 max0 = pAABB[unsorted_indx*2 + 1];
235 int handleIndex = as_int(min0.w);
236 int2 start_curr = pPairBuffStartCurr[handleIndex];
237 int start = start_curr.x;
238 int curr = start_curr.y;
239 int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
240 int curr_max = start_curr_next.x - start - 1;
241 for(uint i = 0; i < numLarge; i++)
243 int indx2 = numObjects + i;
244 float4 min1 = pAABB[indx2*2 + 0];
245 float4 max1 = pAABB[indx2*2 + 1];
246 if(testAABBOverlap(min0, max0, min1, max1))
249 int handleIndex2 = as_int(min1.w);
250 for(k = 0; k < curr; k++)
252 int old_pair = pPairBuff[start+k] & (~0x60000000);
253 if(old_pair == handleIndex2)
255 pPairBuff[start+k] |= 0x40000000;
261 pPairBuff[start+curr] = handleIndex2 | 0x20000000;
263 { // not a good solution, but let's avoid crash
271 newStartCurr.x = start;
272 newStartCurr.y = curr;
273 pPairBuffStartCurr[handleIndex] = newStartCurr;
277 __kernel void kComputePairCacheChanges( int numObjects,
278 __global int* pPairBuff,
279 __global int2* pPairBuffStartCurr,
280 __global int* pPairScan,
281 __global float4* pAABB GUID_ARG)
283 int index = get_global_id(0);
284 if(index >= numObjects)
288 float4 bbMin = pAABB[index * 2];
289 int handleIndex = as_int(bbMin.w);
290 int2 start_curr = pPairBuffStartCurr[handleIndex];
291 int start = start_curr.x;
292 int curr = start_curr.y;
293 __global int *pInp = pPairBuff + start;
295 for(int k = 0; k < curr; k++, pInp++)
297 if(!((*pInp) & 0x40000000))
302 pPairScan[index+1] = num_changes;
305 __kernel void kSqueezeOverlappingPairBuff( int numObjects,
306 __global int* pPairBuff,
307 __global int2* pPairBuffStartCurr,
308 __global int* pPairScan,
309 __global int* pPairOut,
310 __global float4* pAABB GUID_ARG)
312 int index = get_global_id(0);
313 if(index >= numObjects)
317 float4 bbMin = pAABB[index * 2];
318 int handleIndex = as_int(bbMin.w);
319 int2 start_curr = pPairBuffStartCurr[handleIndex];
320 int start = start_curr.x;
321 int curr = start_curr.y;
322 __global int* pInp = pPairBuff + start;
323 __global int* pOut = pPairOut + pPairScan[index+1];
324 __global int* pOut2 = pInp;
326 for(int k = 0; k < curr; k++, pInp++)
328 if(!((*pInp) & 0x40000000))
333 if((*pInp) & 0x60000000)
335 *pOut2 = (*pInp) & (~0x60000000);
341 newStartCurr.x = start;
342 newStartCurr.y = num;
343 pPairBuffStartCurr[handleIndex] = newStartCurr;