2 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
\r
3 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
\r
6 ComputeBoundsKernel(
\r
8 const int numSoftBodies,
\r
9 __global int * g_vertexClothIdentifier,
\r
10 __global float4 * g_vertexPositions,
\r
11 /* Unfortunately, to get the atomics below to work these arrays cannot be */
\r
12 /* uint4, though that is the layout of the data */
\r
13 /* Therefore this is little-endian-only code */
\r
14 volatile __global uint * g_clothMinBounds,
\r
15 volatile __global uint * g_clothMaxBounds,
\r
16 volatile __local uint * clothMinBounds,
\r
17 volatile __local uint * clothMaxBounds)
\r
19 // Init min and max bounds arrays
\r
20 if( get_local_id(0) < numSoftBodies )
\r
23 clothMinBounds[get_local_id(0)*4] = UINT_MAX;
\r
24 clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
\r
25 clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
\r
26 clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
\r
27 clothMaxBounds[get_local_id(0)*4] = 0;
\r
28 clothMaxBounds[get_local_id(0)*4+1] = 0;
\r
29 clothMaxBounds[get_local_id(0)*4+2] = 0;
\r
30 clothMaxBounds[get_local_id(0)*4+3] = 0;
\r
34 barrier(CLK_LOCAL_MEM_FENCE);
\r
36 int nodeID = get_global_id(0);
\r
37 if( nodeID < numNodes )
\r
39 int clothIdentifier = g_vertexClothIdentifier[nodeID];
\r
40 if( clothIdentifier >= 0 )
\r
43 float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
\r
45 /* Reinterpret position as uint */
\r
46 uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
\r
48 /* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
\r
49 positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
\r
50 positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
\r
51 positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
\r
53 // Min/max with the LDS values
\r
54 atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
\r
55 atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
\r
56 atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
\r
58 atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
\r
59 atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
\r
60 atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
\r
64 barrier(CLK_LOCAL_MEM_FENCE);
\r
67 /* Use global atomics to update the global versions of the data */
\r
68 if( get_local_id(0) < numSoftBodies )
\r
70 /*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
\r
71 atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
\r
72 atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
\r
73 atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
\r
75 atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
\r
76 atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
\r
77 atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
\r