Imported Upstream version 2.81
[platform/upstream/libbullet.git] / src / BulletMultiThreaded / GpuSoftBodySolvers / OpenCL / OpenCLC10 / ComputeBounds.cl
1 MSTRINGIFY(\r
2 #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n\r
3 #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n\r
4 \r
5 __kernel void\r
6 ComputeBoundsKernel( \r
7         const int numNodes,\r
8         const int numSoftBodies,\r
9         __global int * g_vertexClothIdentifier,\r
10         __global float4 * g_vertexPositions,\r
11         /* Unfortunately, to get the atomics below to work these arrays cannot be */\r
12         /* uint4, though that is the layout of the data */\r
13         /* Therefore this is little-endian-only code */\r
14         volatile __global uint * g_clothMinBounds,\r
15         volatile __global uint * g_clothMaxBounds,\r
16         volatile __local uint * clothMinBounds,\r
17         volatile __local uint * clothMaxBounds)\r
18 {\r
19         // Init min and max bounds arrays\r
20         if( get_local_id(0) < numSoftBodies )\r
21         {\r
22                 \r
23                 clothMinBounds[get_local_id(0)*4] = UINT_MAX;\r
24                 clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;\r
25                 clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;\r
26                 clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;\r
27                 clothMaxBounds[get_local_id(0)*4] = 0;\r
28                 clothMaxBounds[get_local_id(0)*4+1] = 0;\r
29                 clothMaxBounds[get_local_id(0)*4+2] = 0;\r
30                 clothMaxBounds[get_local_id(0)*4+3] = 0;\r
31 \r
32         }\r
33 \r
34         barrier(CLK_LOCAL_MEM_FENCE);\r
35 \r
36         int nodeID = get_global_id(0);\r
37         if( nodeID < numNodes )\r
38         {       \r
39                 int clothIdentifier = g_vertexClothIdentifier[nodeID];\r
40                 if( clothIdentifier >= 0 )\r
41                 {\r
42 \r
43                         float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);\r
44 \r
45                         /* Reinterpret position as uint */\r
46                         uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);\r
47                 \r
48                         /* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */\r
49                         positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);\r
50                         positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);             \r
51                         positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);\r
52                 \r
53                         // Min/max with the LDS values\r
54                         atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);\r
55                         atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);\r
56                         atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);\r
57 \r
58                         atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);\r
59                         atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);\r
60                         atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);\r
61                 }\r
62         }\r
63         \r
64         barrier(CLK_LOCAL_MEM_FENCE);\r
65 \r
66 \r
67         /* Use global atomics to update the global versions of the data */\r
68         if( get_local_id(0) < numSoftBodies )\r
69         {\r
70                 /*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/\r
71                 atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);\r
72                 atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);\r
73                 atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);\r
74 \r
75                 atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);            \r
76                 atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);\r
77                 atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);\r
78         }\r
79 }\r
80 \r
81 \r
82 );\r