Initialize libbullet git in 2.0_beta.
[platform/upstream/libbullet.git] / Extras / RigidBodyGpuPipeline / opencl / primitives / AdlPrimitives / Sort / RadixSortSimpleCL.h
1 static const char* radixSortSimpleKernelsCL = \
2         "#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
3         "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
4         "\n"
5         "typedef unsigned int u32;\n"
6         "#define GET_GROUP_IDX get_group_id(0)\n"
7         "#define GET_LOCAL_IDX get_local_id(0)\n"
8         "#define GET_GLOBAL_IDX get_global_id(0)\n"
9         "#define GET_GROUP_SIZE get_local_size(0)\n"
10         "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
11         "#define AtomInc(x) atom_inc(&(x))\n"
12         "#define AtomInc1(x, out) out = atom_inc(&(x))\n"
13         "\n"
14         "\n"
15         "#define WG_SIZE 128\n"
16         "#define NUM_PER_WI 4\n"
17         "\n"
18         "\n"
19         "typedef struct\n"
20         "{\n"
21         "       u32 m_key;\n"
22         "       u32 m_value;\n"
23         "}SortData;\n"
24         "\n"
25         "\n"
26         "typedef struct\n"
27         "{\n"
28         "       u32 m_startBit;\n"
29         "       u32 m_numGroups;\n"
30         "       u32 m_padding[2];\n"
31         "} ConstBuffer;\n"
32         "\n"
33         "\n"
34         "__kernel\n"
35         "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
36         "void LocalCountKernel(__global SortData* sortData,\n"
37         "                                               __global u32* ldsHistogramOut,\n"
38         "                                               ConstBuffer cb)\n"
39         "{\n"
40         "       __local u32 ldsHistogram[16][256];\n"
41         "\n"
42         "       int lIdx = GET_LOCAL_IDX;\n"
43         "       int gIdx = GET_GLOBAL_IDX;\n"
44         "\n"
45         "       for(int i=0; i<16; i++)\n"
46         "       {\n"
47         "               ldsHistogram[i][lIdx] = 0.f;\n"
48         "               ldsHistogram[i][lIdx+128] = 0.f;\n"
49         "       }\n"
50         "\n"
51         "       GROUP_LDS_BARRIER;\n"
52         "\n"
53         "       SortData datas[NUM_PER_WI];\n"
54         "       datas[0] = sortData[gIdx*NUM_PER_WI+0];\n"
55         "       datas[1] = sortData[gIdx*NUM_PER_WI+1];\n"
56         "       datas[2] = sortData[gIdx*NUM_PER_WI+2];\n"
57         "       datas[3] = sortData[gIdx*NUM_PER_WI+3];\n"
58         "\n"
59         "       datas[0].m_key = (datas[0].m_key >> cb.m_startBit) & 0xff;\n"
60         "       datas[1].m_key = (datas[1].m_key >> cb.m_startBit) & 0xff;\n"
61         "       datas[2].m_key = (datas[2].m_key >> cb.m_startBit) & 0xff;\n"
62         "       datas[3].m_key = (datas[3].m_key >> cb.m_startBit) & 0xff;\n"
63         "\n"
64         "       int tableIdx = lIdx%16;\n"
65         "\n"
66         "       AtomInc(ldsHistogram[tableIdx][datas[0].m_key]);\n"
67         "       AtomInc(ldsHistogram[tableIdx][datas[1].m_key]);\n"
68         "       AtomInc(ldsHistogram[tableIdx][datas[2].m_key]);\n"
69         "       AtomInc(ldsHistogram[tableIdx][datas[3].m_key]);\n"
70         "\n"
71         "       GROUP_LDS_BARRIER;\n"
72         "\n"
73         "       u32 sum0, sum1;\n"
74         "       sum0 = sum1 = 0;\n"
75         "       for(int i=0; i<16; i++)\n"
76         "       {\n"
77         "               sum0 += ldsHistogram[i][lIdx];\n"
78         "               sum1 += ldsHistogram[i][lIdx+128];\n"
79         "       }\n"
80         "\n"
81         "       ldsHistogramOut[lIdx*cb.m_numGroups+GET_GROUP_IDX] = sum0;\n"
82         "       ldsHistogramOut[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX] = sum1;\n"
83         "}\n"
84         "\n"
85         "__kernel\n"
86         "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
87         "void ScatterKernel(__global SortData* sortData,\n"
88         "                                       __global SortData* sortDataOut,\n"
89         "                                       __global u32* scannedHistogram,\n"
90         "                                       ConstBuffer cb)\n"
91         "{\n"
92         "       __local u32 ldsCurrentLocation[256];\n"
93         "\n"
94         "       int lIdx = GET_LOCAL_IDX;\n"
95         "       int gIdx = GET_GLOBAL_IDX;\n"
96         "\n"
97         "       {\n"
98         "               ldsCurrentLocation[lIdx] = scannedHistogram[lIdx*cb.m_numGroups+GET_GROUP_IDX];\n"
99         "               ldsCurrentLocation[lIdx+128] = scannedHistogram[(lIdx+128)*cb.m_numGroups+GET_GROUP_IDX];\n"
100         "       }\n"
101         "\n"
102         "       GROUP_LDS_BARRIER;\n"
103         "\n"
104         "       SortData datas[NUM_PER_WI];\n"
105         "       int keys[NUM_PER_WI];\n"
106         "       datas[0] = sortData[gIdx*NUM_PER_WI+0];\n"
107         "       datas[1] = sortData[gIdx*NUM_PER_WI+1];\n"
108         "       datas[2] = sortData[gIdx*NUM_PER_WI+2];\n"
109         "       datas[3] = sortData[gIdx*NUM_PER_WI+3];\n"
110         "\n"
111         "       keys[0] = (datas[0].m_key >> cb.m_startBit) & 0xff;\n"
112         "       keys[1] = (datas[1].m_key >> cb.m_startBit) & 0xff;\n"
113         "       keys[2] = (datas[2].m_key >> cb.m_startBit) & 0xff;\n"
114         "       keys[3] = (datas[3].m_key >> cb.m_startBit) & 0xff;\n"
115         "\n"
116         "       int dst[NUM_PER_WI];\n"
117         "       for(int i=0; i<WG_SIZE; i++)\n"
118         "       {\n"
119         "               if( i==lIdx )\n"
120         "               {\n"
121         "                       AtomInc1(ldsCurrentLocation[keys[0]], dst[0]);\n"
122         "                       AtomInc1(ldsCurrentLocation[keys[1]], dst[1]);\n"
123         "                       AtomInc1(ldsCurrentLocation[keys[2]], dst[2]);\n"
124         "                       AtomInc1(ldsCurrentLocation[keys[3]], dst[3]);\n"
125         "               }\n"
126         "               GROUP_LDS_BARRIER;\n"
127         "       }\n"
128         "       sortDataOut[dst[0]] = datas[0];\n"
129         "       sortDataOut[dst[1]] = datas[1];\n"
130         "       sortDataOut[dst[2]] = datas[2];\n"
131         "       sortDataOut[dst[3]] = datas[3];\n"
132         "}\n"
133         "\n"
134         "";