1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@outlook.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other oclMaterials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
50 __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
52 inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
54 return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
57 inline float ELEM_FLT2(image2d_t _eig, float2 pt)
59 return read_imagef(_eig, sampler, pt).x;
66 __global const char * mask,
67 __global float2 * corners,
68 const int mask_strip,// in pixels
69 const float threshold,
73 __global int * g_counter
76 const int j = get_global_id(0);
77 const int i = get_global_id(1);
79 if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1
81 && mask[i * mask_strip + j] != 0
85 const float val = ELEM_INT2(eig, j, i);
91 maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
92 maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
93 maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
95 maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
96 maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
98 maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
99 maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
100 maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
104 const int ind = atomic_inc(g_counter);
107 corners[ind] = (float2)(j, i);
115 void sortCorners_bitonicSort
118 __global float2 * corners,
121 const int passOfStage
124 const int threadId = get_global_id(0);
125 if(threadId >= count / 2)
130 const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent
132 const int pairDistance = 1 << (stage - passOfStage);
133 const int blockWidth = 2 * pairDistance;
135 const int leftId = min( (threadId % pairDistance)
136 + (threadId / pairDistance) * blockWidth, count );
138 const int rightId = min( leftId + pairDistance, count );
140 const float2 leftPt = corners[leftId];
141 const float2 rightPt = corners[rightId];
143 const float leftVal = ELEM_FLT2(eig, leftPt);
144 const float rightVal = ELEM_FLT2(eig, rightPt);
146 const bool compareResult = leftVal > rightVal;
148 float2 greater = compareResult ? leftPt:rightPt;
149 float2 lesser = compareResult ? rightPt:leftPt;
151 corners[leftId] = sortOrder ? lesser : greater;
152 corners[rightId] = sortOrder ? greater : lesser;
155 //selection sort for gfft
156 //kernel is ported from Bolt library:
157 //https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
158 // Local sort will firstly sort elements of each workgroup using selection sort
159 // its performance is O(n)
161 void sortCorners_selectionSortLocal
164 __global float2 * corners,
166 __local float2 * scratch
169 int i = get_local_id(0); // index in workgroup
170 int numOfGroups = get_num_groups(0); // index in workgroup
171 int groupID = get_group_id(0);
172 int wg = get_local_size(0); // workgroup size = block size
173 int n; // number of elements to be processed for this work group
175 int offset = groupID * wg;
178 n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
181 pt1 = corners[min(i, n)];
183 barrier(CLK_LOCAL_MEM_FENCE);
190 float val1 = ELEM_FLT2(eig, pt1);
194 for (int j=0;j<n;++j)
197 val2 = ELEM_FLT2(eig, pt2);
199 pos++;//calculate the rank of this element in this work group
206 // val1 and val2 are same
211 for (int j=0; j< same; j++)
212 corners[pos + j] = pt1;
215 void sortCorners_selectionSortFinal
218 __global float2 * corners,
222 const int i = get_local_id(0); // index in workgroup
223 const int numOfGroups = get_num_groups(0); // index in workgroup
224 const int groupID = get_group_id(0);
225 const int wg = get_local_size(0); // workgroup size = block size
226 int pos = 0, same = 0;
227 const int offset = get_group_id(0) * wg;
228 const int remainder = count - wg*(numOfGroups-1);
230 if((offset + i ) >= count)
233 pt1 = corners[groupID*wg + i];
235 float val1 = ELEM_FLT2(eig, pt1);
238 for(int j=0; j<numOfGroups-1; j++ )
240 for(int k=0; k<wg; k++)
242 pt2 = corners[j*wg + k];
243 val2 = ELEM_FLT2(eig, pt2);
248 //Increment only if the value is not the same.
257 for(int k=0; k<remainder; k++)
259 pt2 = corners[(numOfGroups-1)*wg + k];
260 val2 = ELEM_FLT2(eig, pt2);
265 //Don't increment if the value is the same.
266 //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
273 for (int j=0; j< same; j++)
274 corners[pos + j] = pt1;