1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Jin Ma, jin@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
// Enable the AMD double-precision extension.
// NOTE(review): the #if/#ifdef that opens this conditional chain, and its
// closing #endif, are not among the visible lines.
48 #pragma OPENCL EXTENSION cl_amd_fp64:enable
49 #elif defined (cl_khr_fp64)
// Taken when cl_khr_fp64 is defined: enable the Khronos double-precision extension.
50 #pragma OPENCL EXTENSION cl_khr_fp64:enable
// CV_SWAP(a, b, t): exchange the values of a and b, using t as the temporary.
// Expands to a single comma expression; a and b are each evaluated twice, so
// the arguments should be free of side effects.
57 #define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t))
58 ///////////////////////////////////// find_nearest //////////////////////////////////////
// knn_find_nearest
//
// For the row of `sample` selected by get_global_id(1), walks every one of the
// sample_ocl_row rows of `samples_ocl`, maintains an ordered candidate list of
// at most k entries in __local memory, and finally writes a single float for
// that row to _results[y * _results_step].
//
// NOTE(review): this view of the kernel is not contiguous. Braces and the
// declarations of sum, si, k1, k2, s, inv_scale, best_val, j, t, ii, ... are
// not visible, so the comments below describe only the statements shown.
//
// Parameters, as used by the visible statements:
//   sample         - query data; row y is read starting at sample + y * sample_step.
//   sample_col     - number of columns compared per row; also the column of
//                    samples_ocl from which the value stored in nr is read.
//   sample_step    - row stride of `sample`, in float elements.
//   k              - upper bound on the length of the candidate list.
//   samples_ocl    - reference data; row j starts at samples_ocl + j * sample_ocl_step.
//   sample_ocl_row - number of reference rows scanned.
//   sample_ocl_step- row stride of `samples_ocl`, in float elements.
//   _results       - output; one float per query row at index y * _results_step.
//   _results_step  - stride of `_results`, in float elements.
//   K1             - number of candidate entries consumed when producing the result.
//   nThreads       - modulus for the per-row slot and stride of the nr/dd arrays;
//                    presumably the work-group size in dimension 1 - confirm
//                    against the host code.
//   nr             - __local scratch; the first nThreads * k floats hold the
//                    candidate values, the floats after that are used as `dd`.
//   sample_row, _regression, sample_ocl_col - not referenced in the visible lines.
59 __kernel void knn_find_nearest(__global float* sample, int sample_row, int sample_col, int sample_step,
60 int k, __global float* samples_ocl, int sample_ocl_row, int sample_ocl_step,
61 __global float* _results, int _results_step, int _regression, int K1,
62 int sample_ocl_col, int nThreads, __local float* nr)
// NOTE(review): presumably overwritten from _regression on a line not shown - confirm.
67 bool regression = false;
// y selects the query row handled by this work-item.
79 int y = get_global_id(1);
// Slot of this work-item inside the nr/dd scratch arrays.
81 int threadY = (y % nThreads);
// dd (per-candidate distance keys) lives directly after the nThreads * k floats of nr.
// Both arrays are indexed as [entry * nThreads + threadY].
82 __local float* dd = nr + nThreads * k;
// Scan every reference row j.
87 for(j = 0; j < sample_ocl_row; j++)
// Main loop: 16 columns per iteration. The strict `<` against sample_col - 16
// leaves between 1 and 16 trailing columns for the scalar loop below.
97 for(t = 0; t < sample_col - 16; t += 16)
// Element-wise difference of 16 consecutive columns of query row y and reference row j.
99 float16 t0 = vload16(0, sample + y * sample_step + t) - vload16(0, samples_ocl + j * sample_ocl_step + t);
// Add all 16 lanes of t0 into sum.
// NOTE(review): original line 100 is missing here; presumably it squares t0 - confirm.
101 sum += t0.s0 + t0.s1 + t0.s2 + t0.s3 + t0.s4 + t0.s5 + t0.s6 + t0.s7 +
102 t0.s8 + t0.s9 + t0.sa + t0.sb + t0.sc + t0.sd + t0.se + t0.sf;
// Scalar tail over the remaining columns.
105 for(; t < sample_col; t++)
// With DOUBLE_SUPPORT defined the per-column difference is held in a double;
// the float declaration below is presumably the #else branch (directive not shown).
107 #ifdef DOUBLE_SUPPORT
108 double t0 = sample[y * sample_step + t] - samples_ocl[j * sample_ocl_step + t];
110 float t0 = sample[y * sample_step + t] - samples_ocl[j * sample_ocl_step + t];
// Walk the current candidate list from its last entry towards the front,
// looking for the insertion point of si.
116 for(ii = k1 - 1; ii >= 0; ii--)
// Floats are compared through their bit patterns as signed integers; this
// matches float ordering only for non-negative values.
// NOTE(review): presumably si is a non-negative distance and the loop breaks
// here - neither is visible.
118 if(as_int(si) > as_int(dd[ii * nThreads + threadY]))
// Shift entries after position ii one place towards the end to open slot ii + 1.
123 for(ii1 = k2 - 1; ii1 > ii; ii1--)
125 dd[(ii1 + 1) * nThreads + threadY] = dd[ii1 * nThreads + threadY];
126 nr[(ii1 + 1) * nThreads + threadY] = nr[ii1 * nThreads + threadY];
// Store the new candidate: its key in dd, and in nr the value found at
// column sample_col of reference row j.
129 dd[(ii + 1) * nThreads + threadY] = si;
130 nr[(ii + 1) * nThreads + threadY] = samples_ocl[sample_col + j * sample_ocl_step];
// k1 = min(k1 + 1, k); k2 = min(k1, k - 1).
132 k1 = (k1 + 1) < k ? (k1 + 1) : k;
133 k2 = k1 < (k - 1) ? k1 : (k - 1);
135 /*! find_nearest_neighbor done!*/
136 /*! write_results start!*/
140 #ifdef DOUBLE_SUPPORT
// First output path: sum the first K1 values of nr for this slot and write the
// sum scaled by inv_scale (inv_scale is defined on a line not shown).
145 for(j = 0; j < K1; j++)
146 s += nr[j * nThreads + threadY];
148 _results[y * _results_step] = (float)(s * inv_scale);
// Second output path: pick the most frequent value among the first K1 entries of nr.
152 int prev_start = 0, best_count = 0, cur_count;
// Bubble sort of the K1 values into ascending order.
155 for(j = K1 - 1; j > 0; j--)
// NOTE(review): presumably set on swap and used to stop early - not visible.
157 bool swap_f1 = false;
158 for(j1 = 0; j1 < j; j1++)
160 if(nr[j1 * nThreads + threadY] > nr[(j1 + 1) * nThreads + threadY])
163 CV_SWAP(nr[j1 * nThreads + threadY], nr[(j1 + 1) * nThreads + threadY], t);
// Scan the sorted values for runs of equal values. A run ends at j when j == K1
// or the value at j differs from the one at j - 1.
172 for(j = 1; j <= K1; j++)
173 if(j == K1 || nr[j * nThreads + threadY] != nr[(j - 1) * nThreads + threadY])
175 cur_count = j - prev_start;
// Strict `<`: a later run replaces the best only when it is strictly longer.
176 if(best_count < cur_count)
178 best_count = cur_count;
179 best_val = nr[(j - 1) * nThreads + threadY];
// Write the value of the longest run as the result for row y.
183 _results[y * _results_step] = best_val;
185 ///*! write_results done!*/