1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Xiaopeng Fu, fuxiaopeng2222@163.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other oclMaterials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
46 #include "precomp.hpp"
47 #include "opencl_kernels.hpp"
50 using namespace cv::ocl;
52 static void generateRandomCenter(const vector<Vec2f>& box, float* center, RNG& rng)
54 size_t j, dims = box.size();
55 float margin = 1.f/dims;
56 for( j = 0; j < dims; j++ )
57 center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0];
60 // This class is copied from matrix.cpp in core module.
61 class KMeansPPDistanceComputer : public ParallelLoopBody
64 KMeansPPDistanceComputer( float *_tdist2,
77 void operator()( const cv::Range& range ) const
79 const int begin = range.start;
80 const int end = range.end;
82 for ( int i = begin; i<end; i++ )
84 tdist2[i] = std::min(normL2Sqr_(data + step*i, data + stepci, dims), dist[i]);
89 KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC
99 k-means center initialization using the following algorithm:
100 Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
102 static void generateCentersPP(const Mat& _data, Mat& _out_centers,
103 int K, RNG& rng, int trials)
105 int i, j, k, dims = _data.cols, N = _data.rows;
106 const float* data = (float*)_data.data;
107 size_t step = _data.step/sizeof(data[0]);
108 vector<int> _centers(K);
109 int* centers = &_centers[0];
110 vector<float> _dist(N*3);
111 float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
114 centers[0] = (unsigned)rng % N;
116 for( i = 0; i < N; i++ )
118 dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims);
122 for( k = 1; k < K; k++ )
124 double bestSum = DBL_MAX;
127 for( j = 0; j < trials; j++ )
129 double p = (double)rng*sum0, s = 0;
130 for( i = 0; i < N-1; i++ )
131 if( (p -= dist[i]) <= 0 )
135 parallel_for_(Range(0, N),
136 KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
137 for( i = 0; i < N; i++ )
146 std::swap(tdist, tdist2);
149 centers[k] = bestCenter;
151 std::swap(dist, tdist);
154 for( k = 0; k < K; k++ )
156 const float* src = data + step*centers[k];
157 float* dst = _out_centers.ptr<float>(k);
158 for( j = 0; j < dims; j++ )
163 void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers)
165 //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
167 // CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
171 Context *clCxt = src.clCxt;
172 int labels_step = (int)(labels.step/labels.elemSize());
173 string kernelname = "distanceToCenters";
174 int threadNum = src.rows > 256 ? 256 : src.rows;
175 size_t localThreads[3] = {1, threadNum, 1};
176 size_t globalThreads[3] = {1, src.rows, 1};
178 vector<pair<size_t, const void *> > args;
179 args.push_back(make_pair(sizeof(cl_int), (void *)&labels_step));
180 args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows));
181 args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
182 args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
183 args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.cols));
184 args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
185 args.push_back(make_pair(sizeof(cl_mem), (void *)¢ers.data));
186 args.push_back(make_pair(sizeof(cl_mem), (void*)&dists.data));
188 openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL);
190 ///////////////////////////////////k - means /////////////////////////////////////////////////////////
191 double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
192 TermCriteria criteria, int attempts, int flags, oclMat &_centers)
194 const int SPP_TRIALS = 3;
195 bool isrow = _src.rows == 1 && _src.oclchannels() > 1;
196 int N = !isrow ? _src.rows : _src.cols;
197 int dims = (!isrow ? _src.cols : 1) * _src.oclchannels();
198 int type = _src.depth();
200 attempts = std::max(attempts, 1);
201 CV_Assert(type == CV_32F && K > 0 );
205 if( flags & CV_KMEANS_USE_INITIAL_LABELS )
207 CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
208 _bestLabels.cols * _bestLabels.rows == N &&
209 _bestLabels.type() == CV_32S );
210 _bestLabels.download(_labels);
214 if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
215 _bestLabels.cols * _bestLabels.rows == N &&
216 _bestLabels.type() == CV_32S &&
217 _bestLabels.isContinuous()))
218 _bestLabels.create(N, 1, CV_32S);
219 _labels.create(_bestLabels.size(), _bestLabels.type());
221 int* labels = _labels.ptr<int>();
225 Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
226 vector<int> counters(K);
227 vector<Vec2f> _box(dims);
228 Vec2f* box = &_box[0];
229 double best_compactness = DBL_MAX, compactness = 0;
231 int a, iter, i, j, k;
233 if( criteria.type & TermCriteria::EPS )
234 criteria.epsilon = std::max(criteria.epsilon, 0.);
236 criteria.epsilon = FLT_EPSILON;
237 criteria.epsilon *= criteria.epsilon;
239 if( criteria.type & TermCriteria::COUNT )
240 criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
242 criteria.maxCount = 100;
247 criteria.maxCount = 2;
250 const float* sample = data.ptr<float>();
251 for( j = 0; j < dims; j++ )
252 box[j] = Vec2f(sample[j], sample[j]);
254 for( i = 1; i < N; i++ )
256 sample = data.ptr<float>(i);
257 for( j = 0; j < dims; j++ )
260 box[j][0] = std::min(box[j][0], v);
261 box[j][1] = std::max(box[j][1], v);
265 for( a = 0; a < attempts; a++ )
267 double max_center_shift = DBL_MAX;
270 swap(centers, old_centers);
272 if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) )
274 if( flags & KMEANS_PP_CENTERS )
275 generateCentersPP(data, centers, K, rng, SPP_TRIALS);
278 for( k = 0; k < K; k++ )
279 generateRandomCenter(_box, centers.ptr<float>(k), rng);
284 if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) )
286 for( i = 0; i < N; i++ )
287 CV_Assert( (unsigned)labels[i] < (unsigned)K );
292 for( k = 0; k < K; k++ )
295 for( i = 0; i < N; i++ )
297 sample = data.ptr<float>(i);
299 float* center = centers.ptr<float>(k);
301 #if CV_ENABLE_UNROLLED
302 for(; j <= dims - 4; j += 4 )
304 float t0 = center[j] + sample[j];
305 float t1 = center[j+1] + sample[j+1];
310 t0 = center[j+2] + sample[j+2];
311 t1 = center[j+3] + sample[j+3];
317 for( ; j < dims; j++ )
318 center[j] += sample[j];
323 max_center_shift = 0;
325 for( k = 0; k < K; k++ )
327 if( counters[k] != 0 )
330 // if some cluster appeared to be empty then:
331 // 1. find the biggest cluster
332 // 2. find the farthest from the center point in the biggest cluster
333 // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
335 for( int k1 = 1; k1 < K; k1++ )
337 if( counters[max_k] < counters[k1] )
343 float* new_center = centers.ptr<float>(k);
344 float* old_center = centers.ptr<float>(max_k);
345 float* _old_center = temp.ptr<float>(); // normalized
346 float scale = 1.f/counters[max_k];
347 for( j = 0; j < dims; j++ )
348 _old_center[j] = old_center[j]*scale;
350 for( i = 0; i < N; i++ )
352 if( labels[i] != max_k )
354 sample = data.ptr<float>(i);
355 double dist = normL2Sqr_(sample, _old_center, dims);
357 if( max_dist <= dist )
366 labels[farthest_i] = k;
367 sample = data.ptr<float>(farthest_i);
369 for( j = 0; j < dims; j++ )
371 old_center[j] -= sample[j];
372 new_center[j] += sample[j];
376 for( k = 0; k < K; k++ )
378 float* center = centers.ptr<float>(k);
379 CV_Assert( counters[k] != 0 );
381 float scale = 1.f/counters[k];
382 for( j = 0; j < dims; j++ )
388 const float* old_center = old_centers.ptr<float>(k);
389 for( j = 0; j < dims; j++ )
391 double t = center[j] - old_center[j];
394 max_center_shift = std::max(max_center_shift, dist);
399 if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
403 oclMat _dists(1, N, CV_64F);
405 _bestLabels.upload(_labels);
406 _centers.upload(centers);
407 distanceToCenters(_dists, _bestLabels, _src, _centers);
410 _dists.download(dists);
411 _bestLabels.download(_labels);
413 double* dist = dists.ptr<double>(0);
415 for( i = 0; i < N; i++ )
417 compactness += dist[i];
421 if( compactness < best_compactness )
423 best_compactness = compactness;
427 return best_compactness;