modules/ocl/src/kmeans.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //     Xiaopeng Fu, fuxiaopeng2222@163.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
  28 //     and/or other oclMaterials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors as is and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46 #include "precomp.hpp"
  47 #include "opencl_kernels.hpp"
  48
  49 using namespace cv;
  50 using namespace cv::ocl;
  51
  52 static void generateRandomCenter(const vector<Vec2f>& box, float* center, RNG& rng)
  53 {
  54     size_t j, dims = box.size();
  55     float margin = 1.f/dims;
  56     for( j = 0; j < dims; j++ )
  57         center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0];
  58 }
  59
  60 // This class is copied from matrix.cpp in core module.
  61 class KMeansPPDistanceComputer : public ParallelLoopBody
  62 {
  63 public:
  64     KMeansPPDistanceComputer( float *_tdist2,
  65                               const float *_data,
  66                               const float *_dist,
  67                               int _dims,
  68                               size_t _step,
  69                               size_t _stepci )
  70         : tdist2(_tdist2),
  71           data(_data),
  72           dist(_dist),
  73           dims(_dims),
  74           step(_step),
  75           stepci(_stepci) { }
  76
  77     void operator()( const cv::Range& range ) const
  78     {
  79         const int begin = range.start;
  80         const int end = range.end;
  81
  82         for ( int i = begin; i<end; i++ )
  83         {
  84             tdist2[i] = std::min(normL2Sqr_(data + step*i, data + stepci, dims), dist[i]);
  85         }
  86     }
  87
  88 private:
  89     KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC
  90
  91     float *tdist2;
  92     const float *data;
  93     const float *dist;
  94     const int dims;
  95     const size_t step;
  96     const size_t stepci;
  97 };
  98 /*
  99 k-means center initialization using the following algorithm:
 100 Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
 101 */
 102 static void generateCentersPP(const Mat& _data, Mat& _out_centers,
 103                               int K, RNG& rng, int trials)
 104 {
 105     int i, j, k, dims = _data.cols, N = _data.rows;
 106     const float* data = (float*)_data.data;
 107     size_t step = _data.step/sizeof(data[0]);
 108     vector<int> _centers(K);
 109     int* centers = &_centers[0];
 110     vector<float> _dist(N*3);
 111     float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
 112     double sum0 = 0;
 113
 114     centers[0] = (unsigned)rng % N;
 115
 116     for( i = 0; i < N; i++ )
 117     {
 118         dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims);
 119         sum0 += dist[i];
 120     }
 121
 122     for( k = 1; k < K; k++ )
 123     {
 124         double bestSum = DBL_MAX;
 125         int bestCenter = -1;
 126
 127         for( j = 0; j < trials; j++ )
 128         {
 129             double p = (double)rng*sum0, s = 0;
 130             for( i = 0; i < N-1; i++ )
 131                 if( (p -= dist[i]) <= 0 )
 132                     break;
 133             int ci = i;
 134
 135             parallel_for_(Range(0, N),
 136                           KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
 137             for( i = 0; i < N; i++ )
 138             {
 139                 s += tdist2[i];
 140             }
 141
 142             if( s < bestSum )
 143             {
 144                 bestSum = s;
 145                 bestCenter = ci;
 146                 std::swap(tdist, tdist2);
 147             }
 148         }
 149         centers[k] = bestCenter;
 150         sum0 = bestSum;
 151         std::swap(dist, tdist);
 152     }
 153
 154     for( k = 0; k < K; k++ )
 155     {
 156         const float* src = data + step*centers[k];
 157         float* dst = _out_centers.ptr<float>(k);
 158         for( j = 0; j < dims; j++ )
 159             dst[j] = src[j];
 160     }
 161 }
 162
 163 void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers)
 164 {
 165     //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
 166     //{
 167     //    CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
 168     //    return;
 169     //}
 170
 171     Context  *clCxt = src.clCxt;
 172     int labels_step = (int)(labels.step/labels.elemSize());
 173     string kernelname = "distanceToCenters";
 174     int threadNum = src.rows > 256 ? 256 : src.rows;
 175     size_t localThreads[3]  = {1, threadNum, 1};
 176     size_t globalThreads[3] = {1, src.rows, 1};
 177
 178     vector<pair<size_t, const void *> > args;
 179     args.push_back(make_pair(sizeof(cl_int), (void *)&labels_step));
 180     args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));
 181     args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
 182     args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
 183     args.push_back(make_pair(sizeof(cl_int), (void *)&centers.cols));
 184     args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
 185     args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
 186     args.push_back(make_pair(sizeof(cl_mem), (void*)&dists.data));
 187
 188     openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL);
 189 }
 190 ///////////////////////////////////k - means /////////////////////////////////////////////////////////
 191 double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
 192                        TermCriteria criteria, int attempts, int flags, oclMat &_centers)
 193 {
 194     const int SPP_TRIALS = 3;
 195     bool isrow = _src.rows == 1 && _src.oclchannels() > 1;
 196     int N = !isrow ? _src.rows : _src.cols;
 197     int dims = (!isrow ? _src.cols : 1) * _src.oclchannels();
 198     int type = _src.depth();
 199
 200     attempts = std::max(attempts, 1);
 201     CV_Assert(type == CV_32F && K > 0 );
 202     CV_Assert( N >= K );
 203
 204     Mat _labels;
 205     if( flags & CV_KMEANS_USE_INITIAL_LABELS )
 206     {
 207         CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
 208                    _bestLabels.cols * _bestLabels.rows == N &&
 209                    _bestLabels.type() == CV_32S );
 210         _bestLabels.download(_labels);
 211     }
 212     else
 213     {
 214         if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
 215                 _bestLabels.cols * _bestLabels.rows == N &&
 216                 _bestLabels.type() == CV_32S &&
 217                 _bestLabels.isContinuous()))
 218             _bestLabels.create(N, 1, CV_32S);
 219         _labels.create(_bestLabels.size(), _bestLabels.type());
 220     }
 221     int* labels = _labels.ptr<int>();
 222
 223     Mat data;
 224     _src.download(data);
 225     Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
 226     vector<int> counters(K);
 227     vector<Vec2f> _box(dims);
 228     Vec2f* box = &_box[0];
 229     double best_compactness = DBL_MAX, compactness = 0;
 230     RNG& rng = theRNG();
 231     int a, iter, i, j, k;
 232
 233     if( criteria.type & TermCriteria::EPS )
 234         criteria.epsilon = std::max(criteria.epsilon, 0.);
 235     else
 236         criteria.epsilon = FLT_EPSILON;
 237     criteria.epsilon *= criteria.epsilon;
 238
 239     if( criteria.type & TermCriteria::COUNT )
 240         criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
 241     else
 242         criteria.maxCount = 100;
 243
 244     if( K == 1 )
 245     {
 246         attempts = 1;
 247         criteria.maxCount = 2;
 248     }
 249
 250     const float* sample = data.ptr<float>();
 251     for( j = 0; j < dims; j++ )
 252         box[j] = Vec2f(sample[j], sample[j]);
 253
 254     for( i = 1; i < N; i++ )
 255     {
 256         sample = data.ptr<float>(i);
 257         for( j = 0; j < dims; j++ )
 258         {
 259             float v = sample[j];
 260             box[j][0] = std::min(box[j][0], v);
 261             box[j][1] = std::max(box[j][1], v);
 262         }
 263     }
 264
 265     for( a = 0; a < attempts; a++ )
 266     {
 267         double max_center_shift = DBL_MAX;
 268         for( iter = 0;; )
 269         {
 270             swap(centers, old_centers);
 271
 272             if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) )
 273             {
 274                 if( flags & KMEANS_PP_CENTERS )
 275                     generateCentersPP(data, centers, K, rng, SPP_TRIALS);
 276                 else
 277                 {
 278                     for( k = 0; k < K; k++ )
 279                         generateRandomCenter(_box, centers.ptr<float>(k), rng);
 280                 }
 281             }
 282             else
 283             {
 284                 if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) )
 285                 {
 286                     for( i = 0; i < N; i++ )
 287                         CV_Assert( (unsigned)labels[i] < (unsigned)K );
 288                 }
 289
 290                 // compute centers
 291                 centers = Scalar(0);
 292                 for( k = 0; k < K; k++ )
 293                     counters[k] = 0;
 294
 295                 for( i = 0; i < N; i++ )
 296                 {
 297                     sample = data.ptr<float>(i);
 298                     k = labels[i];
 299                     float* center = centers.ptr<float>(k);
 300                     j=0;
 301 #if CV_ENABLE_UNROLLED
 302                     for(; j <= dims - 4; j += 4 )
 303                     {
 304                         float t0 = center[j] + sample[j];
 305                         float t1 = center[j+1] + sample[j+1];
 306
 307                         center[j] = t0;
 308                         center[j+1] = t1;
 309
 310                         t0 = center[j+2] + sample[j+2];
 311                         t1 = center[j+3] + sample[j+3];
 312
 313                         center[j+2] = t0;
 314                         center[j+3] = t1;
 315                     }
 316 #endif
 317                     for( ; j < dims; j++ )
 318                         center[j] += sample[j];
 319                     counters[k]++;
 320                 }
 321
 322                 if( iter > 0 )
 323                     max_center_shift = 0;
 324
 325                 for( k = 0; k < K; k++ )
 326                 {
 327                     if( counters[k] != 0 )
 328                         continue;
 329
 330                     // if some cluster appeared to be empty then:
 331                     //   1. find the biggest cluster
 332                     //   2. find the farthest from the center point in the biggest cluster
 333                     //   3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
 334                     int max_k = 0;
 335                     for( int k1 = 1; k1 < K; k1++ )
 336                     {
 337                         if( counters[max_k] < counters[k1] )
 338                             max_k = k1;
 339                     }
 340
 341                     double max_dist = 0;
 342                     int farthest_i = -1;
 343                     float* new_center =  centers.ptr<float>(k);
 344                     float* old_center =  centers.ptr<float>(max_k);
 345                     float* _old_center = temp.ptr<float>(); // normalized
 346                     float scale = 1.f/counters[max_k];
 347                     for( j = 0; j < dims; j++ )
 348                         _old_center[j] = old_center[j]*scale;
 349
 350                     for( i = 0; i < N; i++ )
 351                     {
 352                         if( labels[i] != max_k )
 353                             continue;
 354                         sample = data.ptr<float>(i);
 355                         double dist = normL2Sqr_(sample, _old_center, dims);
 356
 357                         if( max_dist <= dist )
 358                         {
 359                             max_dist = dist;
 360                             farthest_i = i;
 361                         }
 362                     }
 363
 364                     counters[max_k]--;
 365                     counters[k]++;
 366                     labels[farthest_i] = k;
 367                     sample = data.ptr<float>(farthest_i);
 368
 369                     for( j = 0; j < dims; j++ )
 370                     {
 371                         old_center[j] -= sample[j];
 372                         new_center[j] += sample[j];
 373                     }
 374                 }
 375
 376                 for( k = 0; k < K; k++ )
 377                 {
 378                     float* center = centers.ptr<float>(k);
 379                     CV_Assert( counters[k] != 0 );
 380
 381                     float scale = 1.f/counters[k];
 382                     for( j = 0; j < dims; j++ )
 383                         center[j] *= scale;
 384
 385                     if( iter > 0 )
 386                     {
 387                         double dist = 0;
 388                         const float* old_center = old_centers.ptr<float>(k);
 389                         for( j = 0; j < dims; j++ )
 390                         {
 391                             double t = center[j] - old_center[j];
 392                             dist += t*t;
 393                         }
 394                         max_center_shift = std::max(max_center_shift, dist);
 395                     }
 396                 }
 397             }
 398
 399             if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
 400                 break;
 401
 402             // assign labels
 403             oclMat _dists(1, N, CV_64F);
 404
 405             _bestLabels.upload(_labels);
 406             _centers.upload(centers);
 407             distanceToCenters(_dists, _bestLabels, _src, _centers);
 408
 409             Mat dists;
 410             _dists.download(dists);
 411             _bestLabels.download(_labels);
 412
 413             double* dist = dists.ptr<double>(0);
 414             compactness = 0;
 415             for( i = 0; i < N; i++ )
 416             {
 417                 compactness += dist[i];
 418             }
 419         }
 420
 421         if( compactness < best_compactness )
 422         {
 423             best_compactness = compactness;
 424         }
 425     }
 426
 427     return best_compactness;
 428 }