From 90aac764dd6e8c77265b1b5d616b0a70d4c74a42 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 22 Jan 2018 14:14:02 +0300 Subject: [PATCH] core: kmeans refactoring - reduce scope of i,k,j variables - use cv::AutoBuffer - template class KMeansDistanceComputer - eliminate manual unrolling: CV_ENABLE_UNROLLED --- modules/core/src/kmeans.cpp | 324 +++++++++++++++++++++----------------------- 1 file changed, 152 insertions(+), 172 deletions(-) diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index 10c0874..cbf9e47 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2000-2018, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. @@ -51,101 +51,91 @@ namespace cv static int CV_KMEANS_PARALLEL_GRANULARITY = (int)utils::getConfigurationParameterSizeT("OPENCV_KMEANS_PARALLEL_GRANULARITY", 1000); - -static void generateRandomCenter(const std::vector& box, float* center, RNG& rng) +static void generateRandomCenter(int dims, const Vec2f* box, float* center, RNG& rng) { - size_t j, dims = box.size(); float margin = 1.f/dims; - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; } class KMeansPPDistanceComputer : public ParallelLoopBody { public: - KMeansPPDistanceComputer( float *_tdist2, - const float *_data, - const float *_dist, - int _dims, - size_t _step, - size_t _stepci ) - : tdist2(_tdist2), - data(_data), - dist(_dist), - dims(_dims), - step(_step), - stepci(_stepci) { } + KMeansPPDistanceComputer(float *tdist2_, const Mat& data_, const float *dist_, int ci_) : + tdist2(tdist2_), data(data_), dist(dist_), ci(ci_) + { } void operator()( const cv::Range& range ) const { CV_TRACE_FUNCTION(); const int begin = range.start; const int end = range.end; + const int dims = data.cols; - for ( int i = begin; i(i), data.ptr(ci), dims), dist[i]); } } private: - KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC + KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // = delete float *tdist2; - const float *data; + const Mat& data; const float *dist; - const int dims; - const size_t step; - const size_t stepci; + const int ci; }; /* k-means center initialization using the following algorithm: Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding */ -static void generateCentersPP(const Mat& _data, Mat& _out_centers, +static void generateCentersPP(const Mat& data, Mat& _out_centers, int K, RNG& rng, int trials) { CV_TRACE_FUNCTION(); - int i, j, k, dims = _data.cols, N = _data.rows; - const float* data = _data.ptr(0); - size_t step = _data.step/sizeof(data[0]); - std::vector _centers(K); + const int dims = data.cols, N = data.rows; + cv::AutoBuffer _centers(K); int* centers = &_centers[0]; - std::vector _dist(N*3); + cv::AutoBuffer _dist(N*3); float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; double sum0 = 0; centers[0] = (unsigned)rng % N; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims); + dist[i] = normL2Sqr(data.ptr(i), data.ptr(centers[0]), dims); sum0 += dist[i]; } - for( k = 1; k < K; k++ ) + for (int k = 1; k < K; k++) { double bestSum = DBL_MAX; int bestCenter = -1; - for( j = 0; j < trials; j++ ) + for (int j = 0; j < trials; j++) { - double p = (double)rng*sum0, s = 0; - for( i = 0; i < N-1; i++ ) - if( (p -= dist[i]) <= 0 ) + double p = (double)rng*sum0; + int ci = 0; + for (; ci < N - 1; ci++) + { + p -= dist[ci]; + if (p <= 0) break; - int ci = i; + } parallel_for_(Range(0, N), - KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci), + KMeansPPDistanceComputer(tdist2, data, dist, ci), divUp(dims * N, CV_KMEANS_PARALLEL_GRANULARITY)); - for( i = 0; i < N; i++ ) + double s = 0; + for (int i = 0; i < N; i++) { s += tdist2[i]; } - if( s < bestSum ) + if (s < bestSum) { bestSum = s; bestCenter = ci; @@ -157,39 +147,39 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers, std::swap(dist, tdist); } - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { - const float* src = data + step*centers[k]; + const float* src = data.ptr(centers[k]); float* dst = _out_centers.ptr(k); - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) dst[j] = src[j]; } } +template class KMeansDistanceComputer : public ParallelLoopBody { public: - KMeansDistanceComputer( double *_distances, - int *_labels, - const Mat& _data, - const Mat& _centers, - bool _onlyDistance = false ) - : distances(_distances), - labels(_labels), - data(_data), - centers(_centers), - onlyDistance(_onlyDistance) + KMeansDistanceComputer( double *distances_, + int *labels_, + const Mat& data_, + const Mat& centers_) + : distances(distances_), + labels(labels_), + data(data_), + centers(centers_) { } void operator()( const Range& range ) const { + CV_TRACE_FUNCTION(); const int begin = range.start; const int end = range.end; const int K = centers.rows; const int dims = centers.cols; - for( int i = begin; i(i); if (onlyDistance) @@ -198,34 +188,36 @@ public: distances[i] = normL2Sqr(sample, center, dims); continue; } - int k_best = 0; - double min_dist = DBL_MAX; - - for( int k = 0; k < K; k++ ) + else { - const float* center = centers.ptr(k); - const double dist = normL2Sqr(sample, center, dims); + int k_best = 0; + double min_dist = DBL_MAX; - if( min_dist > dist ) + for (int k = 0; k < K; k++) { - min_dist = dist; - k_best = k; + const float* center = centers.ptr(k); + const double dist = normL2Sqr(sample, center, dims); + + if (min_dist > dist) + { + min_dist = dist; + k_best = k; + } } - } - distances[i] = min_dist; - labels[i] = k_best; + distances[i] = min_dist; + labels[i] = k_best; + } } } private: - KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // to quiet MSVC + KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // = delete double *distances; int *labels; const Mat& data; const Mat& centers; - bool onlyDistance; }; } @@ -236,13 +228,12 @@ double cv::kmeans( InputArray _data, int K, int flags, OutputArray _centers ) { CV_INSTRUMENT_REGION() - const int SPP_TRIALS = 3; Mat data0 = _data.getMat(); - bool isrow = data0.rows == 1; - int N = isrow ? data0.cols : data0.rows; - int dims = (isrow ? 1 : data0.cols)*data0.channels(); - int type = data0.depth(); + const bool isrow = data0.rows == 1; + const int N = isrow ? data0.cols : data0.rows; + const int dims = (isrow ? 1 : data0.cols)*data0.channels(); + const int type = data0.depth(); attempts = std::max(attempts, 1); CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); @@ -253,129 +244,115 @@ double cv::kmeans( InputArray _data, int K, _bestLabels.create(N, 1, CV_32S, -1, true); Mat _labels, best_labels = _bestLabels.getMat(); - if( flags & CV_KMEANS_USE_INITIAL_LABELS ) + if (flags & CV_KMEANS_USE_INITIAL_LABELS) { CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && best_labels.type() == CV_32S && best_labels.isContinuous()); - best_labels.copyTo(_labels); + best_labels.reshape(1, N).copyTo(_labels); + for (int i = 0; i < N; i++) + { + CV_Assert((unsigned)_labels.at(i) < (unsigned)K); + } } else { - if( !((best_labels.cols == 1 || best_labels.rows == 1) && + if (!((best_labels.cols == 1 || best_labels.rows == 1) && best_labels.cols*best_labels.rows == N && - best_labels.type() == CV_32S && - best_labels.isContinuous())) - best_labels.create(N, 1, CV_32S); + best_labels.type() == CV_32S && + best_labels.isContinuous())) + { + _bestLabels.create(N, 1, CV_32S); + best_labels = _bestLabels.getMat(); + } _labels.create(best_labels.size(), best_labels.type()); } int* labels = _labels.ptr(); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); - std::vector counters(K); - std::vector _box(dims); - Mat dists(1, N, CV_64F); - Vec2f* box = &_box[0]; - double best_compactness = DBL_MAX, compactness = 0; + cv::AutoBuffer counters(K); + cv::AutoBuffer dists(N); RNG& rng = theRNG(); - int a, iter, i, j, k; - if( criteria.type & TermCriteria::EPS ) + if (criteria.type & TermCriteria::EPS) criteria.epsilon = std::max(criteria.epsilon, 0.); else criteria.epsilon = FLT_EPSILON; criteria.epsilon *= criteria.epsilon; - if( criteria.type & TermCriteria::COUNT ) + if (criteria.type & TermCriteria::COUNT) criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); else criteria.maxCount = 100; - if( K == 1 ) + if (K == 1) { attempts = 1; criteria.maxCount = 2; } - const float* sample = data.ptr(0); - for( j = 0; j < dims; j++ ) - box[j] = Vec2f(sample[j], sample[j]); - - for( i = 1; i < N; i++ ) + cv::AutoBuffer box(dims); + if (!(flags & KMEANS_PP_CENTERS)) { - sample = data.ptr(i); - for( j = 0; j < dims; j++ ) { - float v = sample[j]; - box[j][0] = std::min(box[j][0], v); - box[j][1] = std::max(box[j][1], v); + const float* sample = data.ptr(0); + for (int j = 0; j < dims; j++) + box[j] = Vec2f(sample[j], sample[j]); + } + for (int i = 1; i < N; i++) + { + const float* sample = data.ptr(i); + for (int j = 0; j < dims; j++) + { + float v = sample[j]; + box[j][0] = std::min(box[j][0], v); + box[j][1] = std::max(box[j][1], v); + } } } - for( a = 0; a < attempts; a++ ) + double best_compactness = DBL_MAX; + for (int a = 0; a < attempts; a++) { - double max_center_shift = DBL_MAX; - for( iter = 0;; ) + double compactness = 0; + + for (int iter = 0; ;) { + double max_center_shift = iter == 0 ? DBL_MAX : 0.0; + swap(centers, old_centers); - if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) + if (iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS))) { - if( flags & KMEANS_PP_CENTERS ) + if (flags & KMEANS_PP_CENTERS) generateCentersPP(data, centers, K, rng, SPP_TRIALS); else { - for( k = 0; k < K; k++ ) - generateRandomCenter(_box, centers.ptr(k), rng); + for (int k = 0; k < K; k++) + generateRandomCenter(dims, box, centers.ptr(k), rng); } } else { - if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) - { - for( i = 0; i < N; i++ ) - CV_Assert( (unsigned)labels[i] < (unsigned)K ); - } - // compute centers centers = Scalar(0); - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) counters[k] = 0; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - sample = data.ptr(i); - k = labels[i]; + const float* sample = data.ptr(i); + int k = labels[i]; float* center = centers.ptr(k); - j=0; - #if CV_ENABLE_UNROLLED - for(; j <= dims - 4; j += 4 ) - { - float t0 = center[j] + sample[j]; - float t1 = center[j+1] + sample[j+1]; - - center[j] = t0; - center[j+1] = t1; - - t0 = center[j+2] + sample[j+2]; - t1 = center[j+3] + sample[j+3]; - - center[j+2] = t0; - center[j+3] = t1; - } - #endif - for( ; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] += sample[j]; counters[k]++; } - if( iter > 0 ) - max_center_shift = 0; - - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { - if( counters[k] != 0 ) + if (counters[k] != 0) continue; // if some cluster appeared to be empty then: @@ -383,29 +360,28 @@ double cv::kmeans( InputArray _data, int K, // 2. find the farthest from the center point in the biggest cluster // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. int max_k = 0; - for( int k1 = 1; k1 < K; k1++ ) + for (int k1 = 1; k1 < K; k1++) { - if( counters[max_k] < counters[k1] ) + if (counters[max_k] < counters[k1]) max_k = k1; } double max_dist = 0; int farthest_i = -1; - float* new_center = centers.ptr(k); - float* old_center = centers.ptr(max_k); - float* _old_center = temp.ptr(); // normalized + float* base_center = centers.ptr(max_k); + float* _base_center = temp.ptr(); // normalized float scale = 1.f/counters[max_k]; - for( j = 0; j < dims; j++ ) - _old_center[j] = old_center[j]*scale; + for (int j = 0; j < dims; j++) + _base_center[j] = base_center[j]*scale; - for( i = 0; i < N; i++ ) + for (int i = 0; i < N; i++) { - if( labels[i] != max_k ) + if (labels[i] != max_k) continue; - sample = data.ptr(i); - double dist = normL2Sqr(sample, _old_center, dims); + const float* sample = data.ptr(i); + double dist = normL2Sqr(sample, _base_center, dims); - if( max_dist <= dist ) + if (max_dist <= dist) { max_dist = dist; farthest_i = i; @@ -415,29 +391,30 @@ double cv::kmeans( InputArray _data, int K, counters[max_k]--; counters[k]++; labels[farthest_i] = k; - sample = data.ptr(farthest_i); - for( j = 0; j < dims; j++ ) + const float* sample = data.ptr(farthest_i); + float* cur_center = centers.ptr(k); + for (int j = 0; j < dims; j++) { - old_center[j] -= sample[j]; - new_center[j] += sample[j]; + base_center[j] -= sample[j]; + cur_center[j] += sample[j]; } } - for( k = 0; k < K; k++ ) + for (int k = 0; k < K; k++) { float* center = centers.ptr(k); CV_Assert( counters[k] != 0 ); float scale = 1.f/counters[k]; - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) center[j] *= scale; - if( iter > 0 ) + if (iter > 0) { double dist = 0; const float* old_center = old_centers.ptr(k); - for( j = 0; j < dims; j++ ) + for (int j = 0; j < dims; j++) { double t = center[j] - old_center[j]; dist += t*t; @@ -449,26 +426,29 @@ double cv::kmeans( InputArray _data, int K, bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon); - // assign labels - dists = 0; - double* dist = dists.ptr(0); - parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter), - divUp(dims * N * (isLastIter ? 1 : K), CV_KMEANS_PARALLEL_GRANULARITY)); - compactness = sum(dists)[0]; - if (isLastIter) + { + // don't re-assign labels to avoid creation of empty clusters + parallel_for_(Range(0, N), KMeansDistanceComputer(dists, labels, data, centers), divUp(dims * N, CV_KMEANS_PARALLEL_GRANULARITY)); + compactness = sum(Mat(Size(N, 1), CV_64F, &dists[0]))[0]; break; + } + else + { + // assign labels + parallel_for_(Range(0, N), KMeansDistanceComputer(dists, labels, data, centers), divUp(dims * N * K, CV_KMEANS_PARALLEL_GRANULARITY)); + } } - if( compactness < best_compactness ) + if (compactness < best_compactness) { best_compactness = compactness; - if( _centers.needed() ) + if (_centers.needed()) { - Mat reshaped = centers; - if(_centers.fixedType() && _centers.channels() == dims) - reshaped = centers.reshape(dims); - reshaped.copyTo(_centers); + if (_centers.fixedType() && _centers.channels() == dims) + centers.reshape(dims).copyTo(_centers); + else + centers.copyTo(_centers); } _labels.copyTo(best_labels); } -- 2.7.4