From 68a8a1116186b9fe77ae31c74120f8989df6f27c Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 8 Oct 2013 15:49:40 +0800 Subject: [PATCH] Rewrite distanceToCenters. It supports NORM_L1 distance types now and can use user provided indices. Also fixed a bug of kmeans where distance pointers should be float instead of double. NORM_L2 changed to NORM_L2SQR, Accuracy and Perf tests are added added ROI support in accuracy test of distanceToCenters --- modules/ocl/doc/ml_machine_learning.rst | 26 ++++++++- modules/ocl/include/opencv2/ocl/ocl.hpp | 5 +- modules/ocl/perf/perf_imgproc.cpp | 61 ++++++++++++++++++++ modules/ocl/src/kmeans.cpp | 75 +++++++++++++++++-------- modules/ocl/src/opencl/kmeans_kernel.cl | 98 +++++++++++++++++++++++---------- modules/ocl/test/test_kmeans.cpp | 91 +++++++++++++++++++++++++++++- 6 files changed, 299 insertions(+), 57 deletions(-) diff --git a/modules/ocl/doc/ml_machine_learning.rst b/modules/ocl/doc/ml_machine_learning.rst index 321cec9..eb72cbe 100644 --- a/modules/ocl/doc/ml_machine_learning.rst +++ b/modules/ocl/doc/ml_machine_learning.rst @@ -85,4 +85,28 @@ Finds centers of clusters and groups input samples around the clusters. * **KMEANS_USE_INITIAL_LABELS** During the first (and possibly the only) attempt, use the user-supplied labels instead of computing them from the initial centers. For the second and further attempts, use the random or semi-random centers. Use one of ``KMEANS_*_CENTERS`` flag to specify the exact method. - :param centers: Output matrix of the cluster centers, one row per each cluster center. \ No newline at end of file + :param centers: Output matrix of the cluster centers, one row per each cluster center. + +ocl::distanceToCenters +---------------------- +For each samples in ``source``, find its closest neighour in ``centers``. + +.. ocv:function:: void ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers, int distType = NORM_L2SQR, const oclMat &indices = oclMat()) + + :param dists: The output distances calculated from each sample to the best matched center. + + :param labels: The output index of best matched center for each row of sample. + + :param src: Floating-point matrix of input samples. One row per sample. + + :param centers: Floating-point matrix of center candidates. One row per center. + + :param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``. + + :param indices: Optional source indices. If not empty: + + * only the indexed source samples will be processed + * outputs, i.e., ``dists`` and ``labels``, have the same size of indices + * outputs are in the same order of indices instead of the order of src + +The method is a utility function which maybe used for multiple clustering algorithms such as K-means. diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index bf911f4..dd87f8a 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -877,7 +877,10 @@ namespace cv //! Compute closest centers for each lines in source and lable it after center's index // supports CV_32FC1/CV_32FC2/CV_32FC4 data type - CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers); + // supports NORM_L1 and NORM_L2 distType + // if indices is provided, only the indexed rows will be calculated and their results are in the same + // order of indices + CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers, int distType = NORM_L2SQR, const oclMat &indices = oclMat()); //!Does k-means procedure on GPU // supports CV_32FC1/CV_32FC2/CV_32FC4 data type diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 5eb32b4..b926ba7 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -860,3 +860,64 @@ PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES) else OCL_PERF_ELSE } + +//////////////////////////////distanceToCenters//////////////////////////////////////////////// + +CV_ENUM(DistType, NORM_L1, NORM_L2SQR); +typedef tuple distanceToCentersParameters; +typedef TestBaseWithParam distanceToCentersFixture; + +static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType) +{ + Mat batch_dists; + cv::batchDistance(src,centers,batch_dists, CV_32FC1, noArray(), distType); + std::vector dists_v; + std::vector labels_v; + for(int i = 0; i(GetParam()); + int distType = get<1>(GetParam()); + Mat src(size, CV_32FC1); + Mat centers(size, CV_32FC1); + Mat dists(cv::Size(src.rows,1), CV_32FC1); + Mat labels(cv::Size(src.rows,1), CV_32SC1); + declare.in(src, centers, WARMUP_RNG).out(dists, labels); + if (RUN_OCL_IMPL) + { + ocl::oclMat ocl_src(src); + ocl::oclMat ocl_centers(centers); + ocl::oclMat ocl_dists(dists); + ocl::oclMat ocl_labels(labels); + + OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src, ocl_centers, distType); + + ocl_dists.download(dists); + ocl_labels.download(labels); + + SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE); + SANITY_CHECK(labels); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() distanceToCentersPerfTest(src,centers,dists,labels,distType); + SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE); + SANITY_CHECK(labels); + } + else + OCL_PERF_ELSE +} diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index 112c482..6d53eb7 100644 --- a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -160,32 +160,61 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers, } } -void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) +void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers, int distType, const oclMat &indices) { - //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) - //{ - // CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double"); - // return; - //} - - Context *clCxt = src.clCxt; - int labels_step = (int)(labels.step/labels.elemSize()); - string kernelname = "distanceToCenters"; - int threadNum = src.rows > 256 ? 256 : src.rows; - size_t localThreads[3] = {1, threadNum, 1}; - size_t globalThreads[3] = {1, src.rows, 1}; + CV_Assert(src.cols*src.oclchannels() == centers.cols*centers.oclchannels()); + CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F); + bool is_label_row_major = false; + ensureSizeIsEnough(1, src.rows, CV_32FC1, dists); + if(labels.empty() || (!labels.empty() && labels.rows == src.rows && labels.cols == 1)) + { + ensureSizeIsEnough(src.rows, 1, CV_32SC1, labels); + is_label_row_major = true; + } + CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR); + + std::stringstream build_opt_ss; + build_opt_ss + << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST") + << (indices.empty() ? "" : " -D USE_INDEX"); + + String build_opt = build_opt_ss.str(); + + const int src_step = (int)(src.oclchannels() * src.step / src.elemSize()); + const int centers_step = (int)(centers.oclchannels() * centers.step / centers.elemSize()); + + const int colsNumb = centers.cols*centers.oclchannels(); + + const int label_step = is_label_row_major ? (int)(labels.step / labels.elemSize()) : 1; + String kernelname = "distanceToCenters"; + + const int number_of_input = indices.empty() ? src.rows : indices.size().area(); + + const int src_offset = (int)src.offset/src.elemSize(); + const int centers_offset = (int)centers.offset/centers.elemSize(); + + size_t globalThreads[3] = {number_of_input, 1, 1}; vector > args; - args.push_back(make_pair(sizeof(cl_int), (void *)&labels_step)); - args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows)); args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data)); - args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.cols)); - args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back(make_pair(sizeof(cl_mem), (void *)¢ers.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&dists.data)); + if(!indices.empty()) + { + args.push_back(make_pair(sizeof(cl_mem), (void *)&indices.data)); + } + args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data)); + args.push_back(make_pair(sizeof(cl_mem), (void *)&dists.data)); + args.push_back(make_pair(sizeof(cl_int), (void *)&colsNumb)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src_step)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers_step)); + args.push_back(make_pair(sizeof(cl_int), (void *)&label_step)); + args.push_back(make_pair(sizeof(cl_int), (void *)&number_of_input)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers.rows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset)); + args.push_back(make_pair(sizeof(cl_int), (void *)¢ers_offset)); - openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL); + openCLExecuteKernel(Context::getContext(), &kmeans_kernel, + kernelname, globalThreads, NULL, args, -1, -1, build_opt.c_str()); } ///////////////////////////////////k - means ///////////////////////////////////////////////////////// double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, @@ -404,17 +433,17 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, _bestLabels.upload(_labels); _centers.upload(centers); + distanceToCenters(_dists, _bestLabels, _src, _centers); Mat dists; _dists.download(dists); _bestLabels.download(_labels); - - double* dist = dists.ptr(0); + float* dist = dists.ptr(0); compactness = 0; for( i = 0; i < N; i++ ) { - compactness += dist[i]; + compactness += (double)dist[i]; } } diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl index c6af0ad..f5f93b7 100644 --- a/modules/ocl/src/opencl/kmeans_kernel.cl +++ b/modules/ocl/src/opencl/kmeans_kernel.cl @@ -16,6 +16,7 @@ // // @Authors // Xiaopeng Fu, fuxiaopeng2222@163.com +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -43,42 +44,81 @@ // //M*/ -__kernel void distanceToCenters( - int label_step, int K, - __global float *src, - __global int *labels, int dims, int rows, - __global float *centers, - __global float *dists) +#ifdef L1_DIST +# define DISTANCE(A, B) fabs((A) - (B)) +#elif defined L2SQR_DIST +# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B)) +#else +# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B)) +#endif + +inline float dist(__global const float * center, __global const float * src, int feature_cols) { - int gid = get_global_id(1); + float res = 0; + float4 tmp4; + int i; + for(i = 0; i < feature_cols / 4; i += 4, center += 4, src += 4) + { + tmp4 = vload4(0, center) - vload4(0, src); +#ifdef L1_DIST + tmp4 = fabs(tmp4); +#else + tmp4 *= tmp4; +#endif + res += tmp4.x + tmp4.y + tmp4.z + tmp4.w; + } - float dist, euDist, min; - int minCentroid; + for(; i < feature_cols; ++i, ++center, ++src) + { + res += DISTANCE(*src, *center); + } + return res; +} - if(gid >= rows) +// to be distinguished with distanceToCenters in kmeans_kernel.cl +__kernel void distanceToCenters( + __global const float *src, + __global const float *centers, +#ifdef USE_INDEX + __global const int *indices, +#endif + __global int *labels, + __global float *dists, + int feature_cols, + int src_step, + int centers_step, + int label_step, + int input_size, + int K, + int offset_src, + int offset_centers +) +{ + int gid = get_global_id(0); + float euDist, minval; + int minCentroid; + if(gid >= input_size) + { return; - - for(int i = 0 ; i < K; i++) + } + src += offset_src; + centers += offset_centers; +#ifdef USE_INDEX + src += indices[gid] * src_step; +#else + src += gid * src_step; +#endif + minval = dist(centers, src, feature_cols); + minCentroid = 0; + for(int i = 1 ; i < K; i++) { - euDist = 0; - for(int j = 0; j < dims; j++) - { - dist = (src[j + gid * dims] - - centers[j + i * dims]); - euDist += dist * dist; - } - - if(i == 0) - { - min = euDist; - minCentroid = 0; - } - else if(euDist < min) + euDist = dist(centers + i * centers_step, src, feature_cols); + if(euDist < minval) { - min = euDist; + minval = euDist; minCentroid = i; } } - dists[gid] = min; - labels[label_step * gid] = minCentroid; + labels[gid * label_step] = minCentroid; + dists[gid] = minval; } diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp index c99148a..dc5eded 100644 --- a/modules/ocl/test/test_kmeans.cpp +++ b/modules/ocl/test/test_kmeans.cpp @@ -99,7 +99,6 @@ PARAM_TEST_CASE(Kmeans, int, int, int) } }; OCL_TEST_P(Kmeans, Mat){ - if(flags & KMEANS_USE_INITIAL_LABELS) { // inital a given labels @@ -116,11 +115,9 @@ OCL_TEST_P(Kmeans, Mat){ kmeans(src, K, labels, TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), 1, flags, centers); - ocl::kmeans(d_src, K, d_labels, TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0), 1, flags, d_centers); - Mat dd_labels(d_labels); Mat dd_centers(d_centers); if(flags & KMEANS_USE_INITIAL_LABELS) @@ -153,9 +150,97 @@ OCL_TEST_P(Kmeans, Mat){ } } } + INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine( Values(3, 5, 8), Values(CV_32FC1, CV_32FC2, CV_32FC4), Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/))); + +/////////////////////////////// DistanceToCenters ////////////////////////////////////////// + +CV_ENUM(DistType, NORM_L1, NORM_L2SQR); + +PARAM_TEST_CASE(distanceToCenters, DistType, bool) +{ + cv::Size size; + int distType; + bool useRoi; + cv::Mat src, centers, src_roi, centers_roi; + cv::ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi; + + virtual void SetUp() + { + distType = GET_PARAM(0); + useRoi = GET_PARAM(1); + } + + void random_roi() + { + Size roiSize_src = randomSize(10,1000); + Size roiSize_centers = randomSize(10, 1000); + roiSize_src.width = roiSize_centers.width; + + Border srcBorder = randomBorder(0, useRoi ? 500 : 0); + randomSubMat(src, src_roi, roiSize_src, srcBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX); + + Border centersBorder = randomBorder(0, useRoi ? 500 : 0); + randomSubMat(centers, centers_roi, roiSize_centers, centersBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX); + + for(int i = 0; i(i, randomInt(0,centers.cols-1)) = (float)randomDouble(SHRT_MAX, INT_MAX); + + generateOclMat(ocl_src, ocl_src_roi, src, roiSize_src, srcBorder); + generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSize_centers, centersBorder); + + } + +}; + +OCL_TEST_P(distanceToCenters, Accuracy) +{ + for(int j = 0; j< LOOP_TIMES; j++) + { + random_roi(); + + cv::ocl::oclMat ocl_dists; + cv::ocl::oclMat ocl_labels; + + cv::ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src_roi, ocl_centers_roi, distType); + + Mat labels, dists; + ocl_labels.download(labels); + ocl_dists.download(dists); + + ASSERT_EQ(ocl_dists.cols, ocl_labels.rows); + + Mat batch_dists; + + cv::batchDistance(src_roi, centers_roi, batch_dists, CV_32FC1, noArray(), distType); + + std::vector gold_dists_v; + + for(int i = 0; i