Mixing 32-bit and 64-bit vector types prevents vectorisation of the distance computation.
author Pierre-Emmanuel Viel <p.emmanuel.viel@gmail.com>
Wed, 1 Jul 2020 16:27:07 +0000 (18:27 +0200)
committer Pierre-Emmanuel Viel <p.emmanuel.viel@gmail.com>
Wed, 1 Jul 2020 16:27:07 +0000 (18:27 +0200)
Argument "a" is of type ElementType*, which is either int* or float*, whereas argument "b" was of type double*.
Mixing these types prevents the use of SSE or AVX instructions.
On implementations without SIMD instructions, this change shows no impact on performance.

modules/flann/include/opencv2/flann/kmeans_index.h

index f743d75224176fbbdeab883c629cd319b29a9731..b556b4ad8abddaf357eb19f583a7c3712a3736dc 100644 (file)
@@ -726,15 +726,6 @@ private:
         }
 
 
-        cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
-        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
-        for (int i=0; i<centers_length; ++i) {
-            ElementType* vec = dataset_[centers_idx[i]];
-            for (size_t k=0; k<veclen_; ++k) {
-                dcenters[i][k] = double(vec[k]);
-            }
-        }
-
         std::vector<DistanceType> radiuses(branching);
         cv::AutoBuffer<int> count_buf(branching);
         int* count = count_buf.data();
@@ -748,10 +739,10 @@ private:
         int* belongs_to = belongs_to_buf.data();
         for (int i=0; i<indices_length; ++i) {
 
-            DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
+            DistanceType sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[0]], veclen_);
             belongs_to[i] = 0;
             for (int j=1; j<branching; ++j) {
-                DistanceType new_sq_dist = distance_(dataset_[indices[i]], dcenters[j], veclen_);
+                DistanceType new_sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[j]], veclen_);
                 if (sq_dist>new_sq_dist) {
                     belongs_to[i] = j;
                     sq_dist = new_sq_dist;
@@ -763,6 +754,15 @@ private:
             count[belongs_to[i]]++;
         }
 
+        cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
+        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
+        for (int i=0; i<centers_length; ++i) {
+            ElementType* vec = dataset_[centers_idx[i]];
+            for (size_t k=0; k<veclen_; ++k) {
+                dcenters[i][k] = double(vec[k]);
+            }
+        }
+
         bool converged = false;
         int iteration = 0;
         while (!converged && iteration<iterations_) {