1 /***********************************************************************
2 * Software License Agreement (BSD License)
4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
31 #ifndef OPENCV_FLANN_DIST_H_
32 #define OPENCV_FLANN_DIST_H_
38 typedef unsigned __int32 uint32_t;
39 typedef unsigned __int64 uint64_t;
46 #if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
51 # include "arm_neon.h"
58 inline T abs(T x) { return (x<0) ? -x : x; }
61 inline int abs<int>(int x) { return ::abs(x); }
64 inline float abs<float>(float x) { return fabsf(x); }
67 inline double abs<double>(double x) { return fabs(x); }
70 struct Accumulator { typedef T Type; };
72 struct Accumulator<unsigned char> { typedef float Type; };
74 struct Accumulator<unsigned short> { typedef float Type; };
76 struct Accumulator<unsigned int> { typedef float Type; };
78 struct Accumulator<char> { typedef float Type; };
80 struct Accumulator<short> { typedef float Type; };
82 struct Accumulator<int> { typedef float Type; };
97 * Squared Euclidean distance functor.
99 * This is the simpler, unrolled version. This is preferable for
100 * very low dimensionality data (eg 3D points)
105 typedef True is_kdtree_distance;
106 typedef True is_vector_space_distance;
108 typedef T ElementType;
109 typedef typename Accumulator<T>::Type ResultType;
111 template <typename Iterator1, typename Iterator2>
112 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
114 ResultType result = ResultType();
116 for(size_t i = 0; i < size; ++i ) {
123 template <typename U, typename V>
124 inline ResultType accum_dist(const U& a, const V& b, int) const
133 * Squared Euclidean distance functor, optimized version
138 typedef True is_kdtree_distance;
139 typedef True is_vector_space_distance;
141 typedef T ElementType;
142 typedef typename Accumulator<T>::Type ResultType;
145 * Compute the squared Euclidean distance between two vectors.
147 * This is highly optimised, with loop unrolling, as it is one
148 * of the most expensive inner loops.
150 * The computation of squared root at the end is omitted for
153 template <typename Iterator1, typename Iterator2>
154 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
156 ResultType result = ResultType();
157 ResultType diff0, diff1, diff2, diff3;
158 Iterator1 last = a + size;
159 Iterator1 lastgroup = last - 3;
161 /* Process 4 items with each loop for efficiency. */
162 while (a < lastgroup) {
163 diff0 = (ResultType)(a[0] - b[0]);
164 diff1 = (ResultType)(a[1] - b[1]);
165 diff2 = (ResultType)(a[2] - b[2]);
166 diff3 = (ResultType)(a[3] - b[3]);
167 result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
171 if ((worst_dist>0)&&(result>worst_dist)) {
175 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
177 diff0 = (ResultType)(*a++ - *b++);
178 result += diff0 * diff0;
184 * Partial euclidean distance, using just one dimension. This is used by the
185 * kd-tree when computing partial distances while traversing the tree.
187 * Squared root is omitted for efficiency.
189 template <typename U, typename V>
190 inline ResultType accum_dist(const U& a, const V& b, int) const
198 * Manhattan distance functor, optimized version
203 typedef True is_kdtree_distance;
204 typedef True is_vector_space_distance;
206 typedef T ElementType;
207 typedef typename Accumulator<T>::Type ResultType;
210 * Compute the Manhattan (L_1) distance between two vectors.
212 * This is highly optimised, with loop unrolling, as it is one
213 * of the most expensive inner loops.
215 template <typename Iterator1, typename Iterator2>
216 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
218 ResultType result = ResultType();
219 ResultType diff0, diff1, diff2, diff3;
220 Iterator1 last = a + size;
221 Iterator1 lastgroup = last - 3;
223 /* Process 4 items with each loop for efficiency. */
224 while (a < lastgroup) {
225 diff0 = (ResultType)abs(a[0] - b[0]);
226 diff1 = (ResultType)abs(a[1] - b[1]);
227 diff2 = (ResultType)abs(a[2] - b[2]);
228 diff3 = (ResultType)abs(a[3] - b[3]);
229 result += diff0 + diff1 + diff2 + diff3;
233 if ((worst_dist>0)&&(result>worst_dist)) {
237 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
239 diff0 = (ResultType)abs(*a++ - *b++);
246 * Partial distance, used by the kd-tree.
248 template <typename U, typename V>
249 inline ResultType accum_dist(const U& a, const V& b, int) const
258 struct MinkowskiDistance
260 typedef True is_kdtree_distance;
261 typedef True is_vector_space_distance;
263 typedef T ElementType;
264 typedef typename Accumulator<T>::Type ResultType;
268 MinkowskiDistance(int order_) : order(order_) {}
271 * Compute the Minkowsky (L_p) distance between two vectors.
273 * This is highly optimised, with loop unrolling, as it is one
274 * of the most expensive inner loops.
276 * The computation of squared root at the end is omitted for
279 template <typename Iterator1, typename Iterator2>
280 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
282 ResultType result = ResultType();
283 ResultType diff0, diff1, diff2, diff3;
284 Iterator1 last = a + size;
285 Iterator1 lastgroup = last - 3;
287 /* Process 4 items with each loop for efficiency. */
288 while (a < lastgroup) {
289 diff0 = (ResultType)abs(a[0] - b[0]);
290 diff1 = (ResultType)abs(a[1] - b[1]);
291 diff2 = (ResultType)abs(a[2] - b[2]);
292 diff3 = (ResultType)abs(a[3] - b[3]);
293 result += pow(diff0,order) + pow(diff1,order) + pow(diff2,order) + pow(diff3,order);
297 if ((worst_dist>0)&&(result>worst_dist)) {
301 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
303 diff0 = (ResultType)abs(*a++ - *b++);
304 result += pow(diff0,order);
310 * Partial distance, used by the kd-tree.
312 template <typename U, typename V>
313 inline ResultType accum_dist(const U& a, const V& b, int) const
315 return pow(static_cast<ResultType>(abs(a-b)),order);
324 typedef False is_kdtree_distance;
325 typedef True is_vector_space_distance;
327 typedef T ElementType;
328 typedef typename Accumulator<T>::Type ResultType;
331 * Compute the max distance (L_infinity) between two vectors.
333 * This distance is not a valid kdtree distance, it's not dimensionwise additive.
335 template <typename Iterator1, typename Iterator2>
336 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
338 ResultType result = ResultType();
339 ResultType diff0, diff1, diff2, diff3;
340 Iterator1 last = a + size;
341 Iterator1 lastgroup = last - 3;
343 /* Process 4 items with each loop for efficiency. */
344 while (a < lastgroup) {
345 diff0 = abs(a[0] - b[0]);
346 diff1 = abs(a[1] - b[1]);
347 diff2 = abs(a[2] - b[2]);
348 diff3 = abs(a[3] - b[3]);
349 if (diff0>result) {result = diff0; }
350 if (diff1>result) {result = diff1; }
351 if (diff2>result) {result = diff2; }
352 if (diff3>result) {result = diff3; }
356 if ((worst_dist>0)&&(result>worst_dist)) {
360 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
362 diff0 = abs(*a++ - *b++);
363 result = (diff0>result) ? diff0 : result;
368 /* This distance functor is not dimension-wise additive, which
369 * makes it an invalid kd-tree distance, not implementing the accum_dist method */
373 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
376 * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
377 * bit count of A exclusive XOR'ed with B
381 typedef False is_kdtree_distance;
382 typedef False is_vector_space_distance;
384 typedef unsigned char ElementType;
385 typedef int ResultType;
387 /** this will count the bits in a ^ b
389 ResultType operator()(const unsigned char* a, const unsigned char* b, size_t size) const
391 static const uchar popCountTable[] =
393 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
394 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
395 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
396 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
397 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
398 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
399 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
400 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
402 ResultType result = 0;
403 for (size_t i = 0; i < size; i++) {
404 result += popCountTable[a[i] ^ b[i]];
411 * Hamming distance functor (pop count between two binary vectors, i.e. xor them and count the number of bits set)
412 * That code was taken from brief.cpp in OpenCV
417 typedef False is_kdtree_distance;
418 typedef False is_vector_space_distance;
421 typedef T ElementType;
422 typedef int ResultType;
424 template<typename Iterator1, typename Iterator2>
425 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
427 ResultType result = 0;
430 uint32x4_t bits = vmovq_n_u32(0);
431 for (size_t i = 0; i < size; i += 16) {
432 uint8x16_t A_vec = vld1q_u8 (a + i);
433 uint8x16_t B_vec = vld1q_u8 (b + i);
434 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
435 uint8x16_t bitsSet = vcntq_u8 (AxorB);
436 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
437 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
438 bits = vaddq_u32(bits, bitSet4);
440 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
441 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
442 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
446 //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
447 typedef unsigned long long pop_t;
448 const size_t modulo = size % sizeof(pop_t);
449 const pop_t* a2 = reinterpret_cast<const pop_t*> (a);
450 const pop_t* b2 = reinterpret_cast<const pop_t*> (b);
451 const pop_t* a2_end = a2 + (size / sizeof(pop_t));
453 for (; a2 != a2_end; ++a2, ++b2) result += __builtin_popcountll((*a2) ^ (*b2));
456 //in the case where size is not dividable by sizeof(size_t)
457 //need to mask off the bits at the end
458 pop_t a_final = 0, b_final = 0;
459 memcpy(&a_final, a2, modulo);
460 memcpy(&b_final, b2, modulo);
461 result += __builtin_popcountll(a_final ^ b_final);
464 #else // NO NEON and NOT GNUC
465 typedef unsigned long long pop_t;
467 result = lut(reinterpret_cast<const unsigned char*> (a),
468 reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));
477 typedef False is_kdtree_distance;
478 typedef False is_vector_space_distance;
480 typedef T ElementType;
481 typedef int ResultType;
483 /** This is popcount_3() from:
484 * http://en.wikipedia.org/wiki/Hamming_weight */
485 unsigned int popcnt32(uint32_t n) const
487 n -= ((n >> 1) & 0x55555555);
488 n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
489 return (((n + (n >> 4))& 0xF0F0F0F)* 0x1010101) >> 24;
492 #ifdef FLANN_PLATFORM_64_BIT
493 unsigned int popcnt64(uint64_t n) const
495 n -= ((n >> 1) & 0x5555555555555555);
496 n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
497 return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
501 template <typename Iterator1, typename Iterator2>
502 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
504 #ifdef FLANN_PLATFORM_64_BIT
505 const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
506 const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
507 ResultType result = 0;
508 size /= (sizeof(uint64_t)/sizeof(unsigned char));
509 for(size_t i = 0; i < size; ++i ) {
510 result += popcnt64(*pa ^ *pb);
515 const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
516 const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
517 ResultType result = 0;
518 size /= (sizeof(uint32_t)/sizeof(unsigned char));
519 for(size_t i = 0; i < size; ++i ) {
520 result += popcnt32(*pa ^ *pb);
531 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
534 struct HistIntersectionDistance
536 typedef True is_kdtree_distance;
537 typedef True is_vector_space_distance;
539 typedef T ElementType;
540 typedef typename Accumulator<T>::Type ResultType;
543 * Compute the histogram intersection distance
545 template <typename Iterator1, typename Iterator2>
546 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
548 ResultType result = ResultType();
549 ResultType min0, min1, min2, min3;
550 Iterator1 last = a + size;
551 Iterator1 lastgroup = last - 3;
553 /* Process 4 items with each loop for efficiency. */
554 while (a < lastgroup) {
555 min0 = (ResultType)(a[0] < b[0] ? a[0] : b[0]);
556 min1 = (ResultType)(a[1] < b[1] ? a[1] : b[1]);
557 min2 = (ResultType)(a[2] < b[2] ? a[2] : b[2]);
558 min3 = (ResultType)(a[3] < b[3] ? a[3] : b[3]);
559 result += min0 + min1 + min2 + min3;
562 if ((worst_dist>0)&&(result>worst_dist)) {
566 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
568 min0 = (ResultType)(*a < *b ? *a : *b);
577 * Partial distance, used by the kd-tree.
579 template <typename U, typename V>
580 inline ResultType accum_dist(const U& a, const V& b, int) const
589 struct HellingerDistance
591 typedef True is_kdtree_distance;
592 typedef True is_vector_space_distance;
594 typedef T ElementType;
595 typedef typename Accumulator<T>::Type ResultType;
598 * Compute the Hellinger distance
600 template <typename Iterator1, typename Iterator2>
601 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
603 ResultType result = ResultType();
604 ResultType diff0, diff1, diff2, diff3;
605 Iterator1 last = a + size;
606 Iterator1 lastgroup = last - 3;
608 /* Process 4 items with each loop for efficiency. */
609 while (a < lastgroup) {
610 diff0 = sqrt(static_cast<ResultType>(a[0])) - sqrt(static_cast<ResultType>(b[0]));
611 diff1 = sqrt(static_cast<ResultType>(a[1])) - sqrt(static_cast<ResultType>(b[1]));
612 diff2 = sqrt(static_cast<ResultType>(a[2])) - sqrt(static_cast<ResultType>(b[2]));
613 diff3 = sqrt(static_cast<ResultType>(a[3])) - sqrt(static_cast<ResultType>(b[3]));
614 result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
619 diff0 = sqrt(static_cast<ResultType>(*a++)) - sqrt(static_cast<ResultType>(*b++));
620 result += diff0 * diff0;
626 * Partial distance, used by the kd-tree.
628 template <typename U, typename V>
629 inline ResultType accum_dist(const U& a, const V& b, int) const
631 ResultType diff = sqrt(static_cast<ResultType>(a)) - sqrt(static_cast<ResultType>(b));
638 struct ChiSquareDistance
640 typedef True is_kdtree_distance;
641 typedef True is_vector_space_distance;
643 typedef T ElementType;
644 typedef typename Accumulator<T>::Type ResultType;
647 * Compute the chi-square distance
649 template <typename Iterator1, typename Iterator2>
650 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
652 ResultType result = ResultType();
653 ResultType sum, diff;
654 Iterator1 last = a + size;
657 sum = (ResultType)(*a + *b);
659 diff = (ResultType)(*a - *b);
660 result += diff*diff/sum;
665 if ((worst_dist>0)&&(result>worst_dist)) {
673 * Partial distance, used by the kd-tree.
675 template <typename U, typename V>
676 inline ResultType accum_dist(const U& a, const V& b, int) const
678 ResultType result = ResultType();
679 ResultType sum, diff;
681 sum = (ResultType)(a+b);
683 diff = (ResultType)(a-b);
684 result = diff*diff/sum;
694 typedef True is_kdtree_distance;
695 typedef True is_vector_space_distance;
697 typedef T ElementType;
698 typedef typename Accumulator<T>::Type ResultType;
701 * Compute the Kullback–Leibler divergence
703 template <typename Iterator1, typename Iterator2>
704 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
706 ResultType result = ResultType();
707 Iterator1 last = a + size;
711 ResultType ratio = (ResultType)(*a / *b);
713 result += *a * log(ratio);
719 if ((worst_dist>0)&&(result>worst_dist)) {
727 * Partial distance, used by the kd-tree.
729 template <typename U, typename V>
730 inline ResultType accum_dist(const U& a, const V& b, int) const
732 ResultType result = ResultType();
734 ResultType ratio = (ResultType)(a / b);
736 result = a * log(ratio);
746 * This is a "zero iterator". It basically behaves like a zero filled
747 * array to all algorithms that use arrays as iterators (STL style).
748 * It's useful when there's a need to compute the distance between feature
749 * and origin it and allows for better compiler optimisation than using a
752 template <typename T>
766 const ZeroIterator<T>& operator ++()
771 ZeroIterator<T> operator ++(int)
776 ZeroIterator<T>& operator+=(int)
785 * Depending on processed distances, some of them are already squared (e.g. L2)
786 * and some are not (e.g.Hamming). In KMeans++ for instance we want to be sure
787 * we are working on ^2 distances, thus following templates to ensure that.
789 template <typename Distance, typename ElementType>
790 struct squareDistance
792 typedef typename Distance::ResultType ResultType;
793 ResultType operator()( ResultType dist ) { return dist*dist; }
797 template <typename ElementType>
798 struct squareDistance<L2_Simple<ElementType>, ElementType>
800 typedef typename L2_Simple<ElementType>::ResultType ResultType;
801 ResultType operator()( ResultType dist ) { return dist; }
804 template <typename ElementType>
805 struct squareDistance<L2<ElementType>, ElementType>
807 typedef typename L2<ElementType>::ResultType ResultType;
808 ResultType operator()( ResultType dist ) { return dist; }
812 template <typename ElementType>
813 struct squareDistance<MinkowskiDistance<ElementType>, ElementType>
815 typedef typename MinkowskiDistance<ElementType>::ResultType ResultType;
816 ResultType operator()( ResultType dist ) { return dist; }
819 template <typename ElementType>
820 struct squareDistance<HellingerDistance<ElementType>, ElementType>
822 typedef typename HellingerDistance<ElementType>::ResultType ResultType;
823 ResultType operator()( ResultType dist ) { return dist; }
826 template <typename ElementType>
827 struct squareDistance<ChiSquareDistance<ElementType>, ElementType>
829 typedef typename ChiSquareDistance<ElementType>::ResultType ResultType;
830 ResultType operator()( ResultType dist ) { return dist; }
834 template <typename Distance>
835 typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
837 typedef typename Distance::ElementType ElementType;
839 squareDistance<Distance, ElementType> dummy;
840 return dummy( dist );
845 #endif //OPENCV_FLANN_DIST_H_