1 /***********************************************************************
2 * Software License Agreement (BSD License)
4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
31 #ifndef OPENCV_FLANN_DIST_H_
32 #define OPENCV_FLANN_DIST_H_
38 typedef unsigned __int32 uint32_t;
39 typedef unsigned __int64 uint64_t;
54 inline T abs(T x) { return (x<0) ? -x : x; }
57 inline int abs<int>(int x) { return ::abs(x); }
60 inline float abs<float>(float x) { return fabsf(x); }
63 inline double abs<double>(double x) { return fabs(x); }
66 struct Accumulator { typedef T Type; };
68 struct Accumulator<unsigned char> { typedef float Type; };
70 struct Accumulator<unsigned short> { typedef float Type; };
72 struct Accumulator<unsigned int> { typedef float Type; };
74 struct Accumulator<char> { typedef float Type; };
76 struct Accumulator<short> { typedef float Type; };
78 struct Accumulator<int> { typedef float Type; };
93 * Squared Euclidean distance functor.
95 * This is the simpler, unrolled version. This is preferable for
96 * very low dimensionality data (eg 3D points)
101 typedef True is_kdtree_distance;
102 typedef True is_vector_space_distance;
104 typedef T ElementType;
105 typedef typename Accumulator<T>::Type ResultType;
107 template <typename Iterator1, typename Iterator2>
108 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
110 ResultType result = ResultType();
112 for(size_t i = 0; i < size; ++i ) {
119 template <typename U, typename V>
120 inline ResultType accum_dist(const U& a, const V& b, int) const
129 * Squared Euclidean distance functor, optimized version
134 typedef True is_kdtree_distance;
135 typedef True is_vector_space_distance;
137 typedef T ElementType;
138 typedef typename Accumulator<T>::Type ResultType;
141 * Compute the squared Euclidean distance between two vectors.
143 * This is highly optimised, with loop unrolling, as it is one
144 * of the most expensive inner loops.
146 * The computation of squared root at the end is omitted for
149 template <typename Iterator1, typename Iterator2>
150 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
152 ResultType result = ResultType();
153 ResultType diff0, diff1, diff2, diff3;
154 Iterator1 last = a + size;
155 Iterator1 lastgroup = last - 3;
157 /* Process 4 items with each loop for efficiency. */
158 while (a < lastgroup) {
159 diff0 = (ResultType)(a[0] - b[0]);
160 diff1 = (ResultType)(a[1] - b[1]);
161 diff2 = (ResultType)(a[2] - b[2]);
162 diff3 = (ResultType)(a[3] - b[3]);
163 result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
167 if ((worst_dist>0)&&(result>worst_dist)) {
171 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
173 diff0 = (ResultType)(*a++ - *b++);
174 result += diff0 * diff0;
180 * Partial euclidean distance, using just one dimension. This is used by the
181 * kd-tree when computing partial distances while traversing the tree.
183 * Squared root is omitted for efficiency.
185 template <typename U, typename V>
186 inline ResultType accum_dist(const U& a, const V& b, int) const
194 * Manhattan distance functor, optimized version
199 typedef True is_kdtree_distance;
200 typedef True is_vector_space_distance;
202 typedef T ElementType;
203 typedef typename Accumulator<T>::Type ResultType;
206 * Compute the Manhattan (L_1) distance between two vectors.
208 * This is highly optimised, with loop unrolling, as it is one
209 * of the most expensive inner loops.
211 template <typename Iterator1, typename Iterator2>
212 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
214 ResultType result = ResultType();
215 ResultType diff0, diff1, diff2, diff3;
216 Iterator1 last = a + size;
217 Iterator1 lastgroup = last - 3;
219 /* Process 4 items with each loop for efficiency. */
220 while (a < lastgroup) {
221 diff0 = (ResultType)abs(a[0] - b[0]);
222 diff1 = (ResultType)abs(a[1] - b[1]);
223 diff2 = (ResultType)abs(a[2] - b[2]);
224 diff3 = (ResultType)abs(a[3] - b[3]);
225 result += diff0 + diff1 + diff2 + diff3;
229 if ((worst_dist>0)&&(result>worst_dist)) {
233 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
235 diff0 = (ResultType)abs(*a++ - *b++);
242 * Partial distance, used by the kd-tree.
244 template <typename U, typename V>
245 inline ResultType accum_dist(const U& a, const V& b, int) const
254 struct MinkowskiDistance
256 typedef True is_kdtree_distance;
257 typedef True is_vector_space_distance;
259 typedef T ElementType;
260 typedef typename Accumulator<T>::Type ResultType;
264 MinkowskiDistance(int order_) : order(order_) {}
267 * Compute the Minkowsky (L_p) distance between two vectors.
269 * This is highly optimised, with loop unrolling, as it is one
270 * of the most expensive inner loops.
272 * The computation of squared root at the end is omitted for
275 template <typename Iterator1, typename Iterator2>
276 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
278 ResultType result = ResultType();
279 ResultType diff0, diff1, diff2, diff3;
280 Iterator1 last = a + size;
281 Iterator1 lastgroup = last - 3;
283 /* Process 4 items with each loop for efficiency. */
284 while (a < lastgroup) {
285 diff0 = (ResultType)abs(a[0] - b[0]);
286 diff1 = (ResultType)abs(a[1] - b[1]);
287 diff2 = (ResultType)abs(a[2] - b[2]);
288 diff3 = (ResultType)abs(a[3] - b[3]);
289 result += pow(diff0,order) + pow(diff1,order) + pow(diff2,order) + pow(diff3,order);
293 if ((worst_dist>0)&&(result>worst_dist)) {
297 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
299 diff0 = (ResultType)abs(*a++ - *b++);
300 result += pow(diff0,order);
306 * Partial distance, used by the kd-tree.
308 template <typename U, typename V>
309 inline ResultType accum_dist(const U& a, const V& b, int) const
311 return pow(static_cast<ResultType>(abs(a-b)),order);
320 typedef False is_kdtree_distance;
321 typedef True is_vector_space_distance;
323 typedef T ElementType;
324 typedef typename Accumulator<T>::Type ResultType;
327 * Compute the max distance (L_infinity) between two vectors.
329 * This distance is not a valid kdtree distance, it's not dimensionwise additive.
331 template <typename Iterator1, typename Iterator2>
332 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
334 ResultType result = ResultType();
335 ResultType diff0, diff1, diff2, diff3;
336 Iterator1 last = a + size;
337 Iterator1 lastgroup = last - 3;
339 /* Process 4 items with each loop for efficiency. */
340 while (a < lastgroup) {
341 diff0 = abs(a[0] - b[0]);
342 diff1 = abs(a[1] - b[1]);
343 diff2 = abs(a[2] - b[2]);
344 diff3 = abs(a[3] - b[3]);
345 if (diff0>result) {result = diff0; }
346 if (diff1>result) {result = diff1; }
347 if (diff2>result) {result = diff2; }
348 if (diff3>result) {result = diff3; }
352 if ((worst_dist>0)&&(result>worst_dist)) {
356 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
358 diff0 = abs(*a++ - *b++);
359 result = (diff0>result) ? diff0 : result;
364 /* This distance functor is not dimension-wise additive, which
365 * makes it an invalid kd-tree distance, not implementing the accum_dist method */
369 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
372 * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
373 * bit count of A exclusive XOR'ed with B
377 typedef False is_kdtree_distance;
378 typedef False is_vector_space_distance;
380 typedef unsigned char ElementType;
381 typedef int ResultType;
383 /** this will count the bits in a ^ b
385 ResultType operator()(const unsigned char* a, const unsigned char* b, int size) const
387 static const uchar popCountTable[] =
389 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
390 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
391 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
392 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
393 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
394 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
395 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
396 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
398 ResultType result = 0;
399 for (int i = 0; i < size; i++) {
400 result += popCountTable[a[i] ^ b[i]];
407 * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
408 * bit count of A exclusive XOR'ed with B
412 typedef False is_kdtree_distance;
413 typedef False is_vector_space_distance;
415 typedef unsigned char ElementType;
416 typedef int ResultType;
418 /** this will count the bits in a ^ b
420 ResultType operator()(const unsigned char* a, const unsigned char* b, size_t size) const
422 static const uchar popCountTable[] =
424 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
425 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
426 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
427 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
428 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
429 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
430 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
431 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
433 ResultType result = 0;
434 for (size_t i = 0; i < size; i++) {
435 result += popCountTable[a[i] ^ b[i]];
442 * Hamming distance functor (pop count between two binary vectors, i.e. xor them and count the number of bits set)
443 * That code was taken from brief.cpp in OpenCV
448 typedef False is_kdtree_distance;
449 typedef False is_vector_space_distance;
452 typedef T ElementType;
453 typedef int ResultType;
455 template<typename Iterator1, typename Iterator2>
456 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
458 ResultType result = 0;
461 uint32x4_t bits = vmovq_n_u32(0);
462 for (size_t i = 0; i < size; i += 16) {
463 uint8x16_t A_vec = vld1q_u8 (a + i);
464 uint8x16_t B_vec = vld1q_u8 (b + i);
465 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
466 uint8x16_t bitsSet = vcntq_u8 (AxorB);
467 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
468 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
469 bits = vaddq_u32(bits, bitSet4);
471 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
472 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
473 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
477 //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
478 typedef unsigned long long pop_t;
479 const size_t modulo = size % sizeof(pop_t);
480 const pop_t* a2 = reinterpret_cast<const pop_t*> (a);
481 const pop_t* b2 = reinterpret_cast<const pop_t*> (b);
482 const pop_t* a2_end = a2 + (size / sizeof(pop_t));
484 for (; a2 != a2_end; ++a2, ++b2) result += __builtin_popcountll((*a2) ^ (*b2));
487 //in the case where size is not dividable by sizeof(size_t)
488 //need to mask off the bits at the end
489 pop_t a_final = 0, b_final = 0;
490 memcpy(&a_final, a2, modulo);
491 memcpy(&b_final, b2, modulo);
492 result += __builtin_popcountll(a_final ^ b_final);
495 #else // NO NEON and NOT GNUC
496 typedef unsigned long long pop_t;
498 result = lut(reinterpret_cast<const unsigned char*> (a),
499 reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));
508 typedef False is_kdtree_distance;
509 typedef False is_vector_space_distance;
511 typedef T ElementType;
512 typedef int ResultType;
514 /** This is popcount_3() from:
515 * http://en.wikipedia.org/wiki/Hamming_weight */
516 unsigned int popcnt32(uint32_t n) const
518 n -= ((n >> 1) & 0x55555555);
519 n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
520 return (((n + (n >> 4))& 0xF0F0F0F)* 0x1010101) >> 24;
523 #ifdef FLANN_PLATFORM_64_BIT
524 unsigned int popcnt64(uint64_t n) const
526 n -= ((n >> 1) & 0x5555555555555555);
527 n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
528 return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
532 template <typename Iterator1, typename Iterator2>
533 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
535 #ifdef FLANN_PLATFORM_64_BIT
536 const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
537 const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
538 ResultType result = 0;
539 size /= (sizeof(uint64_t)/sizeof(unsigned char));
540 for(size_t i = 0; i < size; ++i ) {
541 result += popcnt64(*pa ^ *pb);
546 const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
547 const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
548 ResultType result = 0;
549 size /= (sizeof(uint32_t)/sizeof(unsigned char));
550 for(size_t i = 0; i < size; ++i ) {
551 result += popcnt32(*pa ^ *pb);
562 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
565 struct HistIntersectionDistance
567 typedef True is_kdtree_distance;
568 typedef True is_vector_space_distance;
570 typedef T ElementType;
571 typedef typename Accumulator<T>::Type ResultType;
574 * Compute the histogram intersection distance
576 template <typename Iterator1, typename Iterator2>
577 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
579 ResultType result = ResultType();
580 ResultType min0, min1, min2, min3;
581 Iterator1 last = a + size;
582 Iterator1 lastgroup = last - 3;
584 /* Process 4 items with each loop for efficiency. */
585 while (a < lastgroup) {
586 min0 = (ResultType)(a[0] < b[0] ? a[0] : b[0]);
587 min1 = (ResultType)(a[1] < b[1] ? a[1] : b[1]);
588 min2 = (ResultType)(a[2] < b[2] ? a[2] : b[2]);
589 min3 = (ResultType)(a[3] < b[3] ? a[3] : b[3]);
590 result += min0 + min1 + min2 + min3;
593 if ((worst_dist>0)&&(result>worst_dist)) {
597 /* Process last 0-3 pixels. Not needed for standard vector lengths. */
599 min0 = (ResultType)(*a < *b ? *a : *b);
608 * Partial distance, used by the kd-tree.
610 template <typename U, typename V>
611 inline ResultType accum_dist(const U& a, const V& b, int) const
620 struct HellingerDistance
622 typedef True is_kdtree_distance;
623 typedef True is_vector_space_distance;
625 typedef T ElementType;
626 typedef typename Accumulator<T>::Type ResultType;
629 * Compute the histogram intersection distance
631 template <typename Iterator1, typename Iterator2>
632 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
634 ResultType result = ResultType();
635 ResultType diff0, diff1, diff2, diff3;
636 Iterator1 last = a + size;
637 Iterator1 lastgroup = last - 3;
639 /* Process 4 items with each loop for efficiency. */
640 while (a < lastgroup) {
641 diff0 = sqrt(static_cast<ResultType>(a[0])) - sqrt(static_cast<ResultType>(b[0]));
642 diff1 = sqrt(static_cast<ResultType>(a[1])) - sqrt(static_cast<ResultType>(b[1]));
643 diff2 = sqrt(static_cast<ResultType>(a[2])) - sqrt(static_cast<ResultType>(b[2]));
644 diff3 = sqrt(static_cast<ResultType>(a[3])) - sqrt(static_cast<ResultType>(b[3]));
645 result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
650 diff0 = sqrt(static_cast<ResultType>(*a++)) - sqrt(static_cast<ResultType>(*b++));
651 result += diff0 * diff0;
657 * Partial distance, used by the kd-tree.
659 template <typename U, typename V>
660 inline ResultType accum_dist(const U& a, const V& b, int) const
662 return sqrt(static_cast<ResultType>(a)) - sqrt(static_cast<ResultType>(b));
668 struct ChiSquareDistance
670 typedef True is_kdtree_distance;
671 typedef True is_vector_space_distance;
673 typedef T ElementType;
674 typedef typename Accumulator<T>::Type ResultType;
677 * Compute the chi-square distance
679 template <typename Iterator1, typename Iterator2>
680 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
682 ResultType result = ResultType();
683 ResultType sum, diff;
684 Iterator1 last = a + size;
687 sum = (ResultType)(*a + *b);
689 diff = (ResultType)(*a - *b);
690 result += diff*diff/sum;
695 if ((worst_dist>0)&&(result>worst_dist)) {
703 * Partial distance, used by the kd-tree.
705 template <typename U, typename V>
706 inline ResultType accum_dist(const U& a, const V& b, int) const
708 ResultType result = ResultType();
709 ResultType sum, diff;
711 sum = (ResultType)(a+b);
713 diff = (ResultType)(a-b);
714 result = diff*diff/sum;
724 typedef True is_kdtree_distance;
725 typedef True is_vector_space_distance;
727 typedef T ElementType;
728 typedef typename Accumulator<T>::Type ResultType;
731 * Compute the Kullback–Leibler divergence
733 template <typename Iterator1, typename Iterator2>
734 ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
736 ResultType result = ResultType();
737 Iterator1 last = a + size;
741 ResultType ratio = (ResultType)(*a / *b);
743 result += *a * log(ratio);
749 if ((worst_dist>0)&&(result>worst_dist)) {
757 * Partial distance, used by the kd-tree.
759 template <typename U, typename V>
760 inline ResultType accum_dist(const U& a, const V& b, int) const
762 ResultType result = ResultType();
763 ResultType ratio = (ResultType)(a / b);
765 result = a * log(ratio);
774 * This is a "zero iterator". It basically behaves like a zero filled
775 * array to all algorithms that use arrays as iterators (STL style).
776 * It's useful when there's a need to compute the distance between feature
777 * and origin it and allows for better compiler optimisation than using a
780 template <typename T>
794 const ZeroIterator<T>& operator ++()
799 ZeroIterator<T> operator ++(int)
804 ZeroIterator<T>& operator+=(int)
813 #endif //OPENCV_FLANN_DIST_H_