1 /***********************************************************************
2 * Software License Agreement (BSD License)
4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
31 /***********************************************************************
32 * Author: Vincent Rabaud
33 *************************************************************************/
35 #ifndef OPENCV_FLANN_LSH_INDEX_H_
36 #define OPENCV_FLANN_LSH_INDEX_H_
47 #include "result_set.h"
49 #include "lsh_table.h"
50 #include "allocator.h"
57 struct LshIndexParams : public IndexParams
59 LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2)
61 (* this)["algorithm"] = FLANN_INDEX_LSH;
62 // The number of hash tables to use
63 (*this)["table_number"] = table_number;
64 // The length of the key in the hash tables
65 (*this)["key_size"] = key_size;
66 // Number of levels to use in multi-probe (0 for standard LSH)
67 (*this)["multi_probe_level"] = multi_probe_level;
/**
 * Locality-sensitive hashing (LSH) index
 *
 * Contains the LSH hash tables and other information for indexing a set of points
 * for approximate nearest-neighbor matching.
 */
// Index class parameterised on the distance functor type; derives from NNIndex<Distance>.
77 template<typename Distance>
78 class LshIndex : public NNIndex<Distance>
// Element type of the feature vectors, as declared by the distance functor.
81 typedef typename Distance::ElementType ElementType;
// Type of the values produced by the distance functor.
82 typedef typename Distance::ResultType DistanceType;
85 * @param input_data dataset with the input features
86 * @param params parameters passed to the LSH algorithm
87 * @param d the distance used
89 LshIndex(const Matrix<ElementType>& input_data, const IndexParams& params = LshIndexParams(),
90 Distance d = Distance()) :
91 dataset_(input_data), index_params_(params), distance_(d)
93 // cv::flann::IndexParams sets integer params as 'int', so it is used with get_param
94 // in place of 'unsigned int'
95 table_number_ = (unsigned int)get_param<int>(index_params_,"table_number",12);
96 key_size_ = (unsigned int)get_param<int>(index_params_,"key_size",20);
97 multi_probe_level_ = (unsigned int)get_param<int>(index_params_,"multi_probe_level",2);
99 feature_size_ = (unsigned)dataset_.cols;
100 fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_);
// Copy constructor and copy assignment are declared here; no definition is
// visible in this part of the file.
104 LshIndex(const LshIndex&);
105 LshIndex& operator=(const LshIndex&);
// Body that (re)creates the hash tables: tables_ is resized to table_number_ and
// every entry is replaced by a new LshTable built from feature_size_, key_size_
// and the shared `indices` vector.
// NOTE(review): the enclosing function header is not visible in this part of the file.
// `indices` holds feature_size_ * CHAR_BIT entries.
112 std::vector<size_t> indices(feature_size_ * CHAR_BIT);
114 tables_.resize(table_number_);
115 for (unsigned int i = 0; i < table_number_; ++i) {
117 //re-initialize the random indices table that the LshTable will use to pick its sub-dimensions
// The refill is requested when `indices` is still at full size or holds fewer
// than key_size_ entries.
118 if( (indices.size() == feature_size_ * CHAR_BIT) || (indices.size() < key_size_) )
120 indices.resize( feature_size_ * CHAR_BIT );
// NOTE(review): this loop counter reuses the name `i` and shadows the table
// counter above; no statement that fills `indices` is visible between the loop
// header and the shuffle -- confirm against the complete file.
121 for (size_t i = 0; i < feature_size_ * CHAR_BIT; ++i)
123 std::random_shuffle(indices.begin(), indices.end());
// The table is built by assigning a freshly constructed LshTable to the slot.
126 lsh::LshTable<ElementType>& table = tables_[i];
127 table = lsh::LshTable<ElementType>(feature_size_, key_size_, indices);
// NOTE(review): no statement adding dataset_ to `table` is visible after the
// comment below -- confirm against the complete file.
129 // Add the features to the table
134 flann_algorithm_t getType() const
136 return FLANN_INDEX_LSH;
140 void saveIndex(FILE* stream)
142 save_value(stream,table_number_);
143 save_value(stream,key_size_);
144 save_value(stream,multi_probe_level_);
145 save_value(stream, dataset_);
148 void loadIndex(FILE* stream)
150 load_value(stream, table_number_);
151 load_value(stream, key_size_);
152 load_value(stream, multi_probe_level_);
153 load_value(stream, dataset_);
154 // Building the index is so fast we can afford not storing it
157 index_params_["algorithm"] = getType();
158 index_params_["table_number"] = table_number_;
159 index_params_["key_size"] = key_size_;
160 index_params_["multi_probe_level"] = multi_probe_level_;
// NOTE(review): the header of the enclosing accessor is not visible in this part of the file.
164 * Returns size of index.
// The reported size is the row count of the stored dataset.
168 return dataset_.rows;
172 * Returns the length of an index feature.
174 size_t veclen() const
176 return feature_size_;
180 * Computes the index memory usage
181 * Returns: memory used by the index
183 int usedMemory() const
185 return (int)(dataset_.rows * sizeof(int));
189 IndexParams getParameters() const
191 return index_params_;
195 * \brief Perform k-nearest neighbor search
196 * \param[in] queries The query points for which to find the nearest neighbors
197 * \param[out] indices The indices of the nearest neighbors found
198 * \param[out] dists Distances to the nearest neighbors found
199 * \param[in] knn Number of nearest neighbors to return
200 * \param[in] params Search parameters
202 virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
204 assert(queries.cols == veclen());
205 assert(indices.rows >= queries.rows);
206 assert(dists.rows >= queries.rows);
207 assert(int(indices.cols) >= knn);
208 assert(int(dists.cols) >= knn);
211 KNNUniqueResultSet<DistanceType> resultSet(knn);
212 for (size_t i = 0; i < queries.rows; i++) {
214 std::fill_n(indices[i], knn, -1);
215 std::fill_n(dists[i], knn, std::numeric_limits<DistanceType>::max());
216 findNeighbors(resultSet, queries[i], params);
217 if (get_param(params,"sorted",true)) resultSet.sortAndCopy(indices[i], dists[i], knn);
218 else resultSet.copy(indices[i], dists[i], knn);
224 * Find set of nearest neighbors to vec. Their indices are stored inside
228 * result = the result object in which the indices of the nearest-neighbors are stored
229 * vec = the vector for which to search the nearest neighbors
230 * maxCheck = the maximum number of restarts (in a best-bin-first manner)
232 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/)
234 getNeighbors(vec, result);
/** A (score, index) pair and a comparator that orders such pairs by their
 * index (the second member) only; the score is ignored by the comparison.
 */
typedef std::pair<float, unsigned int> ScoreIndexPair;
struct SortScoreIndexPairOnSecond
{
    bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const
    {
        const unsigned int left_index = left.second;
        const unsigned int right_index = right.second;
        return left_index < right_index;
    }
};
249 /** Fills the different xor masks to use when getting the neighbors in multi-probe LSH
250 * @param key the key we build neighbors from
251 * @param lowest_index the lowest index of the bit set
252 * @param level the multi-probe level we are at
253 * @param xor_masks all the xor mask
255 void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level,
256 std::vector<lsh::BucketKey>& xor_masks)
258 xor_masks.push_back(key);
259 if (level == 0) return;
260 for (int index = lowest_index - 1; index >= 0; --index) {
262 lsh::BucketKey new_key = key | (1 << index);
263 fill_xor_mask(new_key, index, level - 1, xor_masks);
// Candidate search that accumulates (distance, index) pairs in a function-local
// static vector. Two traversals of the tables and XOR masks are visible below:
// the first keeps a bounded heap of the best candidates, the second collects
// every candidate whose distance is below `radius`.
// NOTE(review): the statements that select between the two traversals
// (presumably a branch on do_k) are not visible in this part of the file, and
// nothing visible hands score_index_heap back to the caller -- confirm.
267 /** Performs the approximate nearest-neighbor search.
268 * @param vec the feature to analyze
269 * @param do_radius flag indicating if we check the radius too
270 * @param radius the radius if it is a radius search
271 * @param do_k flag indicating if we limit the number of nn
272 * @param k_nn the number of nearest neighbors
273 * @param checked_average used for debugging
275 void getNeighbors(const ElementType* vec, bool /*do_radius*/, float radius, bool do_k, unsigned int k_nn,
276 float& /*checked_average*/)
// Function-local static: the same vector is reused by every call; no statement
// that empties it is visible here.
278 static std::vector<ScoreIndexPair> score_index_heap;
// First traversal: worst_score starts at the largest unsigned value so that
// every candidate is accepted until the heap exceeds k_nn entries.
281 unsigned int worst_score = std::numeric_limits<unsigned int>::max();
282 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
283 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
284 for (; table != table_end; ++table) {
// Key of the query in this table; each XOR mask turns it into a bucket key to probe.
285 size_t key = table->getKey(vec);
286 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
287 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
288 for (; xor_mask != xor_mask_end; ++xor_mask) {
289 size_t sub_key = key ^ (*xor_mask);
290 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
// A null bucket pointer means there is nothing to examine for this key.
291 if (bucket == 0) continue;
293 // Go over each descriptor index
294 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
295 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
296 DistanceType hamming_distance;
298 // Process the rest of the candidates
299 for (; training_index < last_training_index; ++training_index) {
300 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
302 if (hamming_distance < worst_score) {
303 // Insert the new element
// NOTE(review): the pair is built from the iterator `training_index`, whereas
// the distance call above dereferences it; ScoreIndexPair's second member is
// an unsigned int -- confirm whether the dereferenced index was intended.
304 score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
// push_heap/pop_heap use the default pair ordering, so the front of the heap
// is the entry with the largest score.
305 std::push_heap(score_index_heap.begin(), score_index_heap.end());
307 if (score_index_heap.size() > (unsigned int)k_nn) {
308 // Remove the highest distance value as we have too many elements
309 std::pop_heap(score_index_heap.begin(), score_index_heap.end());
310 score_index_heap.pop_back();
311 // Keep track of the worst score
312 worst_score = score_index_heap.front().first;
// Second traversal: same walk over tables, masks and buckets, but candidates
// are kept whenever their distance is below `radius`, without any size limit.
// NOTE(review): `table` and `table_end` are declared a second time here, so
// this traversal must live in a different scope than the first one; the
// scope-delimiting lines are not visible in this part of the file.
320 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
321 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
322 for (; table != table_end; ++table) {
323 size_t key = table->getKey(vec);
324 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
325 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
326 for (; xor_mask != xor_mask_end; ++xor_mask) {
327 size_t sub_key = key ^ (*xor_mask);
328 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
329 if (bucket == 0) continue;
331 // Go over each descriptor index
332 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
333 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
334 DistanceType hamming_distance;
336 // Process the rest of the candidates
337 for (; training_index < last_training_index; ++training_index) {
338 // Compute the Hamming distance
339 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
// NOTE(review): same iterator-versus-index question as in the first traversal.
340 if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
347 /** Performs the approximate nearest-neighbor search.
348 * This is a slower version than the above as it uses the ResultSet
349 * @param vec the feature to analyze
351 void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result)
353 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
354 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
355 for (; table != table_end; ++table) {
356 size_t key = table->getKey(vec);
357 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
358 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
359 for (; xor_mask != xor_mask_end; ++xor_mask) {
360 size_t sub_key = key ^ (*xor_mask);
361 const lsh::Bucket* bucket = table->getBucketFromKey((lsh::BucketKey)sub_key);
362 if (bucket == 0) continue;
364 // Go over each descriptor index
365 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
366 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
367 DistanceType hamming_distance;
369 // Process the rest of the candidates
370 for (; training_index < last_training_index; ++training_index) {
371 // Compute the Hamming distance
372 hamming_distance = distance_(vec, dataset_[*training_index], (int)dataset_.cols);
373 result.addPoint(hamming_distance, *training_index);
379 /** The different hash tables (one entry per table, table_number_ in total) */
380 std::vector<lsh::LshTable<ElementType> > tables_;
382 /** The data the LSH tables were built from */
383 Matrix<ElementType> dataset_;
385 /** The size of the features (as ElementType[]), taken from the dataset column count */
386 unsigned int feature_size_;
/** The index parameters: copied from the constructor argument and returned by getParameters() */
388 IndexParams index_params_;
/** The number of hash tables (the "table_number" parameter) */
391 unsigned int table_number_;
/** The length of the key in the hash tables (the "key_size" parameter) */
393 unsigned int key_size_;
394 /** How far should we look for neighbors in multi-probe LSH */
395 unsigned int multi_probe_level_;
397 /** The XOR masks to apply to a key to get the neighboring buckets */
398 std::vector<lsh::BucketKey> xor_masks_;
404 #endif //OPENCV_FLANN_LSH_INDEX_H_