/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"

namespace cv {
namespace linemod {

/**
 * \brief Get the label [0,8) of the single bit set in quantized.
 */
static inline int getLabel(int quantized)
{
  switch (quantized)
  {
    case 1:   return 0;
    case 2:   return 1;
    case 4:   return 2;
    case 8:   return 3;
    case 16:  return 4;
    case 32:  return 5;
    case 64:  return 6;
    case 128: return 7;
    default:
      CV_Error(CV_StsBadArg, "Invalid value of quantized parameter");
      return -1; //avoid warning
  }
}
void Feature::read(const FileNode& fn)
{
  FileNodeIterator fni = fn.begin();
  fni >> x >> y >> label;
}

void Feature::write(FileStorage& fs) const
{
  fs << "[:" << x << y << label << "]";
}
/**
 * \brief Crop a set of overlapping templates from different modalities.
 *
 * \param[in,out] templates Set of templates representing the same object view.
 *
 * \return The bounding box of all the templates in original image coordinates.
 */
static Rect cropTemplates(std::vector<Template>& templates)
{
  int min_x = std::numeric_limits<int>::max();
  int min_y = std::numeric_limits<int>::max();
  int max_x = std::numeric_limits<int>::min();
  int max_y = std::numeric_limits<int>::min();
  // First pass: find min/max feature x,y over all pyramid levels and modalities
  for (int i = 0; i < (int)templates.size(); ++i)
  {
    Template& templ = templates[i];

    for (int j = 0; j < (int)templ.features.size(); ++j)
    {
      int x = templ.features[j].x << templ.pyramid_level;
      int y = templ.features[j].y << templ.pyramid_level;
      min_x = std::min(min_x, x);
      min_y = std::min(min_y, y);
      max_x = std::max(max_x, x);
      max_y = std::max(max_y, y);
    }
  }

  /// @todo Why require even min_x, min_y?
  if (min_x % 2 == 1) --min_x;
  if (min_y % 2 == 1) --min_y;
  // Second pass: set width/height and shift all feature positions
  for (int i = 0; i < (int)templates.size(); ++i)
  {
    Template& templ = templates[i];
    templ.width = (max_x - min_x) >> templ.pyramid_level;
    templ.height = (max_y - min_y) >> templ.pyramid_level;
    int offset_x = min_x >> templ.pyramid_level;
    int offset_y = min_y >> templ.pyramid_level;

    for (int j = 0; j < (int)templ.features.size(); ++j)
    {
      templ.features[j].x -= offset_x;
      templ.features[j].y -= offset_y;
    }
  }

  return Rect(min_x, min_y, max_x - min_x, max_y - min_y);
}
void Template::read(const FileNode& fn)
{
  width = fn["width"];
  height = fn["height"];
  pyramid_level = fn["pyramid_level"];

  FileNode features_fn = fn["features"];
  features.resize(features_fn.size());
  FileNodeIterator it = features_fn.begin(), it_end = features_fn.end();
  for (int i = 0; it != it_end; ++it, ++i)
  {
    features[i].read(*it);
  }
}
void Template::write(FileStorage& fs) const
{
  fs << "width" << width;
  fs << "height" << height;
  fs << "pyramid_level" << pyramid_level;

  fs << "features" << "[";
  for (int i = 0; i < (int)features.size(); ++i)
  {
    features[i].write(fs);
  }
  fs << "]"; // features
}
/****************************************************************************************\
*                                 Modality interfaces                                    *
\****************************************************************************************/
void QuantizedPyramid::selectScatteredFeatures(const std::vector<Candidate>& candidates,
                                               std::vector<Feature>& features,
                                               size_t num_features, float distance)
{
  float distance_sq = CV_SQR(distance);
  int i = 0;
  while (features.size() < num_features)
  {
    Candidate c = candidates[i];

    // Add if sufficient distance away from any previously chosen feature
    bool keep = true;
    for (int j = 0; (j < (int)features.size()) && keep; ++j)
    {
      Feature f = features[j];
      keep = CV_SQR(c.f.x - f.x) + CV_SQR(c.f.y - f.y) >= distance_sq;
    }
    if (keep)
      features.push_back(c.f);

    if (++i == (int)candidates.size())
    {
      // Start back at beginning, and relax required distance
      i = 0;
      distance -= 1.0f;
      distance_sq = CV_SQR(distance);
    }
  }
}
Ptr<Modality> Modality::create(const std::string& modality_type)
{
  if (modality_type == "ColorGradient")
    return new ColorGradient();
  else if (modality_type == "DepthNormal")
    return new DepthNormal();
Ptr<Modality> Modality::create(const FileNode& fn)
{
  std::string type = fn["type"];
  Ptr<Modality> modality = create(type);
  modality->read(fn);
  return modality;
}
void colormap(const Mat& quantized, Mat& dst)
{
  std::vector<Vec3b> lut(8);
  lut[0] = Vec3b(  0,   0, 255);
  lut[1] = Vec3b(  0, 170, 255);
  lut[2] = Vec3b(  0, 255, 170);
  lut[3] = Vec3b(  0, 255,   0);
  lut[4] = Vec3b(170, 255,   0);
  lut[5] = Vec3b(255, 170,   0);
  lut[6] = Vec3b(255,   0,   0);
  lut[7] = Vec3b(255,   0, 170);

  dst = Mat::zeros(quantized.size(), CV_8UC3);
  for (int r = 0; r < dst.rows; ++r)
  {
    const uchar* quant_r = quantized.ptr(r);
    Vec3b* dst_r = dst.ptr<Vec3b>(r);
    for (int c = 0; c < dst.cols; ++c)
    {
      uchar q = quant_r[c];
      if (q)
        dst_r[c] = lut[getLabel(q)];
    }
  }
}
/****************************************************************************************\
*                             Color gradient modality                                    *
\****************************************************************************************/

// Forward declaration
void hysteresisGradient(Mat& magnitude, Mat& angle,
                        Mat& ap_tmp, float threshold);
/**
 * \brief Compute quantized orientation image from color image.
 *
 * Implements section 2.2 "Computing the Gradient Orientations."
 *
 * \param[in]  src       The source 8-bit, 3-channel image.
 * \param[out] magnitude Destination floating-point array of squared magnitudes.
 * \param[out] angle     Destination 8-bit array of orientations. Each bit
 *                       represents one bin of the orientation space.
 * \param      threshold Magnitude threshold. Keep only gradients whose norms are
 *                       larger than this.
 */
static void quantizedOrientations(const Mat& src, Mat& magnitude,
                                  Mat& angle, float threshold)
{
  magnitude.create(src.size(), CV_32F);

  // Allocate temporary buffers
  Size size = src.size();
  Mat sobel_3dx;              // per-channel horizontal derivative
  Mat sobel_3dy;              // per-channel vertical derivative
  Mat sobel_dx(size, CV_32F); // maximum horizontal derivative
  Mat sobel_dy(size, CV_32F); // maximum vertical derivative
  Mat sobel_ag;               // final gradient orientation (unquantized)
  Mat smoothed;

  // Compute horizontal and vertical image derivatives on all color channels separately
  static const int KERNEL_SIZE = 7;
  // For some reason cvSmooth/cv::GaussianBlur, cvSobel/cv::Sobel have different defaults for border handling...
  GaussianBlur(src, smoothed, Size(KERNEL_SIZE, KERNEL_SIZE), 0, 0, BORDER_REPLICATE);
  Sobel(smoothed, sobel_3dx, CV_16S, 1, 0, 3, 1.0, 0.0, BORDER_REPLICATE);
  Sobel(smoothed, sobel_3dy, CV_16S, 0, 1, 3, 1.0, 0.0, BORDER_REPLICATE);

  short * ptrx  = (short *)sobel_3dx.data;
  short * ptry  = (short *)sobel_3dy.data;
  float * ptr0x = (float *)sobel_dx.data;
  float * ptr0y = (float *)sobel_dy.data;
  float * ptrmg = (float *)magnitude.data;

  const int length1 = static_cast<int>(sobel_3dx.step1());
  const int length2 = static_cast<int>(sobel_3dy.step1());
  const int length3 = static_cast<int>(sobel_dx.step1());
  const int length4 = static_cast<int>(sobel_dy.step1());
  const int length5 = static_cast<int>(magnitude.step1());
  const int length0 = sobel_3dy.cols * 3;
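  // Note: length1..length5 are the row strides (in elements) of the per-channel Sobel,
  // max-derivative and magnitude images, used below to advance the raw pointers row by
  // row; length0 is the number of interleaved channel values per row (3 per pixel).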
  for (int r = 0; r < sobel_3dy.rows; ++r)
  {
    int ind = 0;

    for (int i = 0; i < length0; i += 3)
    {
      // Use the gradient orientation of the channel whose magnitude is largest
      int mag1 = CV_SQR(ptrx[i    ]) + CV_SQR(ptry[i    ]);
      int mag2 = CV_SQR(ptrx[i + 1]) + CV_SQR(ptry[i + 1]);
      int mag3 = CV_SQR(ptrx[i + 2]) + CV_SQR(ptry[i + 2]);

      if (mag1 >= mag2 && mag1 >= mag3)
      {
        ptr0x[ind] = ptrx[i];
        ptr0y[ind] = ptry[i];
        ptrmg[ind] = (float)mag1;
      }
      else if (mag2 >= mag1 && mag2 >= mag3)
      {
        ptr0x[ind] = ptrx[i + 1];
        ptr0y[ind] = ptry[i + 1];
        ptrmg[ind] = (float)mag2;
      }
      else
      {
        ptr0x[ind] = ptrx[i + 2];
        ptr0y[ind] = ptry[i + 2];
        ptrmg[ind] = (float)mag3;
      }
      ++ind;
    }
    ptrx  += length1;
    ptry  += length2;
    ptr0x += length3;
    ptr0y += length4;
    ptrmg += length5;
  }

  // Calculate the final gradient orientations
  phase(sobel_dx, sobel_dy, sobel_ag, true);
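  // Note: magnitude holds squared gradient norms, so the threshold is squared here as well.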
  hysteresisGradient(magnitude, angle, sobel_ag, CV_SQR(threshold));
}
void hysteresisGradient(Mat& magnitude, Mat& quantized_angle,
                        Mat& angle, float threshold)
{
  // Quantize 360 degree range of orientations into 16 buckets
  // Note that [0, 11.25), [348.75, 360) both get mapped in the end to label 0,
  // for stability of horizontal and vertical features.
  Mat_<unsigned char> quantized_unfiltered;
  angle.convertTo(quantized_unfiltered, CV_8U, 16.0 / 360.0);

  // Zero out top and bottom rows
  /// @todo is this necessary, or even correct?
  memset(quantized_unfiltered.ptr(), 0, quantized_unfiltered.cols);
  memset(quantized_unfiltered.ptr(quantized_unfiltered.rows - 1), 0, quantized_unfiltered.cols);
  // Zero out first and last columns
  for (int r = 0; r < quantized_unfiltered.rows; ++r)
  {
    quantized_unfiltered(r, 0) = 0;
    quantized_unfiltered(r, quantized_unfiltered.cols - 1) = 0;
  }
  // Mask 16 buckets into 8 quantized orientations
  for (int r = 1; r < angle.rows - 1; ++r)
  {
    uchar* quant_r = quantized_unfiltered.ptr<uchar>(r);
    for (int c = 1; c < angle.cols - 1; ++c)
    {
      quant_r[c] &= 7;
    }
  }
  // Filter the raw quantized image. Only accept pixels where the magnitude is above some
  // threshold, and there is local agreement on the quantization.
  quantized_angle = Mat::zeros(angle.size(), CV_8U);
  for (int r = 1; r < angle.rows - 1; ++r)
  {
    float* mag_r = magnitude.ptr<float>(r);

    for (int c = 1; c < angle.cols - 1; ++c)
    {
      if (mag_r[c] > threshold)
      {
        // Compute histogram of quantized bins in 3x3 patch around pixel
        int histogram[8] = {0, 0, 0, 0, 0, 0, 0, 0};

        uchar* patch3x3_row = &quantized_unfiltered(r-1, c-1);
        histogram[patch3x3_row[0]]++;
        histogram[patch3x3_row[1]]++;
        histogram[patch3x3_row[2]]++;

        patch3x3_row += quantized_unfiltered.step1();
        histogram[patch3x3_row[0]]++;
        histogram[patch3x3_row[1]]++;
        histogram[patch3x3_row[2]]++;

        patch3x3_row += quantized_unfiltered.step1();
        histogram[patch3x3_row[0]]++;
        histogram[patch3x3_row[1]]++;
        histogram[patch3x3_row[2]]++;

        // Find bin with the most votes from the patch
        int max_votes = 0;
        int index = -1;
        for (int i = 0; i < 8; ++i)
        {
          if (max_votes < histogram[i])
          {
            index = i;
            max_votes = histogram[i];
          }
        }

        // Only accept the quantization if majority of pixels in the patch agree
        static const int NEIGHBOR_THRESHOLD = 5;
        if (max_votes >= NEIGHBOR_THRESHOLD)
          quantized_angle.at<uchar>(r, c) = uchar(1 << index);
      }
    }
  }
}
class ColorGradientPyramid : public QuantizedPyramid
{
public:
  ColorGradientPyramid(const Mat& src, const Mat& mask,
                       float weak_threshold, size_t num_features,
                       float strong_threshold);

  virtual void quantize(Mat& dst) const;

  virtual bool extractTemplate(Template& templ) const;

  virtual void pyrDown();

protected:
  /// Recalculate angle and magnitude images
  void update();

  Mat src;
  Mat mask;

  int pyramid_level;
  Mat angle;
  Mat magnitude;

  float weak_threshold;
  size_t num_features;
  float strong_threshold;
};

ColorGradientPyramid::ColorGradientPyramid(const Mat& _src, const Mat& _mask,
                                           float _weak_threshold, size_t _num_features,
                                           float _strong_threshold)
  : src(_src),
    mask(_mask),
    pyramid_level(0),
    weak_threshold(_weak_threshold),
    num_features(_num_features),
    strong_threshold(_strong_threshold)
{
  update();
}
void ColorGradientPyramid::update()
{
  quantizedOrientations(src, magnitude, angle, weak_threshold);
}

void ColorGradientPyramid::pyrDown()
{
  // Some parameters need to be adjusted
  num_features /= 2; /// @todo Why not 4?
  ++pyramid_level;

  // Downsample the current inputs
  Size size(src.cols / 2, src.rows / 2);
  Mat next_src;
  cv::pyrDown(src, next_src, size);
  src = next_src;
  if (!mask.empty())
  {
    Mat next_mask;
    resize(mask, next_mask, size, 0.0, 0.0, CV_INTER_NN);
    mask = next_mask;
  }

  update();
}

void ColorGradientPyramid::quantize(Mat& dst) const
{
  dst = Mat::zeros(angle.size(), CV_8U);
  angle.copyTo(dst, mask);
}
bool ColorGradientPyramid::extractTemplate(Template& templ) const
{
  // Want features on the border to distinguish from background
  Mat local_mask;
  if (!mask.empty())
  {
    erode(mask, local_mask, Mat(), Point(-1,-1), 1, BORDER_REPLICATE);
    subtract(mask, local_mask, local_mask);
  }

  // Create sorted list of all pixels with magnitude greater than a threshold
  std::vector<Candidate> candidates;
  bool no_mask = local_mask.empty();
  float threshold_sq = CV_SQR(strong_threshold);
  for (int r = 0; r < magnitude.rows; ++r)
  {
    const uchar* angle_r = angle.ptr<uchar>(r);
    const float* magnitude_r = magnitude.ptr<float>(r);
    const uchar* mask_r = no_mask ? NULL : local_mask.ptr<uchar>(r);

    for (int c = 0; c < magnitude.cols; ++c)
    {
      if (no_mask || mask_r[c])
      {
        uchar quantized = angle_r[c];
        if (quantized > 0)
        {
          float score = magnitude_r[c];
          if (score > threshold_sq)
          {
            candidates.push_back(Candidate(c, r, getLabel(quantized), score));
          }
        }
      }
    }
  }
  // We require a certain number of features
  if (candidates.size() < num_features)
    return false;

  // NOTE: Stable sort to agree with old code, which used std::list::sort()
  std::stable_sort(candidates.begin(), candidates.end());

  // Use heuristic based on surplus of candidates in narrow outline for initial distance threshold
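  // (The eroded/subtracted outline above is roughly one pixel wide, so
  //  candidates.size() / num_features approximates the desired spacing, in pixels,
  //  between selected features along the contour.)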
  float distance = static_cast<float>(candidates.size() / num_features + 1);
  selectScatteredFeatures(candidates, templ.features, num_features, distance);

  // Size determined externally, needs to match templates for other modalities
  templ.pyramid_level = pyramid_level;

  return true;
}

ColorGradient::ColorGradient()
  : weak_threshold(10.0f),
    strong_threshold(55.0f)
{
}
ColorGradient::ColorGradient(float _weak_threshold, size_t _num_features, float _strong_threshold)
  : weak_threshold(_weak_threshold),
    num_features(_num_features),
    strong_threshold(_strong_threshold)
{
}

static const char CG_NAME[] = "ColorGradient";

std::string ColorGradient::name() const
{
  return CG_NAME;
}

Ptr<QuantizedPyramid> ColorGradient::processImpl(const Mat& src,
                                                 const Mat& mask) const
{
  return new ColorGradientPyramid(src, mask, weak_threshold, num_features, strong_threshold);
}

void ColorGradient::read(const FileNode& fn)
{
  std::string type = fn["type"];
  CV_Assert(type == CG_NAME);

  weak_threshold = fn["weak_threshold"];
  num_features = int(fn["num_features"]);
  strong_threshold = fn["strong_threshold"];
}

void ColorGradient::write(FileStorage& fs) const
{
  fs << "type" << CG_NAME;
  fs << "weak_threshold" << weak_threshold;
  fs << "num_features" << int(num_features);
  fs << "strong_threshold" << strong_threshold;
}
/****************************************************************************************\
*                               Depth normal modality                                    *
\****************************************************************************************/

// Contains GRANULARITY and NORMAL_LUT
#include "normal_lut.i"

static void accumBilateral(long delta, long i, long j, long * A, long * b, int threshold)
{
  long f = std::abs(delta) < threshold ? 1 : 0;

  const long fi = f * i;
  const long fj = f * j;

  A[0] += fi * i;
  A[1] += fi * j;
  A[3] += fj * j;
  b[0] += fi * delta;
  b[1] += fj * delta;
}
/**
 * \brief Compute quantized normal image from depth image.
 *
 * Implements section 2.6 "Extension to Dense Depth Sensors."
 *
 * \param[in]  src The source 16-bit depth image (in mm).
 * \param[out] dst The destination 8-bit image. Each bit represents one bin of
 *                 the view cone.
 * \param distance_threshold   Ignore pixels beyond this distance.
 * \param difference_threshold When computing normals, ignore contributions of pixels whose
 *                             depth difference with the central pixel is above this threshold.
 *
 * \todo Should also need camera model, or at least focal lengths? Replace distance_threshold with mask?
 */
static void quantizedNormals(const Mat& src, Mat& dst, int distance_threshold,
                             int difference_threshold)
{
  dst = Mat::zeros(src.size(), CV_8U);

  IplImage src_ipl = src;
  IplImage* ap_depth_data = &src_ipl;
  IplImage dst_ipl = dst;
  IplImage* dst_ipl_ptr = &dst_ipl;
  IplImage** m_dep = &dst_ipl_ptr;

  unsigned short * lp_depth   = (unsigned short *)ap_depth_data->imageData;
  unsigned char  * lp_normals = (unsigned char  *)m_dep[0]->imageData;

  const int l_W = ap_depth_data->width;
  const int l_H = ap_depth_data->height;

  const int l_r = 5; // used to be 7
  const int l_offset0 = -l_r - l_r * l_W;
  const int l_offset1 =    0 - l_r * l_W;
  const int l_offset2 = +l_r - l_r * l_W;
  const int l_offset3 = -l_r;
  const int l_offset4 = +l_r;
  const int l_offset5 = -l_r + l_r * l_W;
  const int l_offset6 =    0 + l_r * l_W;
  const int l_offset7 = +l_r + l_r * l_W;

  const int l_offsetx = GRANULARITY / 2;
  const int l_offsety = GRANULARITY / 2;
  for (int l_y = l_r; l_y < l_H - l_r - 1; ++l_y)
  {
    unsigned short * lp_line = lp_depth   + (l_y * l_W + l_r);
    unsigned char  * lp_norm = lp_normals + (l_y * l_W + l_r);

    for (int l_x = l_r; l_x < l_W - l_r - 1; ++l_x)
    {
      long l_d = lp_line[0];

      if (l_d < distance_threshold)
      {
        // Accumulate bilateral-weighted contributions from 8 neighbors at radius l_r
        long l_A[4]; l_A[0] = l_A[1] = l_A[2] = l_A[3] = 0;
        long l_b[2]; l_b[0] = l_b[1] = 0;
        accumBilateral(lp_line[l_offset0] - l_d, -l_r, -l_r, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset1] - l_d,    0, -l_r, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset2] - l_d, +l_r, -l_r, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset3] - l_d, -l_r,    0, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset4] - l_d, +l_r,    0, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset5] - l_d, -l_r, +l_r, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset6] - l_d,    0, +l_r, l_A, l_b, difference_threshold);
        accumBilateral(lp_line[l_offset7] - l_d, +l_r, +l_r, l_A, l_b, difference_threshold);

        long l_det =  l_A[0] * l_A[3] - l_A[1] * l_A[1];
        long l_ddx =  l_A[3] * l_b[0] - l_A[1] * l_b[1];
        long l_ddy = -l_A[1] * l_b[0] + l_A[0] * l_b[1];
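        // l_det/l_ddx/l_ddy form the (unnormalized) Cramer's-rule solution of the 2x2
        // normal-equation system A * [dz/dx, dz/dy]^T = b accumulated above, i.e. a
        // least-squares depth-gradient estimate around the center pixel.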
        /// @todo Magic number 1150 is focal length? This is something like
        /// f in SXGA mode, but in VGA is more like 530.
        float l_nx = static_cast<float>(1150 * l_ddx);
        float l_ny = static_cast<float>(1150 * l_ddy);
        float l_nz = static_cast<float>(-l_det * l_d);

        float l_sqrt = sqrtf(l_nx * l_nx + l_ny * l_ny + l_nz * l_nz);

        if (l_sqrt > 0)
        {
          float l_norminv = 1.0f / (l_sqrt);

          l_nx *= l_norminv;
          l_ny *= l_norminv;
          l_nz *= l_norminv;

          //*lp_norm = fabs(l_nz)*255;

          int l_val1 = static_cast<int>(l_nx * l_offsetx + l_offsetx);
          int l_val2 = static_cast<int>(l_ny * l_offsety + l_offsety);
          int l_val3 = static_cast<int>(l_nz * GRANULARITY + GRANULARITY);
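          // Shift and scale the unit normal components into non-negative indices for the
          // precomputed NORMAL_LUT lookup below.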
          *lp_norm = NORMAL_LUT[l_val3][l_val2][l_val1];
        }
        else
        {
          *lp_norm = 0; // Discard shadows from depth sensor
        }
      }
      else
      {
        *lp_norm = 0; // out of depth
      }
      ++lp_line;
      ++lp_norm;
    }
  }
  cvSmooth(m_dep[0], m_dep[0], CV_MEDIAN, 5, 5);
}
class DepthNormalPyramid : public QuantizedPyramid
{
public:
  DepthNormalPyramid(const Mat& src, const Mat& mask,
                     int distance_threshold, int difference_threshold, size_t num_features,
                     int extract_threshold);

  virtual void quantize(Mat& dst) const;

  virtual bool extractTemplate(Template& templ) const;

  virtual void pyrDown();

protected:
  Mat mask;

  int pyramid_level;
  Mat normal;

  size_t num_features;
  int extract_threshold;
};

DepthNormalPyramid::DepthNormalPyramid(const Mat& src, const Mat& _mask,
                                       int distance_threshold, int difference_threshold, size_t _num_features,
                                       int _extract_threshold)
  : mask(_mask),
    pyramid_level(0),
    num_features(_num_features),
    extract_threshold(_extract_threshold)
{
  quantizedNormals(src, normal, distance_threshold, difference_threshold);
}
void DepthNormalPyramid::pyrDown()
{
  // Some parameters need to be adjusted
  num_features /= 2; /// @todo Why not 4?
  extract_threshold /= 2;
  ++pyramid_level;

  // In this case, NN-downsample the quantized image
  Mat next_normal;
  Size size(normal.cols / 2, normal.rows / 2);
  resize(normal, next_normal, size, 0.0, 0.0, CV_INTER_NN);
  normal = next_normal;
  if (!mask.empty())
  {
    Mat next_mask;
    resize(mask, next_mask, size, 0.0, 0.0, CV_INTER_NN);
    mask = next_mask;
  }
}

void DepthNormalPyramid::quantize(Mat& dst) const
{
  dst = Mat::zeros(normal.size(), CV_8U);
  normal.copyTo(dst, mask);
}
bool DepthNormalPyramid::extractTemplate(Template& templ) const
{
  // Features right on the object border are unreliable
  Mat local_mask;
  if (!mask.empty())
    erode(mask, local_mask, Mat(), Point(-1,-1), 2, BORDER_REPLICATE);

  // Compute distance transform for each individual quantized orientation
  Mat temp = Mat::zeros(normal.size(), CV_8U);
  Mat distances[8];
  for (int i = 0; i < 8; ++i)
  {
    temp.setTo(1 << i, local_mask);
    bitwise_and(temp, normal, temp);
    // temp is now non-zero at pixels in the mask with quantized orientation i
    distanceTransform(temp, distances[i], CV_DIST_C, 3);
  }

  // Count how many features taken for each label
  int label_counts[8] = {0, 0, 0, 0, 0, 0, 0, 0};

  // Create sorted list of candidate features
  std::vector<Candidate> candidates;
  bool no_mask = local_mask.empty();
  for (int r = 0; r < normal.rows; ++r)
  {
    const uchar* normal_r = normal.ptr<uchar>(r);
    const uchar* mask_r = no_mask ? NULL : local_mask.ptr<uchar>(r);

    for (int c = 0; c < normal.cols; ++c)
    {
      if (no_mask || mask_r[c])
      {
        uchar quantized = normal_r[c];

        if (quantized != 0 && quantized != 255) // background and shadow
        {
          int label = getLabel(quantized);

          // Accept if distance to a pixel belonging to a different label is greater than
          // some threshold. IOW, ideal feature is in the center of a large homogeneous region.
          float score = distances[label].at<float>(r, c);
          if (score >= extract_threshold)
          {
            candidates.push_back( Candidate(c, r, label, score) );
            ++label_counts[label];
          }
        }
      }
    }
  }
  // We require a certain number of features
  if (candidates.size() < num_features)
    return false;

  // Prefer large distances, but also want to collect features over all 8 labels.
  // So penalize labels with lots of candidates.
  for (size_t i = 0; i < candidates.size(); ++i)
  {
    Candidate& c = candidates[i];
    c.score /= (float)label_counts[c.f.label];
  }
  std::stable_sort(candidates.begin(), candidates.end());

  // Use heuristic based on object area for initial distance threshold
  float area = no_mask ? (float)normal.total() : (float)countNonZero(local_mask);
  float distance = sqrtf(area) / sqrtf((float)num_features) + 1.5f;
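  // (Spreading num_features points uniformly over the object area leaves roughly
  //  area / num_features pixels per feature, i.e. a spacing of about sqrt(area / num_features).)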
  selectScatteredFeatures(candidates, templ.features, num_features, distance);

  // Size determined externally, needs to match templates for other modalities
  templ.pyramid_level = pyramid_level;

  return true;
}
DepthNormal::DepthNormal()
  : distance_threshold(2000),
    difference_threshold(50)
{
}

DepthNormal::DepthNormal(int _distance_threshold, int _difference_threshold, size_t _num_features,
                         int _extract_threshold)
  : distance_threshold(_distance_threshold),
    difference_threshold(_difference_threshold),
    num_features(_num_features),
    extract_threshold(_extract_threshold)
{
}
static const char DN_NAME[] = "DepthNormal";

std::string DepthNormal::name() const
{
  return DN_NAME;
}

Ptr<QuantizedPyramid> DepthNormal::processImpl(const Mat& src,
                                               const Mat& mask) const
{
  return new DepthNormalPyramid(src, mask, distance_threshold, difference_threshold,
                                num_features, extract_threshold);
}

void DepthNormal::read(const FileNode& fn)
{
  std::string type = fn["type"];
  CV_Assert(type == DN_NAME);

  distance_threshold = fn["distance_threshold"];
  difference_threshold = fn["difference_threshold"];
  num_features = int(fn["num_features"]);
  extract_threshold = fn["extract_threshold"];
}

void DepthNormal::write(FileStorage& fs) const
{
  fs << "type" << DN_NAME;
  fs << "distance_threshold" << distance_threshold;
  fs << "difference_threshold" << difference_threshold;
  fs << "num_features" << int(num_features);
  fs << "extract_threshold" << extract_threshold;
}
/****************************************************************************************\
*                                 Response maps                                          *
\****************************************************************************************/
static void orUnaligned8u(const uchar * src, const int src_stride,
                          uchar * dst, const int dst_stride,
                          const int width, const int height)
{
  volatile bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
  volatile bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
  bool src_aligned = reinterpret_cast<unsigned long long>(src) % 16 == 0;

  for (int r = 0; r < height; ++r)
  {
    int c = 0;

    // Use aligned loads if possible
    if (haveSSE2 && src_aligned)
    {
      for ( ; c < width - 15; c += 16)
      {
        const __m128i* src_ptr = reinterpret_cast<const __m128i*>(src + c);
        __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c);
        *dst_ptr = _mm_or_si128(*dst_ptr, *src_ptr);
      }
    }
    // Use LDDQU for fast unaligned load
    else if (haveSSE3)
    {
      for ( ; c < width - 15; c += 16)
      {
        __m128i val = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(src + c));
        __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c);
        *dst_ptr = _mm_or_si128(*dst_ptr, val);
      }
    }
    // Fall back to MOVDQU
    else if (haveSSE2)
    {
      for ( ; c < width - 15; c += 16)
      {
        __m128i val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + c));
        __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c);
        *dst_ptr = _mm_or_si128(*dst_ptr, val);
      }
    }
    // OR the remaining bytes scalar-wise
    for ( ; c < width; ++c)
      dst[c] |= src[c];

    // Advance to next row
    src += src_stride;
    dst += dst_stride;
  }
}
/**
 * \brief Spread binary labels in a quantized image.
 *
 * Implements section 2.3 "Spreading the Orientations."
 *
 * \param[in]  src The source 8-bit quantized image.
 * \param[out] dst Destination 8-bit spread image.
 * \param      T   Sampling step. Spread labels T/2 pixels in each direction.
 */
static void spread(const Mat& src, Mat& dst, int T)
{
  // Allocate and zero-initialize spread (OR'ed) image
  dst = Mat::zeros(src.size(), CV_8U);

  // Fill in spread gradient image (section 2.3)
  for (int r = 0; r < T; ++r)
  {
    int height = src.rows - r;
    for (int c = 0; c < T; ++c)
    {
      orUnaligned8u(&src.at<unsigned char>(r, c), static_cast<int>(src.step1()), dst.ptr(),
                    static_cast<int>(dst.step1()), src.cols - c, height);
    }
  }
}
// Auto-generated by create_similarity_lut.py
CV_DECL_ALIGNED(16) static const unsigned char SIMILARITY_LUT[256] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 4, 4, 3, 3, 4, 4, 2, 3, 4, 4, 3, 3, 4, 4, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 0, 2, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 0, 3, 2, 3, 1, 3, 2, 3, 0, 3, 2, 3, 1, 3, 2, 3, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 3, 4, 4, 3, 3, 4, 4, 2, 3, 4, 4, 3, 3, 4, 4, 0, 2, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 2, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 0, 3, 2, 3, 1, 3, 2, 3, 0, 3, 2, 3, 1, 3, 2, 3, 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4};
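// The table is laid out as 8 orientations x 2 x 16 bytes: for orientation i, entries
// [32*i, 32*i+16) give the best response over any label present in the low 4 bits of a
// spread byte, and [32*i+16, 32*i+32) over the high 4 bits; computeResponseMaps() below
// takes the max of the two lookups.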
/**
 * \brief Precompute response maps for a spread quantized image.
 *
 * Implements section 2.4 "Precomputing Response Maps."
 *
 * \param[in]  src           The source 8-bit spread quantized image.
 * \param[out] response_maps Vector of 8 response maps, one for each bit label.
 */
static void computeResponseMaps(const Mat& src, std::vector<Mat>& response_maps)
{
  CV_Assert((src.rows * src.cols) % 16 == 0);

  // Allocate response maps
  response_maps.resize(8);
  for (int i = 0; i < 8; ++i)
    response_maps[i].create(src.size(), CV_8U);

  Mat lsb4(src.size(), CV_8U);
  Mat msb4(src.size(), CV_8U);

  for (int r = 0; r < src.rows; ++r)
  {
    const uchar* src_r = src.ptr(r);
    uchar* lsb4_r = lsb4.ptr(r);
    uchar* msb4_r = msb4.ptr(r);

    for (int c = 0; c < src.cols; ++c)
    {
      // Least significant 4 bits of spread image pixel
      lsb4_r[c] = src_r[c] & 15;
      // Most significant 4 bits, right-shifted to be in [0, 16)
      msb4_r[c] = (src_r[c] & 240) >> 4;
    }
  }
  volatile bool haveSSSE3 = checkHardwareSupport(CV_CPU_SSSE3);
  if (haveSSSE3)
  {
    const __m128i* lut = reinterpret_cast<const __m128i*>(SIMILARITY_LUT);
    for (int ori = 0; ori < 8; ++ori)
    {
      __m128i* map_data = response_maps[ori].ptr<__m128i>();
      __m128i* lsb4_data = lsb4.ptr<__m128i>();
      __m128i* msb4_data = msb4.ptr<__m128i>();

      // Precompute the 2D response map S_i (section 2.4)
      for (int i = 0; i < (src.rows * src.cols) / 16; ++i)
      {
        // Using SSE shuffle for table lookup on 4 orientations at a time
        // The most/least significant 4 bits are used as the LUT index
        __m128i res1 = _mm_shuffle_epi8(lut[2*ori + 0], lsb4_data[i]);
        __m128i res2 = _mm_shuffle_epi8(lut[2*ori + 1], msb4_data[i]);

        // Combine the results into a single similarity score
        map_data[i] = _mm_max_epu8(res1, res2);
      }
    }
  }
  else
  {
    // For each of the 8 quantized orientations...
    for (int ori = 0; ori < 8; ++ori)
    {
      uchar* map_data = response_maps[ori].ptr<uchar>();
      uchar* lsb4_data = lsb4.ptr<uchar>();
      uchar* msb4_data = msb4.ptr<uchar>();
      const uchar* lut_low = SIMILARITY_LUT + 32*ori;
      const uchar* lut_hi = lut_low + 16;

      for (int i = 0; i < src.rows * src.cols; ++i)
      {
        map_data[i] = std::max(lut_low[ lsb4_data[i] ], lut_hi[ msb4_data[i] ]);
      }
    }
  }
}
/**
 * \brief Convert a response map to fast linearized ordering.
 *
 * Implements section 2.5 "Linearizing the Memory for Parallelization."
 *
 * \param[in]  response_map The 2D response map, an 8-bit image.
 * \param[out] linearized   The response map in linearized order. It has T*T rows,
 *                          each of which is a linear memory of length (W/T)*(H/T).
 * \param      T            Sampling step.
 */
static void linearize(const Mat& response_map, Mat& linearized, int T)
{
  CV_Assert(response_map.rows % T == 0);
  CV_Assert(response_map.cols % T == 0);

  // linearized has T^2 rows, where each row is a linear memory
  int mem_width = response_map.cols / T;
  int mem_height = response_map.rows / T;
  linearized.create(T*T, mem_width * mem_height, CV_8U);

  // Outer two for loops iterate over top-left T^2 starting pixels
  int index = 0;
  for (int r_start = 0; r_start < T; ++r_start)
  {
    for (int c_start = 0; c_start < T; ++c_start)
    {
      uchar* memory = linearized.ptr(index);
      ++index;

      // Inner two loops copy every T-th pixel into the linear memory
      for (int r = r_start; r < response_map.rows; r += T)
      {
        const uchar* response_data = response_map.ptr(r);
        for (int c = c_start; c < response_map.cols; c += T)
          *memory++ = response_data[c];
      }
    }
  }
}
/****************************************************************************************\
*                               Linearized similarities                                  *
\****************************************************************************************/

static const unsigned char* accessLinearMemory(const std::vector<Mat>& linear_memories,
                                               const Feature& f, int T, int W)
{
  // Retrieve the TxT grid of linear memories associated with the feature label
  const Mat& memory_grid = linear_memories[f.label];
  CV_DbgAssert(memory_grid.rows == T*T);
  CV_DbgAssert(f.x >= 0);
  CV_DbgAssert(f.y >= 0);
  // The LM we want is at (x%T, y%T) in the TxT grid (stored as the rows of memory_grid)
  int grid_x = f.x % T;
  int grid_y = f.y % T;
  int grid_index = grid_y * T + grid_x;
  CV_DbgAssert(grid_index >= 0);
  CV_DbgAssert(grid_index < memory_grid.rows);
  const unsigned char* memory = memory_grid.ptr(grid_index);
  // Within the LM, the feature is at (x/T, y/T). W is the "width" of the LM, the
  // input image width decimated by T.
  int lm_x = f.x / T;
  int lm_y = f.y / T;
  int lm_index = lm_y * W + lm_x;
  CV_DbgAssert(lm_index >= 0);
  CV_DbgAssert(lm_index < memory_grid.cols);
  return memory + lm_index;
}
/**
 * \brief Compute similarity measure for a given template at each sampled image location.
 *
 * Uses linear memories to compute the similarity measure as described in Fig. 7.
 *
 * \param[in]  linear_memories Vector of 8 linear memories, one for each label.
 * \param[in]  templ           Template to match against.
 * \param[out] dst             Destination 8-bit similarity image of size (W/T, H/T).
 * \param      size            Size (W, H) of the original input image.
 * \param      T               Sampling step.
 */
static void similarity(const std::vector<Mat>& linear_memories, const Template& templ,
                       Mat& dst, Size size, int T)
{
  // 63 features or less is a special case because the max similarity per-feature is 4.
  // 255/4 = 63, so up to that many we can add up similarities in 8 bits without worrying
  // about overflow. Therefore here we use _mm_add_epi8 as the workhorse, whereas a more
  // general function would use _mm_add_epi16.
  CV_Assert(templ.features.size() <= 63);
  /// @todo Handle more than 255/MAX_RESPONSE features!!

  // Decimate input image size by factor of T
  int W = size.width / T;
  int H = size.height / T;

  // Feature dimensions, decimated by factor T and rounded up
  int wf = (templ.width - 1) / T + 1;
  int hf = (templ.height - 1) / T + 1;

  // Span is the range over which we can shift the template around the input image
  int span_x = W - wf;
  int span_y = H - hf;

  // Compute number of contiguous (in memory) pixels to check when sliding feature over
  // image. This allows template to wrap around left/right border incorrectly, so any
  // wrapped template matches must be filtered out!
  int template_positions = span_y * W + span_x + 1; // why add 1?
  //int template_positions = (span_y - 1) * W + span_x; // More correct?

  /// @todo In old code, dst is buffer of size m_U. Could make it something like
  /// (span_x)x(span_y) instead?
  dst = Mat::zeros(H, W, CV_8U);
  uchar* dst_ptr = dst.ptr<uchar>();

  volatile bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
  volatile bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
  // Compute the similarity measure for this template by accumulating the contribution of
  // each feature
  for (int i = 0; i < (int)templ.features.size(); ++i)
  {
    // Add the linear memory at the appropriate offset computed from the location of
    // the feature in the template
    Feature f = templ.features[i];
    // Discard feature if out of bounds
    /// @todo Shouldn't actually see x or y < 0 here?
    if (f.x < 0 || f.x >= size.width || f.y < 0 || f.y >= size.height)
      continue;
    const uchar* lm_ptr = accessLinearMemory(linear_memories, f, T, W);

    // Now we do an aligned/unaligned add of dst_ptr and lm_ptr with template_positions elements
    int j = 0;

    // Process responses 16 at a time if vectorization possible
    if (haveSSE3)
    {
      // LDDQU may be more efficient than MOVDQU for unaligned load of next 16 responses
      for ( ; j < template_positions - 15; j += 16)
      {
        __m128i responses = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(lm_ptr + j));
        __m128i* dst_ptr_sse = reinterpret_cast<__m128i*>(dst_ptr + j);
        *dst_ptr_sse = _mm_add_epi8(*dst_ptr_sse, responses);
      }
    }
    else if (haveSSE2)
    {
      // Fall back to MOVDQU
      for ( ; j < template_positions - 15; j += 16)
      {
        __m128i responses = _mm_loadu_si128(reinterpret_cast<const __m128i*>(lm_ptr + j));
        __m128i* dst_ptr_sse = reinterpret_cast<__m128i*>(dst_ptr + j);
        *dst_ptr_sse = _mm_add_epi8(*dst_ptr_sse, responses);
      }
    }
    for ( ; j < template_positions; ++j)
      dst_ptr[j] = uchar(dst_ptr[j] + lm_ptr[j]);
  }
}
/**
 * \brief Compute similarity measure for a given template in a local region.
 *
 * \param[in]  linear_memories Vector of 8 linear memories, one for each label.
 * \param[in]  templ           Template to match against.
 * \param[out] dst             Destination 8-bit similarity image, 16x16.
 * \param      size            Size (W, H) of the original input image.
 * \param      T               Sampling step.
 * \param      center          Center of the local region.
 */
static void similarityLocal(const std::vector<Mat>& linear_memories, const Template& templ,
                            Mat& dst, Size size, int T, Point center)
{
  // Similar to whole-image similarity() above. This version takes a position 'center'
  // and computes the energy in the 16x16 patch centered on it.
  CV_Assert(templ.features.size() <= 63);

  // Compute the similarity map in a 16x16 patch around center
  int W = size.width / T;
  dst = Mat::zeros(16, 16, CV_8U);

  // Offset each feature point by the requested center. Further adjust to (-8,-8) from the
  // center to get the top-left corner of the 16x16 patch.
  // NOTE: We make the offsets multiples of T to agree with results of the original code.
  int offset_x = (center.x / T - 8) * T;
  int offset_y = (center.y / T - 8) * T;

  volatile bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
  volatile bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
  __m128i* dst_ptr_sse = dst.ptr<__m128i>();
  for (int i = 0; i < (int)templ.features.size(); ++i)
  {
    Feature f = templ.features[i];
    f.x += offset_x;
    f.y += offset_y;
    // Discard feature if out of bounds, possibly due to applying the offset
    if (f.x < 0 || f.y < 0 || f.x >= size.width || f.y >= size.height)
      continue;

    const uchar* lm_ptr = accessLinearMemory(linear_memories, f, T, W);

    // Process whole row at a time if vectorization possible
    if (haveSSE3)
    {
      // LDDQU may be more efficient than MOVDQU for unaligned load of 16 responses from current row
      for (int row = 0; row < 16; ++row)
      {
        __m128i aligned = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(lm_ptr));
        dst_ptr_sse[row] = _mm_add_epi8(dst_ptr_sse[row], aligned);
        lm_ptr += W; // Step to next row
      }
    }
    else if (haveSSE2)
    {
      // Fall back to MOVDQU
      for (int row = 0; row < 16; ++row)
      {
        __m128i aligned = _mm_loadu_si128(reinterpret_cast<const __m128i*>(lm_ptr));
        dst_ptr_sse[row] = _mm_add_epi8(dst_ptr_sse[row], aligned);
        lm_ptr += W; // Step to next row
      }
    }
    else
    {
      uchar* dst_ptr = dst.ptr<uchar>();
      for (int row = 0; row < 16; ++row)
      {
        for (int col = 0; col < 16; ++col)
          dst_ptr[col] = uchar(dst_ptr[col] + lm_ptr[col]);
        dst_ptr += 16;
        lm_ptr += W;
      }
    }
  }
}
static void addUnaligned8u16u(const uchar * src1, const uchar * src2, ushort * res, int length)
{
  const uchar * end = src1 + length;

  while (src1 != end)
  {
    *res = *src1 + *src2;

    ++src1;
    ++src2;
    ++res;
  }
}

/**
 * \brief Accumulate one or more 8-bit similarity images.
 *
 * \param[in]  similarities Source 8-bit similarity images.
 * \param[out] dst          Destination 16-bit similarity image.
 */
static void addSimilarities(const std::vector<Mat>& similarities, Mat& dst)
{
  if (similarities.size() == 1)
  {
    similarities[0].convertTo(dst, CV_16U);
  }
  else
  {
    // NOTE: add() seems to be rather slow in the 8U + 8U -> 16U case
    dst.create(similarities[0].size(), CV_16U);
    addUnaligned8u16u(similarities[0].ptr(), similarities[1].ptr(), dst.ptr<ushort>(), static_cast<int>(dst.total()));

    /// @todo Optimize 16u + 8u -> 16u when more than 2 modalities
    for (size_t i = 2; i < similarities.size(); ++i)
      add(dst, similarities[i], dst, noArray(), CV_16U);
  }
}
/****************************************************************************************\
*                               High-level Detector API                                  *
\****************************************************************************************/

Detector::Detector()
{
}

Detector::Detector(const std::vector< Ptr<Modality> >& _modalities,
                   const std::vector<int>& T_pyramid)
  : modalities(_modalities),
    pyramid_levels(static_cast<int>(T_pyramid.size())),
    T_at_level(T_pyramid)
{
}

void Detector::match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
                     const std::vector<std::string>& class_ids, OutputArrayOfArrays quantized_images,
                     const std::vector<Mat>& masks) const
{
  if (quantized_images.needed())
    quantized_images.create(1, static_cast<int>(pyramid_levels * modalities.size()), CV_8U);
  assert(sources.size() == modalities.size());
  // Initialize each modality with our sources
  std::vector< Ptr<QuantizedPyramid> > quantizers;
  for (int i = 0; i < (int)modalities.size(); ++i){
    Mat mask, source;
    source = sources[i];
    if (!masks.empty()){
      assert(masks.size() == modalities.size());
      mask = masks[i];
    }
    assert(mask.empty() || mask.size() == source.size());
    quantizers.push_back(modalities[i]->process(source, mask));
  }
  // pyramid level -> modality -> quantization
  LinearMemoryPyramid lm_pyramid(pyramid_levels,
                                 std::vector<LinearMemories>(modalities.size(), LinearMemories(8)));

  // For each pyramid level, precompute linear memories for each modality
  std::vector<Size> sizes;
  for (int l = 0; l < pyramid_levels; ++l)
  {
    int T = T_at_level[l];
    std::vector<LinearMemories>& lm_level = lm_pyramid[l];

    if (l > 0)
    {
      for (int i = 0; i < (int)quantizers.size(); ++i)
        quantizers[i]->pyrDown();
    }

    Mat quantized, spread_quantized;
    std::vector<Mat> response_maps;
    for (int i = 0; i < (int)quantizers.size(); ++i)
    {
      quantizers[i]->quantize(quantized);
      spread(quantized, spread_quantized, T);
      computeResponseMaps(spread_quantized, response_maps);

      LinearMemories& memories = lm_level[i];
      for (int j = 0; j < 8; ++j)
        linearize(response_maps[j], memories[j], T);

      if (quantized_images.needed()) //use copyTo here to side step reference semantics.
        quantized.copyTo(quantized_images.getMatRef(static_cast<int>(l*quantizers.size() + i)));
    }

    sizes.push_back(quantized.size());
  }
  if (class_ids.empty())
  {
    // Match all templates
    TemplatesMap::const_iterator it = class_templates.begin(), itend = class_templates.end();
    for ( ; it != itend; ++it)
      matchClass(lm_pyramid, sizes, threshold, matches, it->first, it->second);
  }
  else
  {
    // Match only templates for the requested class IDs
    for (int i = 0; i < (int)class_ids.size(); ++i)
    {
      TemplatesMap::const_iterator it = class_templates.find(class_ids[i]);
      if (it != class_templates.end())
        matchClass(lm_pyramid, sizes, threshold, matches, it->first, it->second);
    }
  }

  // Sort matches by similarity, and prune any duplicates introduced by pyramid refinement
  std::sort(matches.begin(), matches.end());
  std::vector<Match>::iterator new_end = std::unique(matches.begin(), matches.end());
  matches.erase(new_end, matches.end());
}
// Used to filter out weak matches
struct MatchPredicate
{
  MatchPredicate(float _threshold) : threshold(_threshold) {}
  bool operator() (const Match& m) { return m.similarity < threshold; }
  float threshold;
};

void Detector::matchClass(const LinearMemoryPyramid& lm_pyramid,
                          const std::vector<Size>& sizes,
                          float threshold, std::vector<Match>& matches,
                          const std::string& class_id,
                          const std::vector<TemplatePyramid>& template_pyramids) const
{
  // For each template...
  for (size_t template_id = 0; template_id < template_pyramids.size(); ++template_id)
  {
    const TemplatePyramid& tp = template_pyramids[template_id];

    // First match over the whole image at the lowest pyramid level
    /// @todo Factor this out into separate function
    const std::vector<LinearMemories>& lowest_lm = lm_pyramid.back();

    // Compute similarity maps for each modality at lowest pyramid level
    std::vector<Mat> similarities(modalities.size());
    int lowest_start = static_cast<int>(tp.size() - modalities.size());
    int lowest_T = T_at_level.back();
    int num_features = 0;
    for (int i = 0; i < (int)modalities.size(); ++i)
    {
      const Template& templ = tp[lowest_start + i];
      num_features += static_cast<int>(templ.features.size());
      similarity(lowest_lm[i], templ, similarities[i], sizes.back(), lowest_T);
    }

    // Combine into overall similarity
    /// @todo Support weighting the modalities
    Mat total_similarity;
    addSimilarities(similarities, total_similarity);

    // Convert user-friendly percentage to raw similarity threshold. The percentage
    // threshold scales from half the max response (what you would expect from applying
    // the template to a completely random image) to the max response.
    // NOTE: This assumes max per-feature response is 4, so we scale between [2*nf, 4*nf].
    int raw_threshold = static_cast<int>(2*num_features + (threshold / 100.f) * (2*num_features) + 0.5f);
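    // e.g. threshold = 80 and num_features = 63 gives
    // raw_threshold = 126 + 0.8 * 126 + 0.5 -> 227 (out of a maximum raw score of 252).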
    // Find initial matches
    std::vector<Match> candidates;
    for (int r = 0; r < total_similarity.rows; ++r)
    {
      ushort* row = total_similarity.ptr<ushort>(r);
      for (int c = 0; c < total_similarity.cols; ++c)
      {
        int raw_score = row[c];
        if (raw_score > raw_threshold)
        {
          int offset = lowest_T / 2 + (lowest_T % 2 - 1);
          int x = c * lowest_T + offset;
          int y = r * lowest_T + offset;
          float score = (raw_score * 100.f) / (4 * num_features) + 0.5f;
          candidates.push_back(Match(x, y, score, class_id, static_cast<int>(template_id)));
        }
      }
    }
    // Locally refine each match by marching up the pyramid
    for (int l = pyramid_levels - 2; l >= 0; --l)
    {
      const std::vector<LinearMemories>& lms = lm_pyramid[l];
      int T = T_at_level[l];
      int start = static_cast<int>(l * modalities.size());
      Size size = sizes[l];
      int border = 8 * T;
      int offset = T / 2 + (T % 2 - 1);
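      // offset centers a match within its TxT cell: (T/2 - 1) for even T, (T-1)/2 for odd T.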
      int max_x = size.width - tp[start].width - border;
      int max_y = size.height - tp[start].height - border;

      std::vector<Mat> similarities2(modalities.size());
      Mat total_similarity2;
      for (int m = 0; m < (int)candidates.size(); ++m)
      {
        Match& match2 = candidates[m];
        int x = match2.x * 2 + 1; /// @todo Support other pyramid distance
        int y = match2.y * 2 + 1;

        // Require 8 (reduced) row/cols to the up/left
        x = std::max(x, border);
        y = std::max(y, border);

        // Require 8 (reduced) row/cols to the down/right, plus the template size
        x = std::min(x, max_x);
        y = std::min(y, max_y);
        // Compute local similarity maps for each modality
        int numFeatures = 0;
        for (int i = 0; i < (int)modalities.size(); ++i)
        {
          const Template& templ = tp[start + i];
          numFeatures += static_cast<int>(templ.features.size());
          similarityLocal(lms[i], templ, similarities2[i], size, T, Point(x, y));
        }
        addSimilarities(similarities2, total_similarity2);

        // Find best local adjustment
        int best_score = 0;
        int best_r = -1, best_c = -1;
        for (int r = 0; r < total_similarity2.rows; ++r)
        {
          ushort* row = total_similarity2.ptr<ushort>(r);
          for (int c = 0; c < total_similarity2.cols; ++c)
          {
            int score = row[c];
            if (score > best_score)
            {
              best_score = score;
              best_r = r;
              best_c = c;
            }
          }
        }
        // Update current match
        match2.x = (x / T - 8 + best_c) * T + offset;
        match2.y = (y / T - 8 + best_r) * T + offset;
        match2.similarity = (best_score * 100.f) / (4 * numFeatures);
      }

      // Filter out any matches that drop below the similarity threshold
      std::vector<Match>::iterator new_end = std::remove_if(candidates.begin(), candidates.end(),
                                                            MatchPredicate(threshold));
      candidates.erase(new_end, candidates.end());
    }
    matches.insert(matches.end(), candidates.begin(), candidates.end());
  }
}
int Detector::addTemplate(const std::vector<Mat>& sources, const std::string& class_id,
                          const Mat& object_mask, Rect* bounding_box)
{
  int num_modalities = static_cast<int>(modalities.size());
  std::vector<TemplatePyramid>& template_pyramids = class_templates[class_id];
  int template_id = static_cast<int>(template_pyramids.size());

  TemplatePyramid tp;
  tp.resize(num_modalities * pyramid_levels);

  // For each modality...
  for (int i = 0; i < num_modalities; ++i)
  {
    // Extract a template at each pyramid level
    Ptr<QuantizedPyramid> qp = modalities[i]->process(sources[i], object_mask);
    for (int l = 0; l < pyramid_levels; ++l)
    {
      /// @todo Could do mask subsampling here instead of in pyrDown()
      if (l > 0)
        qp->pyrDown();

      bool success = qp->extractTemplate(tp[l*num_modalities + i]);
      if (!success)
        return -1;
    }
  }

  Rect bb = cropTemplates(tp);
  if (bounding_box)
    *bounding_box = bb;

  /// @todo Can probably avoid a copy of tp here with swap
  template_pyramids.push_back(tp);
  return template_id;
}
int Detector::addSyntheticTemplate(const std::vector<Template>& templates, const std::string& class_id)
{
  std::vector<TemplatePyramid>& template_pyramids = class_templates[class_id];
  int template_id = static_cast<int>(template_pyramids.size());
  template_pyramids.push_back(templates);
  return template_id;
}
const std::vector<Template>& Detector::getTemplates(const std::string& class_id, int template_id) const
{
  TemplatesMap::const_iterator i = class_templates.find(class_id);
  CV_Assert(i != class_templates.end());
  CV_Assert(i->second.size() > size_t(template_id));
  return i->second[template_id];
}

int Detector::numTemplates() const
{
  int ret = 0;
  TemplatesMap::const_iterator i = class_templates.begin(), iend = class_templates.end();
  for ( ; i != iend; ++i)
    ret += static_cast<int>(i->second.size());
  return ret;
}

int Detector::numTemplates(const std::string& class_id) const
{
  TemplatesMap::const_iterator i = class_templates.find(class_id);
  if (i == class_templates.end())
    return 0;
  return static_cast<int>(i->second.size());
}

std::vector<std::string> Detector::classIds() const
{
  std::vector<std::string> ids;
  TemplatesMap::const_iterator i = class_templates.begin(), iend = class_templates.end();
  for ( ; i != iend; ++i)
  {
    ids.push_back(i->first);
  }

  return ids;
}
void Detector::read(const FileNode& fn)
{
  class_templates.clear();
  pyramid_levels = fn["pyramid_levels"];
  fn["T"] >> T_at_level;

  modalities.clear();
  FileNode modalities_fn = fn["modalities"];
  FileNodeIterator it = modalities_fn.begin(), it_end = modalities_fn.end();
  for ( ; it != it_end; ++it)
  {
    modalities.push_back(Modality::create(*it));
  }
}

void Detector::write(FileStorage& fs) const
{
  fs << "pyramid_levels" << pyramid_levels;
  fs << "T" << T_at_level;

  fs << "modalities" << "[";
  for (int i = 0; i < (int)modalities.size(); ++i)
  {
    fs << "{";
    modalities[i]->write(fs);
    fs << "}";
  }
  fs << "]"; // modalities
}
std::string Detector::readClass(const FileNode& fn, const std::string &class_id_override)
{
  // Verify compatible with Detector settings
  FileNode mod_fn = fn["modalities"];
  CV_Assert(mod_fn.size() == modalities.size());
  FileNodeIterator mod_it = mod_fn.begin(), mod_it_end = mod_fn.end();
  int i = 0;
  for ( ; mod_it != mod_it_end; ++mod_it, ++i)
    CV_Assert(modalities[i]->name() == (std::string)(*mod_it));
  CV_Assert((int)fn["pyramid_levels"] == pyramid_levels);

  // Detector should not already have this class
  std::string class_id;
  if (class_id_override.empty())
  {
    std::string class_id_tmp = fn["class_id"];
    CV_Assert(class_templates.find(class_id_tmp) == class_templates.end());
    class_id = class_id_tmp;
  }
  else
  {
    class_id = class_id_override;
  }

  TemplatesMap::value_type v(class_id, std::vector<TemplatePyramid>());
  std::vector<TemplatePyramid>& tps = v.second;
  int expected_id = 0;

  FileNode tps_fn = fn["template_pyramids"];
  tps.resize(tps_fn.size());
  FileNodeIterator tps_it = tps_fn.begin(), tps_it_end = tps_fn.end();
  for ( ; tps_it != tps_it_end; ++tps_it, ++expected_id)
  {
    int template_id = (*tps_it)["template_id"];
    CV_Assert(template_id == expected_id);
    FileNode templates_fn = (*tps_it)["templates"];
    tps[template_id].resize(templates_fn.size());

    FileNodeIterator templ_it = templates_fn.begin(), templ_it_end = templates_fn.end();
    int idx = 0;
    for ( ; templ_it != templ_it_end; ++templ_it)
    {
      tps[template_id][idx++].read(*templ_it);
    }
  }

  class_templates.insert(v);
  return class_id;
}
void Detector::writeClass(const std::string& class_id, FileStorage& fs) const
{
  TemplatesMap::const_iterator it = class_templates.find(class_id);
  CV_Assert(it != class_templates.end());
  const std::vector<TemplatePyramid>& tps = it->second;

  fs << "class_id" << it->first;
  fs << "modalities" << "[:";
  for (size_t i = 0; i < modalities.size(); ++i)
    fs << modalities[i]->name();
  fs << "]"; // modalities
  fs << "pyramid_levels" << pyramid_levels;
  fs << "template_pyramids" << "[";
  for (size_t i = 0; i < tps.size(); ++i)
  {
    const TemplatePyramid& tp = tps[i];
    fs << "{";
    fs << "template_id" << int(i); //TODO is this cast correct? won't be good if rolls over...
    fs << "templates" << "[";
    for (size_t j = 0; j < tp.size(); ++j)
    {
      fs << "{";
      tp[j].write(fs);
      fs << "}"; // current template
    }
    fs << "]"; // templates
    fs << "}"; // current pyramid
  }
  fs << "]"; // pyramids
}
void Detector::readClasses(const std::vector<std::string>& class_ids,
                           const std::string& format)
{
  for (size_t i = 0; i < class_ids.size(); ++i)
  {
    const std::string& class_id = class_ids[i];
    std::string filename = cv::format(format.c_str(), class_id.c_str());
    FileStorage fs(filename, FileStorage::READ);
    readClass(fs.root());
  }
}

void Detector::writeClasses(const std::string& format) const
{
  TemplatesMap::const_iterator it = class_templates.begin(), it_end = class_templates.end();
  for ( ; it != it_end; ++it)
  {
    const std::string& class_id = it->first;
    std::string filename = cv::format(format.c_str(), class_id.c_str());
    FileStorage fs(filename, FileStorage::WRITE);
    writeClass(class_id, fs);
  }
}
static const int T_DEFAULTS[] = {5, 8};

Ptr<Detector> getDefaultLINE()
{
  std::vector< Ptr<Modality> > modalities;
  modalities.push_back(new ColorGradient);
  return new Detector(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
}

Ptr<Detector> getDefaultLINEMOD()
{
  std::vector< Ptr<Modality> > modalities;
  modalities.push_back(new ColorGradient);
  modalities.push_back(new DepthNormal);
  return new Detector(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2));
}

} // namespace linemod
} // namespace cv