samples/cpp/bagofwords_classification.cpp

   1 #include "opencv2/highgui/highgui.hpp"
   2 #include "opencv2/imgproc/imgproc.hpp"
   3 #include "opencv2/features2d/features2d.hpp"
   4 #include "opencv2/nonfree/nonfree.hpp"
   5 #include "opencv2/ml/ml.hpp"
   6
   7 #include <fstream>
   8 #include <iostream>
   9 #include <memory>
  10 #include <functional>
  11
  12 #if defined WIN32 || defined _WIN32
  13 #define WIN32_LEAN_AND_MEAN
  14 #include <windows.h>
  15 #undef min
  16 #undef max
  17 #include "sys/types.h"
  18 #endif
  19 #include <sys/stat.h>
  20
  21 #define DEBUG_DESC_PROGRESS
  22
  23 using namespace cv;
  24 using namespace std;
  25
  26 const string paramsFile = "params.xml";
  27 const string vocabularyFile = "vocabulary.xml.gz";
  28 const string bowImageDescriptorsDir = "/bowImageDescriptors";
  29 const string svmsDir = "/svms";
  30 const string plotsDir = "/plots";
  31
  32 static void help(char** argv)
  33 {
  34     cout << "\nThis program shows how to read in, train on and produce test results for the PASCAL VOC (Visual Object Challenge) data. \n"
  35      << "It shows how to use detectors, descriptors and recognition methods \n"
  36         "Using OpenCV version %s\n" << CV_VERSION << "\n"
  37      << "Call: \n"
  38     << "Format:\n ./" << argv[0] << " [VOC path] [result directory]  \n"
  39     << "       or:  \n"
  40     << " ./" << argv[0] << " [VOC path] [result directory] [feature detector] [descriptor extractor] [descriptor matcher] \n"
  41     << "\n"
  42     << "Input parameters: \n"
  43     << "[VOC path]             Path to Pascal VOC data (e.g. /home/my/VOCdevkit/VOC2010). Note: VOC2007-VOC2010 are supported. \n"
  44     << "[result directory]     Path to result diractory. Following folders will be created in [result directory]: \n"
  45     << "                         bowImageDescriptors - to store image descriptors, \n"
  46     << "                         svms - to store trained svms, \n"
  47     << "                         plots - to store files for plots creating. \n"
  48     << "[feature detector]     Feature detector name (e.g. SURF, FAST...) - see createFeatureDetector() function in detectors.cpp \n"
  49     << "                         Currently 12/2010, this is FAST, STAR, SIFT, SURF, MSER, GFTT, HARRIS \n"
  50     << "[descriptor extractor] Descriptor extractor name (e.g. SURF, SIFT) - see createDescriptorExtractor() function in descriptors.cpp \n"
  51     << "                         Currently 12/2010, this is SURF, OpponentSIFT, SIFT, OpponentSURF, BRIEF \n"
  52     << "[descriptor matcher]   Descriptor matcher name (e.g. BruteForce) - see createDescriptorMatcher() function in matchers.cpp \n"
  53     << "                         Currently 12/2010, this is BruteForce, BruteForce-L1, FlannBased, BruteForce-Hamming, BruteForce-HammingLUT \n"
  54     << "\n";
  55 }
  56
  57 static void makeDir( const string& dir )
  58 {
  59 #if defined WIN32 || defined _WIN32
  60     CreateDirectory( dir.c_str(), 0 );
  61 #else
  62     mkdir( dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
  63 #endif
  64 }
  65
  66 static void makeUsedDirs( const string& rootPath )
  67 {
  68     makeDir(rootPath + bowImageDescriptorsDir);
  69     makeDir(rootPath + svmsDir);
  70     makeDir(rootPath + plotsDir);
  71 }
  72
  73 /****************************************************************************************\
  74 *                    Classes to work with PASCAL VOC dataset                             *
  75 \****************************************************************************************/
  76 //
  77 // TODO: refactor this part of the code
  78 //
  79
  80
  81 //used to specify the (sub-)dataset over which operations are performed
  82 enum ObdDatasetType {CV_OBD_TRAIN, CV_OBD_TEST};
  83
  84 class ObdObject
  85 {
  86 public:
  87     string object_class;
  88     Rect boundingBox;
  89 };
  90
  91 //extended object data specific to VOC
  92 enum VocPose {CV_VOC_POSE_UNSPECIFIED, CV_VOC_POSE_FRONTAL, CV_VOC_POSE_REAR, CV_VOC_POSE_LEFT, CV_VOC_POSE_RIGHT};
  93 class VocObjectData
  94 {
  95 public:
  96     bool difficult;
  97     bool occluded;
  98     bool truncated;
  99     VocPose pose;
 100 };
 101 //enum VocDataset {CV_VOC2007, CV_VOC2008, CV_VOC2009, CV_VOC2010};
 102 enum VocPlotType {CV_VOC_PLOT_SCREEN, CV_VOC_PLOT_PNG};
 103 enum VocGT {CV_VOC_GT_NONE, CV_VOC_GT_DIFFICULT, CV_VOC_GT_PRESENT};
 104 enum VocConfCond {CV_VOC_CCOND_RECALL, CV_VOC_CCOND_SCORETHRESH};
 105 enum VocTask {CV_VOC_TASK_CLASSIFICATION, CV_VOC_TASK_DETECTION};
 106
 107 class ObdImage
 108 {
 109 public:
 110     ObdImage(string p_id, string p_path) : id(p_id), path(p_path) {}
 111     string id;
 112     string path;
 113 };
 114
 115 //used by getDetectorGroundTruth to sort a two dimensional list of floats in descending order
 116 class ObdScoreIndexSorter
 117 {
 118 public:
 119     float score;
 120     int image_idx;
 121     int obj_idx;
 122     bool operator < (const ObdScoreIndexSorter& compare) const {return (score < compare.score);}
 123 };
 124
 125 class VocData
 126 {
 127 public:
 128     VocData( const string& vocPath, bool useTestDataset )
 129         { initVoc( vocPath, useTestDataset ); }
 130     ~VocData(){}
 131     /* functions for returning classification/object data for multiple images given an object class */
 132     void getClassImages(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<char>& object_present);
 133     void getClassObjects(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<vector<ObdObject> >& objects);
 134     void getClassObjects(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<vector<ObdObject> >& objects, vector<vector<VocObjectData> >& object_data, vector<VocGT>& ground_truth);
 135     /* functions for returning object data for a single image given an image id */
 136     ObdImage getObjects(const string& id, vector<ObdObject>& objects);
 137     ObdImage getObjects(const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data);
 138     ObdImage getObjects(const string& obj_class, const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data, VocGT& ground_truth);
 139     /* functions for returning the ground truth (present/absent) for groups of images */
 140     void getClassifierGroundTruth(const string& obj_class, const vector<ObdImage>& images, vector<char>& ground_truth);
 141     void getClassifierGroundTruth(const string& obj_class, const vector<string>& images, vector<char>& ground_truth);
 142     int getDetectorGroundTruth(const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<vector<Rect> >& bounding_boxes, const vector<vector<float> >& scores, vector<vector<char> >& ground_truth, vector<vector<char> >& detection_difficult, bool ignore_difficult = true);
 143     /* functions for writing VOC-compatible results files */
 144     void writeClassifierResultsFile(const string& out_dir, const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<float>& scores, const int competition = 1, const bool overwrite_ifexists = false);
 145     /* functions for calculating metrics from a set of classification/detection results */
 146     string getResultsFilename(const string& obj_class, const VocTask task, const ObdDatasetType dataset, const int competition = -1, const int number = -1);
 147     void calcClassifierPrecRecall(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap, vector<size_t>& ranking);
 148     void calcClassifierPrecRecall(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap);
 149     void calcClassifierPrecRecall(const string& input_file, vector<float>& precision, vector<float>& recall, float& ap, bool outputRankingFile = false);
 150     /* functions for calculating confusion matrices */
 151     void calcClassifierConfMatRow(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, const VocConfCond cond, const float threshold, vector<string>& output_headers, vector<float>& output_values);
 152     void calcDetectorConfMatRow(const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<vector<float> >& scores, const vector<vector<Rect> >& bounding_boxes, const VocConfCond cond, const float threshold, vector<string>& output_headers, vector<float>& output_values, bool ignore_difficult = true);
 153     /* functions for outputting gnuplot output files */
 154     void savePrecRecallToGnuplot(const string& output_file, const vector<float>& precision, const vector<float>& recall, const float ap, const string title = string(), const VocPlotType plot_type = CV_VOC_PLOT_SCREEN);
 155     /* functions for reading in result/ground truth files */
 156     void readClassifierGroundTruth(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<char>& object_present);
 157     void readClassifierResultsFile(const std:: string& input_file, vector<ObdImage>& images, vector<float>& scores);
 158     void readDetectorResultsFile(const string& input_file, vector<ObdImage>& images, vector<vector<float> >& scores, vector<vector<Rect> >& bounding_boxes);
 159     /* functions for getting dataset info */
 160     const vector<string>& getObjectClasses();
 161     string getResultsDirectory();
 162 protected:
 163     void initVoc( const string& vocPath, const bool useTestDataset );
 164     void initVoc2007to2010( const string& vocPath, const bool useTestDataset);
 165     void readClassifierGroundTruth(const string& filename, vector<string>& image_codes, vector<char>& object_present);
 166     void readClassifierResultsFile(const string& input_file, vector<string>& image_codes, vector<float>& scores);
 167     void readDetectorResultsFile(const string& input_file, vector<string>& image_codes, vector<vector<float> >& scores, vector<vector<Rect> >& bounding_boxes);
 168     void extractVocObjects(const string filename, vector<ObdObject>& objects, vector<VocObjectData>& object_data);
 169     string getImagePath(const string& input_str);
 170
 171     void getClassImages_impl(const string& obj_class, const string& dataset_str, vector<ObdImage>& images, vector<char>& object_present);
 172     void calcPrecRecall_impl(const vector<char>& ground_truth, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap, vector<size_t>& ranking, int recall_normalization = -1);
 173
 174     //test two bounding boxes to see if they meet the overlap criteria defined in the VOC documentation
 175     float testBoundingBoxesForOverlap(const Rect detection, const Rect ground_truth);
 176     //extract class and dataset name from a VOC-standard classification/detection results filename
 177     void extractDataFromResultsFilename(const string& input_file, string& class_name, string& dataset_name);
 178     //get classifier ground truth for a single image
 179     bool getClassifierGroundTruthImage(const string& obj_class, const string& id);
 180
 181     //utility functions
 182     void getSortOrder(const vector<float>& values, vector<size_t>& order, bool descending = true);
 183     int stringToInteger(const string input_str);
 184     void readFileToString(const string filename, string& file_contents);
 185     string integerToString(const int input_int);
 186     string checkFilenamePathsep(const string filename, bool add_trailing_slash = false);
 187     void convertImageCodesToObdImages(const vector<string>& image_codes, vector<ObdImage>& images);
 188     int extractXMLBlock(const string src, const string tag, const int searchpos, string& tag_contents);
 189     //utility sorter
 190     struct orderingSorter
 191     {
 192         bool operator ()(std::pair<size_t, vector<float>::const_iterator> const& a, std::pair<size_t, vector<float>::const_iterator> const& b)
 193         {
 194             return (*a.second) > (*b.second);
 195         }
 196     };
 197     //data members
 198     string m_vocPath;
 199     string m_vocName;
 200     //string m_resPath;
 201
 202     string m_annotation_path;
 203     string m_image_path;
 204     string m_imageset_path;
 205     string m_class_imageset_path;
 206
 207     vector<string> m_classifier_gt_all_ids;
 208     vector<char> m_classifier_gt_all_present;
 209     string m_classifier_gt_class;
 210
 211     //data members
 212     string m_train_set;
 213     string m_test_set;
 214
 215     vector<string> m_object_classes;
 216
 217
 218     float m_min_overlap;
 219     bool m_sampled_ap;
 220 };
 221
 222
 223 //Return the classification ground truth data for all images of a given VOC object class
 224 //--------------------------------------------------------------------------------------
 225 //INPUTS:
 226 // - obj_class          The VOC object class identifier string
 227 // - dataset            Specifies whether to extract images from the training or test set
 228 //OUTPUTS:
 229 // - images             An array of ObdImage containing info of all images extracted from the ground truth file
 230 // - object_present     An array of bools specifying whether the object defined by 'obj_class' is present in each image or not
 231 //NOTES:
 232 // This function is primarily useful for the classification task, where only
 233 // whether a given object is present or not in an image is required, and not each object instance's
 234 // position etc.
 235 void VocData::getClassImages(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<char>& object_present)
 236 {
 237     string dataset_str;
 238     //generate the filename of the classification ground-truth textfile for the object class
 239     if (dataset == CV_OBD_TRAIN)
 240     {
 241         dataset_str = m_train_set;
 242     } else {
 243         dataset_str = m_test_set;
 244     }
 245
 246     getClassImages_impl(obj_class, dataset_str, images, object_present);
 247 }
 248
 249 void VocData::getClassImages_impl(const string& obj_class, const string& dataset_str, vector<ObdImage>& images, vector<char>& object_present)
 250 {
 251     //generate the filename of the classification ground-truth textfile for the object class
 252     string gtFilename = m_class_imageset_path;
 253     gtFilename.replace(gtFilename.find("%s"),2,obj_class);
 254     gtFilename.replace(gtFilename.find("%s"),2,dataset_str);
 255
 256     //parse the ground truth file, storing in two separate vectors
 257     //for the image code and the ground truth value
 258     vector<string> image_codes;
 259     readClassifierGroundTruth(gtFilename, image_codes, object_present);
 260
 261     //prepare output arrays
 262     images.clear();
 263
 264     convertImageCodesToObdImages(image_codes, images);
 265 }
 266
 267 //Return the object data for all images of a given VOC object class
 268 //-----------------------------------------------------------------
 269 //INPUTS:
 270 // - obj_class          The VOC object class identifier string
 271 // - dataset            Specifies whether to extract images from the training or test set
 272 //OUTPUTS:
 273 // - images             An array of ObdImage containing info of all images in chosen dataset (tag, path etc.)
 274 // - objects            Contains the extended object info (bounding box etc.) for each object instance in each image
 275 // - object_data        Contains VOC-specific extended object info (marked difficult etc.)
 276 // - ground_truth       Specifies whether there are any difficult/non-difficult instances of the current
 277 //                          object class within each image
 278 //NOTES:
 279 // This function returns extended object information in addition to the absent/present
 280 // classification data returned by getClassImages. The objects returned for each image in the 'objects'
 281 // array are of all object classes present in the image, and not just the class defined by 'obj_class'.
 282 // 'ground_truth' can be used to determine quickly whether an object instance of the given class is present
 283 // in an image or not.
 284 void VocData::getClassObjects(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<vector<ObdObject> >& objects)
 285 {
 286     vector<vector<VocObjectData> > object_data;
 287     vector<VocGT> ground_truth;
 288
 289     getClassObjects(obj_class,dataset,images,objects,object_data,ground_truth);
 290 }
 291
 292 void VocData::getClassObjects(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<vector<ObdObject> >& objects, vector<vector<VocObjectData> >& object_data, vector<VocGT>& ground_truth)
 293 {
 294     //generate the filename of the classification ground-truth textfile for the object class
 295     string gtFilename = m_class_imageset_path;
 296     gtFilename.replace(gtFilename.find("%s"),2,obj_class);
 297     if (dataset == CV_OBD_TRAIN)
 298     {
 299         gtFilename.replace(gtFilename.find("%s"),2,m_train_set);
 300     } else {
 301         gtFilename.replace(gtFilename.find("%s"),2,m_test_set);
 302     }
 303
 304     //parse the ground truth file, storing in two separate vectors
 305     //for the image code and the ground truth value
 306     vector<string> image_codes;
 307     vector<char> object_present;
 308     readClassifierGroundTruth(gtFilename, image_codes, object_present);
 309
 310     //prepare output arrays
 311     images.clear();
 312     objects.clear();
 313     object_data.clear();
 314     ground_truth.clear();
 315
 316     string annotationFilename;
 317     vector<ObdObject> image_objects;
 318     vector<VocObjectData> image_object_data;
 319     VocGT image_gt;
 320
 321     //transfer to output arrays and read in object data for each image
 322     for (size_t i = 0; i < image_codes.size(); ++i)
 323     {
 324         ObdImage image = getObjects(obj_class, image_codes[i], image_objects, image_object_data, image_gt);
 325
 326         images.push_back(image);
 327         objects.push_back(image_objects);
 328         object_data.push_back(image_object_data);
 329         ground_truth.push_back(image_gt);
 330     }
 331 }
 332
 333 //Return ground truth data for the objects present in an image with a given UID
 334 //-----------------------------------------------------------------------------
 335 //INPUTS:
 336 // - id                 VOC Dataset unique identifier (string code in form YYYY_XXXXXX where YYYY is the year)
 337 //OUTPUTS:
 338 // - obj_class (*3)     Specifies the object class to use to resolve 'ground_truth'
 339 // - objects            Contains the extended object info (bounding box etc.) for each object in the image
 340 // - object_data (*2,3) Contains VOC-specific extended object info (marked difficult etc.)
 341 // - ground_truth (*3)  Specifies whether there are any difficult/non-difficult instances of the current
 342 //                          object class within the image
 343 //RETURN VALUE:
 344 // ObdImage containing path and other details of image file with given code
 345 //NOTES:
 346 // There are three versions of this function
 347 //  * One returns a simple array of objects given an id [1]
 348 //  * One returns the same as (1) plus VOC specific object data [2]
 349 //  * One returns the same as (2) plus the ground_truth flag. This also requires an extra input obj_class [3]
 350 ObdImage VocData::getObjects(const string& id, vector<ObdObject>& objects)
 351 {
 352     vector<VocObjectData> object_data;
 353     ObdImage image = getObjects(id, objects, object_data);
 354
 355     return image;
 356 }
 357
 358 ObdImage VocData::getObjects(const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data)
 359 {
 360     //first generate the filename of the annotation file
 361     string annotationFilename = m_annotation_path;
 362
 363     annotationFilename.replace(annotationFilename.find("%s"),2,id);
 364
 365     //extract objects contained in the current image from the xml
 366     extractVocObjects(annotationFilename,objects,object_data);
 367
 368     //generate image path from extracted string code
 369     string path = getImagePath(id);
 370
 371     ObdImage image(id, path);
 372     return image;
 373 }
 374
 375 ObdImage VocData::getObjects(const string& obj_class, const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data, VocGT& ground_truth)
 376 {
 377
 378     //extract object data (except for ground truth flag)
 379     ObdImage image = getObjects(id,objects,object_data);
 380
 381     //pregenerate a flag to indicate whether the current class is present or not in the image
 382     ground_truth = CV_VOC_GT_NONE;
 383     //iterate through all objects in current image
 384     for (size_t j = 0; j < objects.size(); ++j)
 385     {
 386         if (objects[j].object_class == obj_class)
 387         {
 388             if (object_data[j].difficult == false)
 389             {
 390                 //if at least one non-difficult example is present, this flag is always set to CV_VOC_GT_PRESENT
 391                 ground_truth = CV_VOC_GT_PRESENT;
 392                 break;
 393             } else {
 394                 //set if at least one object instance is present, but it is marked difficult
 395                 ground_truth = CV_VOC_GT_DIFFICULT;
 396             }
 397         }
 398     }
 399
 400     return image;
 401 }
 402
 403 //Return ground truth data for the presence/absence of a given object class in an arbitrary array of images
 404 //---------------------------------------------------------------------------------------------------------
 405 //INPUTS:
 406 // - obj_class          The VOC object class identifier string
 407 // - images             An array of ObdImage OR strings containing the images for which ground truth
 408 //                          will be computed
 409 //OUTPUTS:
 410 // - ground_truth       An output array indicating the presence/absence of obj_class within each image
 411 void VocData::getClassifierGroundTruth(const string& obj_class, const vector<ObdImage>& images, vector<char>& ground_truth)
 412 {
 413     vector<char>(images.size()).swap(ground_truth);
 414
 415     vector<ObdObject> objects;
 416     vector<VocObjectData> object_data;
 417     vector<char>::iterator gt_it = ground_truth.begin();
 418     for (vector<ObdImage>::const_iterator it = images.begin(); it != images.end(); ++it, ++gt_it)
 419     {
 420         //getObjects(obj_class, it->id, objects, object_data, voc_ground_truth);
 421         (*gt_it) = (getClassifierGroundTruthImage(obj_class, it->id));
 422     }
 423 }
 424
 425 void VocData::getClassifierGroundTruth(const string& obj_class, const vector<string>& images, vector<char>& ground_truth)
 426 {
 427     vector<char>(images.size()).swap(ground_truth);
 428
 429     vector<ObdObject> objects;
 430     vector<VocObjectData> object_data;
 431     vector<char>::iterator gt_it = ground_truth.begin();
 432     for (vector<string>::const_iterator it = images.begin(); it != images.end(); ++it, ++gt_it)
 433     {
 434         //getObjects(obj_class, (*it), objects, object_data, voc_ground_truth);
 435         (*gt_it) = (getClassifierGroundTruthImage(obj_class, (*it)));
 436     }
 437 }
 438
 439 //Return ground truth data for the accuracy of detection results
 440 //--------------------------------------------------------------
 441 //INPUTS:
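// - obj_class          The VOC object class identifier string
// - dataset            Specifies whether the training or test set is used when counting the ground
//                          truth objects for the recall normalization (the return value)
// - images             An array of ObdImage containing the images for which ground truth
//                          will be computed
// - bounding_boxes     A 2D input array containing the bounding box rects of the objects of
//                          obj_class which were detected in each image
// - scores             A 2D input array (same dimensions as bounding_boxes) containing the score of
//                          each detection; detections are processed in descending order of score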
 447 //OUTPUTS:
 448 // - ground_truth       A 2D output array indicating whether each object detection was accurate
 449 //                          or not
 450 // - detection_difficult A 2D output array indicating whether the detection fired on an object
 451 //                          marked as 'difficult'. This allows it to be ignored if necessary
 452 //                          (the voc documentation specifies objects marked as difficult
 453 //                          have no effects on the results and are effectively ignored)
 454 // - (ignore_difficult) If set to true, objects marked as difficult will be ignored when returning
 455 //                          the number of hits for p-r normalization (default = true)
 456 //RETURN VALUE:
 457 //                      Returns the number of object hits in total in the gt to allow proper normalization
 458 //                          of a p-r curve
 459 //NOTES:
 460 // As stated in the VOC documentation, multiple detections of the same object in an image are
 461 // considered FALSE detections e.g. 5 detections of a single object is counted as 1 correct
 462 // detection and 4 false detections - it is the responsibility of the participant's system
 463 // to filter multiple detections from its output
 464 int VocData::getDetectorGroundTruth(const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<vector<Rect> >& bounding_boxes, const vector<vector<float> >& scores, vector<vector<char> >& ground_truth, vector<vector<char> >& detection_difficult, bool ignore_difficult)
 465 {
 466     int recall_normalization = 0;
 467
 468     /* first create a list of indices referring to the elements of bounding_boxes and scores in
 469      * descending order of scores */
 470     vector<ObdScoreIndexSorter> sorted_ids;
 471     {
 472         /* first count how many objects to allow preallocation */
 473         size_t obj_count = 0;
 474         CV_Assert(images.size() == bounding_boxes.size());
 475         CV_Assert(scores.size() == bounding_boxes.size());
 476         for (size_t im_idx = 0; im_idx < scores.size(); ++im_idx)
 477         {
 478             CV_Assert(scores[im_idx].size() == bounding_boxes[im_idx].size());
 479             obj_count += scores[im_idx].size();
 480         }
 481         /* preallocate id vector */
 482         sorted_ids.resize(obj_count);
 483         /* now copy across scores and indexes to preallocated vector */
 484         int flat_pos = 0;
 485         for (size_t im_idx = 0; im_idx < scores.size(); ++im_idx)
 486         {
 487             for (size_t ob_idx = 0; ob_idx < scores[im_idx].size(); ++ob_idx)
 488             {
 489                 sorted_ids[flat_pos].score = scores[im_idx][ob_idx];
 490                 sorted_ids[flat_pos].image_idx = (int)im_idx;
 491                 sorted_ids[flat_pos].obj_idx = (int)ob_idx;
 492                 ++flat_pos;
 493             }
 494         }
 495         /* and sort the vector in descending order of score */
 496         std::sort(sorted_ids.begin(),sorted_ids.end());
 497         std::reverse(sorted_ids.begin(),sorted_ids.end());
 498     }
 499
 500     /* prepare ground truth + difficult vector (1st dimension) */
 501     vector<vector<char> >(images.size()).swap(ground_truth);
 502     vector<vector<char> >(images.size()).swap(detection_difficult);
 503     vector<vector<char> > detected(images.size());
 504
 505     vector<vector<ObdObject> > img_objects(images.size());
 506     vector<vector<VocObjectData> > img_object_data(images.size());
 507     /* preload object ground truth bounding box data */
 508     {
 509         vector<vector<ObdObject> > img_objects_all(images.size());
 510         vector<vector<VocObjectData> > img_object_data_all(images.size());
 511         for (size_t image_idx = 0; image_idx < images.size(); ++image_idx)
 512         {
 513             /* prepopulate ground truth bounding boxes */
 514             getObjects(images[image_idx].id, img_objects_all[image_idx], img_object_data_all[image_idx]);
 515             /* meanwhile, also set length of target ground truth + difficult vector to same as number of object detections (2nd dimension) */
 516             ground_truth[image_idx].resize(bounding_boxes[image_idx].size());
 517             detection_difficult[image_idx].resize(bounding_boxes[image_idx].size());
 518         }
 519
 520         /* save only instances of the object class concerned */
 521         for (size_t image_idx = 0; image_idx < images.size(); ++image_idx)
 522         {
 523             for (size_t obj_idx = 0; obj_idx < img_objects_all[image_idx].size(); ++obj_idx)
 524             {
 525                 if (img_objects_all[image_idx][obj_idx].object_class == obj_class)
 526                 {
 527                     img_objects[image_idx].push_back(img_objects_all[image_idx][obj_idx]);
 528                     img_object_data[image_idx].push_back(img_object_data_all[image_idx][obj_idx]);
 529                 }
 530             }
 531             detected[image_idx].resize(img_objects[image_idx].size(), false);
 532         }
 533     }
 534
 535     /* calculate the total number of objects in the ground truth for the current dataset */
 536     {
 537         vector<ObdImage> gt_images;
 538         vector<char> gt_object_present;
 539         getClassImages(obj_class, dataset, gt_images, gt_object_present);
 540
 541         for (size_t image_idx = 0; image_idx < gt_images.size(); ++image_idx)
 542         {
 543             vector<ObdObject> gt_img_objects;
 544             vector<VocObjectData> gt_img_object_data;
 545             getObjects(gt_images[image_idx].id, gt_img_objects, gt_img_object_data);
 546             for (size_t obj_idx = 0; obj_idx < gt_img_objects.size(); ++obj_idx)
 547             {
 548                 if (gt_img_objects[obj_idx].object_class == obj_class)
 549                 {
 550                     if ((gt_img_object_data[obj_idx].difficult == false) || (ignore_difficult == false))
 551                         ++recall_normalization;
 552                 }
 553             }
 554         }
 555     }
 556
 557 #ifdef PR_DEBUG
 558     int printed_count = 0;
 559 #endif
 560     /* now iterate through detections in descending order of score, assigning to ground truth bounding boxes if possible */
 561     for (size_t detect_idx = 0; detect_idx < sorted_ids.size(); ++detect_idx)
 562     {
 563         //read in indexes to make following code easier to read
 564         int im_idx = sorted_ids[detect_idx].image_idx;
 565         int ob_idx = sorted_ids[detect_idx].obj_idx;
 566         //set ground truth for the current object to false by default
 567         ground_truth[im_idx][ob_idx] = false;
 568         detection_difficult[im_idx][ob_idx] = false;
 569         float maxov = -1.0;
 570         bool max_is_difficult = false;
 571         int max_gt_obj_idx = -1;
 572         //-- for each detected object iterate through objects present in the bounding box ground truth --
 573         for (size_t gt_obj_idx = 0; gt_obj_idx < img_objects[im_idx].size(); ++gt_obj_idx)
 574         {
 575             if (detected[im_idx][gt_obj_idx] == false)
 576             {
 577                 //check if the detected object and ground truth object overlap by a sufficient margin
 578                 float ov = testBoundingBoxesForOverlap(bounding_boxes[im_idx][ob_idx], img_objects[im_idx][gt_obj_idx].boundingBox);
 579                 if (ov != -1.0)
 580                 {
 581                     //if all conditions are met store the overlap score and index (as objects are assigned to the highest scoring match)
 582                     if (ov > maxov)
 583                     {
 584                         maxov = ov;
 585                         max_gt_obj_idx = (int)gt_obj_idx;
 586                         //store whether the maximum detection is marked as difficult or not
 587                         max_is_difficult = (img_object_data[im_idx][gt_obj_idx].difficult);
 588                     }
 589                 }
 590             }
 591         }
 592         //-- if a match was found, set the ground truth of the current object to true --
 593         if (maxov != -1.0)
 594         {
 595             CV_Assert(max_gt_obj_idx != -1);
 596             ground_truth[im_idx][ob_idx] = true;
 597             //store whether the maximum detection was marked as 'difficult' or not
 598             detection_difficult[im_idx][ob_idx] = max_is_difficult;
 599             //remove the ground truth object so it doesn't match with subsequent detected objects
 600             //** this is the behaviour defined by the voc documentation **
 601             detected[im_idx][max_gt_obj_idx] = true;
 602         }
 603 #ifdef PR_DEBUG
 604         if (printed_count < 10)
 605         {
 606             cout << printed_count << ": id=" << images[im_idx].id << ", score=" << scores[im_idx][ob_idx] << " (" << ob_idx << ") [" << bounding_boxes[im_idx][ob_idx].x << "," <<
 607                     bounding_boxes[im_idx][ob_idx].y << "," << bounding_boxes[im_idx][ob_idx].width + bounding_boxes[im_idx][ob_idx].x <<
 608                     "," << bounding_boxes[im_idx][ob_idx].height + bounding_boxes[im_idx][ob_idx].y << "] detected=" << ground_truth[im_idx][ob_idx] <<
 609                     ", difficult=" << detection_difficult[im_idx][ob_idx] << endl;
 610             ++printed_count;
 611             /* print ground truth */
 612             for (int gt_obj_idx = 0; gt_obj_idx < img_objects[im_idx].size(); ++gt_obj_idx)
 613             {
 614                 cout << "    GT: [" << img_objects[im_idx][gt_obj_idx].boundingBox.x << "," <<
 615                         img_objects[im_idx][gt_obj_idx].boundingBox.y << "," << img_objects[im_idx][gt_obj_idx].boundingBox.width + img_objects[im_idx][gt_obj_idx].boundingBox.x <<
 616                         "," << img_objects[im_idx][gt_obj_idx].boundingBox.height + img_objects[im_idx][gt_obj_idx].boundingBox.y << "]";
 617                 if (gt_obj_idx == max_gt_obj_idx) cout << " <--- (" << maxov << " overlap)";
 618                 cout << endl;
 619             }
 620         }
 621 #endif
 622     }
 623
 624     return recall_normalization;
 625 }
 626
 627 //Write VOC-compliant classifier results file
 628 //-------------------------------------------
 629 //INPUTS:
 630 // - obj_class          The VOC object class identifier string
 631 // - dataset            Specifies whether working with the training or test set
 632 // - images             An array of ObdImage containing the images for which data will be saved to the result file
 633 // - scores             A corresponding array of confidence scores given a query
 634 // - (competition)      If specified, defines which competition the results are for (see VOC documentation - default 1)
 635 //NOTES:
 636 // The result file path and filename are determined automatically using m_results_directory as a base
 637 void VocData::writeClassifierResultsFile( const string& out_dir, const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<float>& scores, const int competition, const bool overwrite_ifexists)
 638 {
 639     CV_Assert(images.size() == scores.size());
 640
 641     string output_file_base, output_file;
 642     if (dataset == CV_OBD_TRAIN)
 643     {
 644         output_file_base = out_dir + "/comp" + integerToString(competition) + "_cls_" + m_train_set + "_" + obj_class;
 645     } else {
 646         output_file_base = out_dir + "/comp" + integerToString(competition) + "_cls_" + m_test_set + "_" + obj_class;
 647     }
 648     output_file = output_file_base + ".txt";
 649
 650     //check if file exists, and if so create a numbered new file instead
 651     if (overwrite_ifexists == false)
 652     {
 653         struct stat stFileInfo;
 654         if (stat(output_file.c_str(),&stFileInfo) == 0)
 655         {
 656             string output_file_new;
 657             int filenum = 0;
 658             do
 659             {
 660                 ++filenum;
 661                 output_file_new = output_file_base + "_" + integerToString(filenum);
 662                 output_file = output_file_new + ".txt";
 663             } while (stat(output_file.c_str(),&stFileInfo) == 0);
 664         }
 665     }
 666
 667     //output data to file
 668     std::ofstream result_file(output_file.c_str());
 669     if (result_file.is_open())
 670     {
 671         for (size_t i = 0; i < images.size(); ++i)
 672         {
 673             result_file << images[i].id << " " << scores[i] << endl;
 674         }
 675         result_file.close();
 676     } else {
 677         string err_msg = "could not open classifier results file '" + output_file + "' for writing. Before running for the first time, a 'results' subdirectory should be created within the VOC dataset base directory. e.g. if the VOC data is stored in /VOC/VOC2010 then the path /VOC/results must be created.";
 678         CV_Error(CV_StsError,err_msg.c_str());
 679     }
 680 }
 681
 682 //---------------------------------------
 683 //CALCULATE METRICS FROM VOC RESULTS DATA
 684 //---------------------------------------
 685
 686 //Utility function to construct a VOC-standard classification results filename
 687 //----------------------------------------------------------------------------
 688 //INPUTS:
 689 // - obj_class          The VOC object class identifier string
 690 // - task               Specifies whether to generate a filename for the classification or detection task
 691 // - dataset            Specifies whether working with the training or test set
 692 // - (competition)      If specified, defines which competition the results are for (see VOC documentation
 693 //                      default of -1 means this is set to 1 for the classification task and 3 for the detection task)
 694 // - (number)           If specified and above 0, defines which of a number of duplicate results file produced for a given set of
 695 //                      of settings should be used (this number will be added as a postfix to the filename)
 696 //NOTES:
 697 // This is primarily useful for returning the filename of a classification file previously computed using writeClassifierResultsFile
 698 // for example when calling calcClassifierPrecRecall
 699 string VocData::getResultsFilename(const string& obj_class, const VocTask task, const ObdDatasetType dataset, const int competition, const int number)
 700 {
 701     if ((competition < 1) && (competition != -1))
 702         CV_Error(CV_StsBadArg,"competition argument should be a positive non-zero number or -1 to accept the default");
 703     if ((number < 1) && (number != -1))
 704         CV_Error(CV_StsBadArg,"number argument should be a positive non-zero number or -1 to accept the default");
 705
 706     string dset, task_type;
 707
 708     if (dataset == CV_OBD_TRAIN)
 709     {
 710         dset = m_train_set;
 711     } else {
 712         dset = m_test_set;
 713     }
 714
 715     int comp = competition;
 716     if (task == CV_VOC_TASK_CLASSIFICATION)
 717     {
 718         task_type = "cls";
 719         if (comp == -1) comp = 1;
 720     } else {
 721         task_type = "det";
 722         if (comp == -1) comp = 3;
 723     }
 724
 725     stringstream ss;
 726     if (number < 1)
 727     {
 728         ss << "comp" << comp << "_" << task_type << "_" << dset << "_" << obj_class << ".txt";
 729     } else {
 730         ss << "comp" << comp << "_" << task_type << "_" << dset << "_" << obj_class << "_" << number << ".txt";
 731     }
 732
 733     string filename = ss.str();
 734     return filename;
 735 }
 736
 737 //Calculate metrics for classification results
 738 //--------------------------------------------
 739 //INPUTS:
 740 // - ground_truth       A vector of booleans determining whether the currently tested class is present in each input image
 741 // - scores             A vector containing the similarity score for each input image (higher is more similar)
 742 //OUTPUTS:
 743 // - precision          A vector containing the precision calculated at each datapoint of a p-r curve generated from the result set
 744 // - recall             A vector containing the recall calculated at each datapoint of a p-r curve generated from the result set
 745 // - ap                The ap metric calculated from the result set
 746 // - (ranking)          A vector of the same length as 'ground_truth' and 'scores' containing the order of the indices in both of
 747 //                      these arrays when sorting by the ranking score in descending order
 748 //NOTES:
 749 // The result file path and filename are determined automatically using m_results_directory as a base
 750 void VocData::calcClassifierPrecRecall(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap, vector<size_t>& ranking)
 751 {
 752     vector<char> res_ground_truth;
 753     getClassifierGroundTruth(obj_class, images, res_ground_truth);
 754
 755     calcPrecRecall_impl(res_ground_truth, scores, precision, recall, ap, ranking);
 756 }
 757
 758 void VocData::calcClassifierPrecRecall(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap)
 759 {
 760     vector<char> res_ground_truth;
 761     getClassifierGroundTruth(obj_class, images, res_ground_truth);
 762
 763     vector<size_t> ranking;
 764     calcPrecRecall_impl(res_ground_truth, scores, precision, recall, ap, ranking);
 765 }
 766
 767 //< Overloaded version which accepts VOC classification result file input instead of array of scores/ground truth >
 768 //INPUTS:
 769 // - input_file         The path to the VOC standard results file to use for calculating precision/recall
 770 //                      If a full path is not specified, it is assumed this file is in the VOC standard results directory
 771 //                      A VOC standard filename can be retrieved (as used by writeClassifierResultsFile) by calling  getClassifierResultsFilename
 772
 773 void VocData::calcClassifierPrecRecall(const string& input_file, vector<float>& precision, vector<float>& recall, float& ap, bool outputRankingFile)
 774 {
 775     //read in classification results file
 776     vector<string> res_image_codes;
 777     vector<float> res_scores;
 778
 779     string input_file_std = checkFilenamePathsep(input_file);
 780     readClassifierResultsFile(input_file_std, res_image_codes, res_scores);
 781
 782     //extract the object class and dataset from the results file filename
 783     string class_name, dataset_name;
 784     extractDataFromResultsFilename(input_file_std, class_name, dataset_name);
 785
 786     //generate the ground truth for the images extracted from the results file
 787     vector<char> res_ground_truth;
 788
 789     getClassifierGroundTruth(class_name, res_image_codes, res_ground_truth);
 790
 791     if (outputRankingFile)
 792     {
 793         /* 1. store sorting order by score (descending) in 'order' */
 794         vector<std::pair<size_t, vector<float>::const_iterator> > order(res_scores.size());
 795
 796         size_t n = 0;
 797         for (vector<float>::const_iterator it = res_scores.begin(); it != res_scores.end(); ++it, ++n)
 798             order[n] = make_pair(n, it);
 799
 800         std::sort(order.begin(),order.end(),orderingSorter());
 801
 802         /* 2. save ranking results to text file */
 803         string input_file_std1 = checkFilenamePathsep(input_file);
 804         size_t fnamestart = input_file_std1.rfind("/");
 805         string scoregt_file_str = input_file_std1.substr(0,fnamestart+1) + "scoregt_" + class_name + ".txt";
 806         std::ofstream scoregt_file(scoregt_file_str.c_str());
 807         if (scoregt_file.is_open())
 808         {
 809             for (size_t i = 0; i < res_scores.size(); ++i)
 810             {
 811                 scoregt_file << res_image_codes[order[i].first] << " " << res_scores[order[i].first] << " " << res_ground_truth[order[i].first] << endl;
 812             }
 813             scoregt_file.close();
 814         } else {
 815             string err_msg = "could not open scoregt file '" + scoregt_file_str + "' for writing.";
 816             CV_Error(CV_StsError,err_msg.c_str());
 817         }
 818     }
 819
 820     //finally, calculate precision+recall+ap
 821     vector<size_t> ranking;
 822     calcPrecRecall_impl(res_ground_truth,res_scores,precision,recall,ap,ranking);
 823 }
 824
 825 //< Protected implementation of Precision-Recall calculation used by both calcClassifierPrecRecall and calcDetectorPrecRecall >
 826
 827 void VocData::calcPrecRecall_impl(const vector<char>& ground_truth, const vector<float>& scores, vector<float>& precision, vector<float>& recall, float& ap, vector<size_t>& ranking, int recall_normalization)
 828 {
 829     CV_Assert(ground_truth.size() == scores.size());
 830
 831     //add extra element for p-r at 0 recall (in case that first retrieved is positive)
 832     vector<float>(scores.size()+1).swap(precision);
 833     vector<float>(scores.size()+1).swap(recall);
 834
 835     // SORT RESULTS BY THEIR SCORE
 836     /* 1. store sorting order in 'order' */
 837     VocData::getSortOrder(scores, ranking);
 838
 839 #ifdef PR_DEBUG
 840     std::ofstream scoregt_file("D:/pr.txt");
 841     if (scoregt_file.is_open())
 842     {
 843        for (int i = 0; i < scores.size(); ++i)
 844        {
 845            scoregt_file << scores[ranking[i]] << " " << ground_truth[ranking[i]] << endl;
 846        }
 847        scoregt_file.close();
 848     }
 849 #endif
 850
 851     // CALCULATE PRECISION+RECALL
 852
 853     int retrieved_hits = 0;
 854
 855     int recall_norm;
 856     if (recall_normalization != -1)
 857     {
 858         recall_norm = recall_normalization;
 859     } else {
 860         recall_norm = (int)std::count_if(ground_truth.begin(),ground_truth.end(),std::bind2nd(std::equal_to<char>(),(char)1));
 861     }
 862
 863     ap = 0;
 864     recall[0] = 0;
 865     for (size_t idx = 0; idx < ground_truth.size(); ++idx)
 866     {
 867         if (ground_truth[ranking[idx]] != 0) ++retrieved_hits;
 868
 869         precision[idx+1] = static_cast<float>(retrieved_hits)/static_cast<float>(idx+1);
 870         recall[idx+1] = static_cast<float>(retrieved_hits)/static_cast<float>(recall_norm);
 871
 872         if (idx == 0)
 873         {
 874             //add further point at 0 recall with the same precision value as the first computed point
 875             precision[idx] = precision[idx+1];
 876         }
 877         if (recall[idx+1] == 1.0)
 878         {
 879             //if recall = 1, then end early as all positive images have been found
 880             recall.resize(idx+2);
 881             precision.resize(idx+2);
 882             break;
 883         }
 884     }
 885
 886     /* ap calculation */
 887     if (m_sampled_ap == false)
 888     {
 889         // FOR VOC2010+ AP IS CALCULATED FROM ALL DATAPOINTS
 890         /* make precision monotonically decreasing for purposes of calculating ap */
 891         vector<float> precision_monot(precision.size());
 892         vector<float>::iterator prec_m_it = precision_monot.begin();
 893         for (vector<float>::iterator prec_it = precision.begin(); prec_it != precision.end(); ++prec_it, ++prec_m_it)
 894         {
 895             vector<float>::iterator max_elem;
 896             max_elem = std::max_element(prec_it,precision.end());
 897             (*prec_m_it) = (*max_elem);
 898         }
 899         /* calculate ap */
 900         for (size_t idx = 0; idx < (recall.size()-1); ++idx)
 901         {
 902             ap += (recall[idx+1] - recall[idx])*precision_monot[idx+1] +   //no need to take min of prec - is monotonically decreasing
 903                     0.5f*(recall[idx+1] - recall[idx])*std::abs(precision_monot[idx+1] - precision_monot[idx]);
 904         }
 905     } else {
 906         // FOR BEFORE VOC2010 AP IS CALCULATED BY SAMPLING PRECISION AT RECALL 0.0,0.1,..,1.0
 907
 908         for (float recall_pos = 0.f; recall_pos <= 1.f; recall_pos += 0.1f)
 909         {
 910             //find iterator of the precision corresponding to the first recall >= recall_pos
 911             vector<float>::iterator recall_it = recall.begin();
 912             vector<float>::iterator prec_it = precision.begin();
 913
 914             while ((*recall_it) < recall_pos)
 915             {
 916                 ++recall_it;
 917                 ++prec_it;
 918                 if (recall_it == recall.end()) break;
 919             }
 920
 921             /* if no recall >= recall_pos found, this level of recall is never reached so stop adding to ap */
 922             if (recall_it == recall.end()) break;
 923
 924             /* if the prec_it is valid, compute the max precision at this level of recall or higher */
 925             vector<float>::iterator max_prec = std::max_element(prec_it,precision.end());
 926
 927             ap += (*max_prec)/11;
 928         }
 929     }
 930 }
 931
 932 /* functions for calculating confusion matrix rows */
 933
 934 //Calculate rows of a confusion matrix
 935 //------------------------------------
 936 //INPUTS:
 937 // - obj_class          The VOC object class identifier string for the confusion matrix row to compute
 938 // - images             An array of ObdImage containing the images to use for the computation
 939 // - scores             A corresponding array of confidence scores for the presence of obj_class in each image
 940 // - cond               Defines whether to use a cut off point based on recall (CV_VOC_CCOND_RECALL) or score
 941 //                      (CV_VOC_CCOND_SCORETHRESH) the latter is useful for classifier detections where positive
 942 //                      values are positive detections and negative values are negative detections
 943 // - threshold          Threshold value for cond. In case of CV_VOC_CCOND_RECALL, is proportion recall (e.g. 0.5).
 944 //                      In the case of CV_VOC_CCOND_SCORETHRESH is the value above which to count results.
 945 //OUTPUTS:
 946 // - output_headers     An output vector of object class headers for the confusion matrix row
 947 // - output_values      An output vector of values for the confusion matrix row corresponding to the classes
 948 //                      defined in output_headers
 949 //NOTES:
 950 // The methodology used by the classifier version of this function is that true positives have a single unit
 951 // added to the obj_class column in the confusion matrix row, whereas false positives have a single unit
 952 // distributed in proportion between all the columns in the confusion matrix row corresponding to the objects
 953 // present in the image.
 954 void VocData::calcClassifierConfMatRow(const string& obj_class, const vector<ObdImage>& images, const vector<float>& scores, const VocConfCond cond, const float threshold, vector<string>& output_headers, vector<float>& output_values)
 955 {
 956     CV_Assert(images.size() == scores.size());
 957
 958     // SORT RESULTS BY THEIR SCORE
 959     /* 1. store sorting order in 'ranking' */
 960     vector<size_t> ranking;
 961     VocData::getSortOrder(scores, ranking);
 962
 963     // CALCULATE CONFUSION MATRIX ENTRIES
 964     /* prepare object category headers */
 965     output_headers = m_object_classes;
 966     vector<float>(output_headers.size(),0.0).swap(output_values);
 967     /* find the index of the target object class in the headers for later use */
 968     int target_idx;
 969     {
 970         vector<string>::iterator target_idx_it = std::find(output_headers.begin(),output_headers.end(),obj_class);
 971         /* if the target class can not be found, raise an exception */
 972         if (target_idx_it == output_headers.end())
 973         {
 974             string err_msg = "could not find the target object class '" + obj_class + "' in list of valid classes.";
 975             CV_Error(CV_StsError,err_msg.c_str());
 976         }
 977         /* convert iterator to index */
 978         target_idx = (int)std::distance(output_headers.begin(),target_idx_it);
 979     }
 980
 981     /* prepare variables related to calculating recall if using the recall threshold */
 982     int retrieved_hits = 0;
 983     int total_relevant = 0;
 984     if (cond == CV_VOC_CCOND_RECALL)
 985     {
 986         vector<char> ground_truth;
 987         /* in order to calculate the total number of relevant images for normalization of recall
 988             it's necessary to extract the ground truth for the images under consideration */
 989         getClassifierGroundTruth(obj_class, images, ground_truth);
 990         total_relevant = (int)std::count_if(ground_truth.begin(),ground_truth.end(),std::bind2nd(std::equal_to<char>(),(char)1));
 991     }
 992
 993     /* iterate through images */
 994     vector<ObdObject> img_objects;
 995     vector<VocObjectData> img_object_data;
 996     int total_images = 0;
 997     for (size_t image_idx = 0; image_idx < images.size(); ++image_idx)
 998     {
 999         /* if using the score as the break condition, check for it now */
1000         if (cond == CV_VOC_CCOND_SCORETHRESH)
1001         {
1002             if (scores[ranking[image_idx]] <= threshold) break;
1003         }
1004         /* if continuing for this iteration, increment the image counter for later normalization */
1005         ++total_images;
1006         /* for each image retrieve the objects contained */
1007         getObjects(images[ranking[image_idx]].id, img_objects, img_object_data);
1008         //check if the tested for object class is present
1009         if (getClassifierGroundTruthImage(obj_class, images[ranking[image_idx]].id))
1010         {
1011             //if the target class is present, assign fully to the target class element in the confusion matrix row
1012             output_values[target_idx] += 1.0;
1013             if (cond == CV_VOC_CCOND_RECALL) ++retrieved_hits;
1014         } else {
1015             //first delete all objects marked as difficult
1016             for (size_t obj_idx = 0; obj_idx < img_objects.size(); ++obj_idx)
1017             {
1018                 if (img_object_data[obj_idx].difficult == true)
1019                 {
1020                     vector<ObdObject>::iterator it1 = img_objects.begin();
1021                     std::advance(it1,obj_idx);
1022                     img_objects.erase(it1);
1023                     vector<VocObjectData>::iterator it2 = img_object_data.begin();
1024                     std::advance(it2,obj_idx);
1025                     img_object_data.erase(it2);
1026                     --obj_idx;
1027                 }
1028             }
1029             //if the target class is not present, add values to the confusion matrix row in equal proportions to all objects present in the image
1030             for (size_t obj_idx = 0; obj_idx < img_objects.size(); ++obj_idx)
1031             {
1032                 //find the index of the currently considered object
1033                 vector<string>::iterator class_idx_it = std::find(output_headers.begin(),output_headers.end(),img_objects[obj_idx].object_class);
1034                 //if the class name extracted from the ground truth file could not be found in the list of available classes, raise an exception
1035                 if (class_idx_it == output_headers.end())
1036                 {
1037                     string err_msg = "could not find object class '" + img_objects[obj_idx].object_class + "' specified in the ground truth file of '" + images[ranking[image_idx]].id +"'in list of valid classes.";
1038                     CV_Error(CV_StsError,err_msg.c_str());
1039                 }
1040                 /* convert iterator to index */
1041                 int class_idx = (int)std::distance(output_headers.begin(),class_idx_it);
1042                 //add to confusion matrix row in proportion
1043                 output_values[class_idx] += 1.f/static_cast<float>(img_objects.size());
1044             }
1045         }
1046         //check break conditions if breaking on certain level of recall
1047         if (cond == CV_VOC_CCOND_RECALL)
1048         {
1049             if(static_cast<float>(retrieved_hits)/static_cast<float>(total_relevant) >= threshold) break;
1050         }
1051     }
1052     /* finally, normalize confusion matrix row */
1053     for (vector<float>::iterator it = output_values.begin(); it < output_values.end(); ++it)
1054     {
1055         (*it) /= static_cast<float>(total_images);
1056     }
1057 }
1058
1059 // NOTE: doesn't ignore repeated detections
1060 void VocData::calcDetectorConfMatRow(const string& obj_class, const ObdDatasetType dataset, const vector<ObdImage>& images, const vector<vector<float> >& scores, const vector<vector<Rect> >& bounding_boxes, const VocConfCond cond, const float threshold, vector<string>& output_headers, vector<float>& output_values, bool ignore_difficult)
1061 {
1062     CV_Assert(images.size() == scores.size());
1063     CV_Assert(images.size() == bounding_boxes.size());
1064
1065     //collapse scores and ground_truth vectors into 1D vectors to allow ranking
1066     /* define final flat vectors */
1067     vector<string> images_flat;
1068     vector<float> scores_flat;
1069     vector<Rect> bounding_boxes_flat;
1070     {
1071         /* first count how many objects to allow preallocation */
1072         int obj_count = 0;
1073         CV_Assert(scores.size() == bounding_boxes.size());
1074         for (size_t img_idx = 0; img_idx < scores.size(); ++img_idx)
1075         {
1076             CV_Assert(scores[img_idx].size() == bounding_boxes[img_idx].size());
1077             for (size_t obj_idx = 0; obj_idx < scores[img_idx].size(); ++obj_idx)
1078             {
1079                 ++obj_count;
1080             }
1081         }
1082         /* preallocate vectors */
1083         images_flat.resize(obj_count);
1084         scores_flat.resize(obj_count);
1085         bounding_boxes_flat.resize(obj_count);
1086         /* now copy across to preallocated vectors */
1087         int flat_pos = 0;
1088         for (size_t img_idx = 0; img_idx < scores.size(); ++img_idx)
1089         {
1090             for (size_t obj_idx = 0; obj_idx < scores[img_idx].size(); ++obj_idx)
1091             {
1092                 images_flat[flat_pos] = images[img_idx].id;
1093                 scores_flat[flat_pos] = scores[img_idx][obj_idx];
1094                 bounding_boxes_flat[flat_pos] = bounding_boxes[img_idx][obj_idx];
1095                 ++flat_pos;
1096             }
1097         }
1098     }
1099
1100     // SORT RESULTS BY THEIR SCORE
1101     /* 1. store sorting order in 'ranking' */
1102     vector<size_t> ranking;
1103     VocData::getSortOrder(scores_flat, ranking);
1104
1105     // CALCULATE CONFUSION MATRIX ENTRIES
1106     /* prepare object category headers */
1107     output_headers = m_object_classes;
1108     output_headers.push_back("background");
1109     vector<float>(output_headers.size(),0.0).swap(output_values);
1110
1111     /* prepare variables related to calculating recall if using the recall threshold */
1112     int retrieved_hits = 0;
1113     int total_relevant = 0;
1114     if (cond == CV_VOC_CCOND_RECALL)
1115     {
1116 //        vector<char> ground_truth;
1117 //        /* in order to calculate the total number of relevant images for normalization of recall
1118 //            it's necessary to extract the ground truth for the images under consideration */
1119 //        getClassifierGroundTruth(obj_class, images, ground_truth);
1120 //        total_relevant = std::count_if(ground_truth.begin(),ground_truth.end(),std::bind2nd(std::equal_to<bool>(),true));
1121         /* calculate the total number of objects in the ground truth for the current dataset */
1122         vector<ObdImage> gt_images;
1123         vector<char> gt_object_present;
1124         getClassImages(obj_class, dataset, gt_images, gt_object_present);
1125
1126         for (size_t image_idx = 0; image_idx < gt_images.size(); ++image_idx)
1127         {
1128             vector<ObdObject> gt_img_objects;
1129             vector<VocObjectData> gt_img_object_data;
1130             getObjects(gt_images[image_idx].id, gt_img_objects, gt_img_object_data);
1131             for (size_t obj_idx = 0; obj_idx < gt_img_objects.size(); ++obj_idx)
1132             {
1133                 if (gt_img_objects[obj_idx].object_class == obj_class)
1134                 {
1135                     if ((gt_img_object_data[obj_idx].difficult == false) || (ignore_difficult == false))
1136                         ++total_relevant;
1137                 }
1138             }
1139         }
1140     }
1141
1142     /* iterate through objects */
1143     vector<ObdObject> img_objects;
1144     vector<VocObjectData> img_object_data;
1145     int total_objects = 0;
1146     for (size_t image_idx = 0; image_idx < images.size(); ++image_idx)
1147     {
1148         /* if using the score as the break condition, check for it now */
1149         if (cond == CV_VOC_CCOND_SCORETHRESH)
1150         {
1151             if (scores_flat[ranking[image_idx]] <= threshold) break;
1152         }
1153         /* increment the image counter for later normalization */
1154         ++total_objects;
1155         /* for each image retrieve the objects contained */
1156         getObjects(images[ranking[image_idx]].id, img_objects, img_object_data);
1157
1158         //find the ground truth object which has the highest overlap score with the detected object
1159         float maxov = -1.0;
1160         int max_gt_obj_idx = -1;
1161         //-- for each detected object iterate through objects present in ground truth --
1162         for (size_t gt_obj_idx = 0; gt_obj_idx < img_objects.size(); ++gt_obj_idx)
1163         {
1164             //check difficulty flag
1165             if (ignore_difficult || (img_object_data[gt_obj_idx].difficult == false))
1166             {
1167                 //if the class matches, then check if the detected object and ground truth object overlap by a sufficient margin
1168                 float ov = testBoundingBoxesForOverlap(bounding_boxes_flat[ranking[image_idx]], img_objects[gt_obj_idx].boundingBox);
1169                 if (ov != -1.f)
1170                 {
1171                     //if all conditions are met store the overlap score and index (as objects are assigned to the highest scoring match)
1172                     if (ov > maxov)
1173                     {
1174                         maxov = ov;
1175                         max_gt_obj_idx = (int)gt_obj_idx;
1176                     }
1177                 }
1178             }
1179         }
1180
1181         //assign to appropriate object class if an object was detected
1182         if (maxov != -1.0)
1183         {
1184             //find the index of the currently considered object
1185             vector<string>::iterator class_idx_it = std::find(output_headers.begin(),output_headers.end(),img_objects[max_gt_obj_idx].object_class);
1186             //if the class name extracted from the ground truth file could not be found in the list of available classes, raise an exception
1187             if (class_idx_it == output_headers.end())
1188             {
1189                 string err_msg = "could not find object class '" + img_objects[max_gt_obj_idx].object_class + "' specified in the ground truth file of '" + images[ranking[image_idx]].id +"'in list of valid classes.";
1190                 CV_Error(CV_StsError,err_msg.c_str());
1191             }
1192             /* convert iterator to index */
1193             int class_idx = (int)std::distance(output_headers.begin(),class_idx_it);
1194             //add to confusion matrix row in proportion
1195             output_values[class_idx] += 1.0;
1196         } else {
1197             //otherwise assign to background class
1198             output_values[output_values.size()-1] += 1.0;
1199         }
1200
1201         //check break conditions if breaking on certain level of recall
1202         if (cond == CV_VOC_CCOND_RECALL)
1203         {
1204             if(static_cast<float>(retrieved_hits)/static_cast<float>(total_relevant) >= threshold) break;
1205         }
1206     }
1207
1208     /* finally, normalize confusion matrix row */
1209     for (vector<float>::iterator it = output_values.begin(); it < output_values.end(); ++it)
1210     {
1211         (*it) /= static_cast<float>(total_objects);
1212     }
1213 }
1214
1215 //Save Precision-Recall results to a p-r curve in GNUPlot format
1216 //--------------------------------------------------------------
1217 //INPUTS:
1218 // - output_file        The file to which to save the GNUPlot data file. If only a filename is specified, the data
1219 //                      file is saved to the standard VOC results directory.
1220 // - precision          Vector of precisions as returned from calcClassifier/DetectorPrecRecall
1221 // - recall             Vector of recalls as returned from calcClassifier/DetectorPrecRecall
1222 // - ap                ap as returned from calcClassifier/DetectorPrecRecall
1223 // - (title)            Title to use for the plot (if not specified, just the ap is printed as the title)
1224 //                      This also specifies the filename of the output file if printing to pdf
1225 // - (plot_type)        Specifies whether to instruct GNUPlot to save to a PDF file (CV_VOC_PLOT_PDF) or directly
1226 //                      to screen (CV_VOC_PLOT_SCREEN) in the datafile
1227 //NOTES:
1228 // The GNUPlot data file can be executed using GNUPlot from the commandline in the following way:
1229 //      >> GNUPlot <output_file>
1230 // This will then display the p-r curve on the screen or save it to a pdf file depending on plot_type
1231
1232 void VocData::savePrecRecallToGnuplot(const string& output_file, const vector<float>& precision, const vector<float>& recall, const float ap, const string title, const VocPlotType plot_type)
1233 {
1234     string output_file_std = checkFilenamePathsep(output_file);
1235
1236     //if no directory is specified, by default save the output file in the results directory
1237 //    if (output_file_std.find("/") == output_file_std.npos)
1238 //    {
1239 //        output_file_std = m_results_directory + output_file_std;
1240 //    }
1241
1242     std::ofstream plot_file(output_file_std.c_str());
1243
1244     if (plot_file.is_open())
1245     {
1246         plot_file << "set xrange [0:1]" << endl;
1247         plot_file << "set yrange [0:1]" << endl;
1248         plot_file << "set size square" << endl;
1249         string title_text = title;
1250         if (title_text.size() == 0) title_text = "Precision-Recall Curve";
1251         plot_file << "set title \"" << title_text << " (ap: " << ap << ")\"" << endl;
1252         plot_file << "set xlabel \"Recall\"" << endl;
1253         plot_file << "set ylabel \"Precision\"" << endl;
1254         plot_file << "set style data lines" << endl;
1255         plot_file << "set nokey" << endl;
1256         if (plot_type == CV_VOC_PLOT_PNG)
1257         {
1258             plot_file << "set terminal png" << endl;
1259             string pdf_filename;
1260             if (title.size() != 0)
1261             {
1262                 pdf_filename = title;
1263             } else {
1264                 pdf_filename = "prcurve";
1265             }
1266             plot_file << "set out \"" << title << ".png\"" << endl;
1267         }
1268         plot_file << "plot \"-\" using 1:2" << endl;
1269         plot_file << "# X Y" << endl;
1270         CV_Assert(precision.size() == recall.size());
1271         for (size_t i = 0; i < precision.size(); ++i)
1272         {
1273             plot_file << "  " << recall[i] << " " << precision[i] << endl;
1274         }
1275         plot_file << "end" << endl;
1276         if (plot_type == CV_VOC_PLOT_SCREEN)
1277         {
1278             plot_file << "pause -1" << endl;
1279         }
1280         plot_file.close();
1281     } else {
1282         string err_msg = "could not open plot file '" + output_file_std + "' for writing.";
1283         CV_Error(CV_StsError,err_msg.c_str());
1284     }
1285 }
1286
1287 void VocData::readClassifierGroundTruth(const string& obj_class, const ObdDatasetType dataset, vector<ObdImage>& images, vector<char>& object_present)
1288 {
1289     images.clear();
1290
1291     string gtFilename = m_class_imageset_path;
1292     gtFilename.replace(gtFilename.find("%s"),2,obj_class);
1293     if (dataset == CV_OBD_TRAIN)
1294     {
1295         gtFilename.replace(gtFilename.find("%s"),2,m_train_set);
1296     } else {
1297         gtFilename.replace(gtFilename.find("%s"),2,m_test_set);
1298     }
1299
1300     vector<string> image_codes;
1301     readClassifierGroundTruth(gtFilename, image_codes, object_present);
1302
1303     convertImageCodesToObdImages(image_codes, images);
1304 }
1305
1306 void VocData::readClassifierResultsFile(const std:: string& input_file, vector<ObdImage>& images, vector<float>& scores)
1307 {
1308     images.clear();
1309
1310     string input_file_std = checkFilenamePathsep(input_file);
1311
1312     //if no directory is specified, by default search for the input file in the results directory
1313 //    if (input_file_std.find("/") == input_file_std.npos)
1314 //    {
1315 //        input_file_std = m_results_directory + input_file_std;
1316 //    }
1317
1318     vector<string> image_codes;
1319     readClassifierResultsFile(input_file_std, image_codes, scores);
1320
1321     convertImageCodesToObdImages(image_codes, images);
1322 }
1323
1324 void VocData::readDetectorResultsFile(const string& input_file, vector<ObdImage>& images, vector<vector<float> >& scores, vector<vector<Rect> >& bounding_boxes)
1325 {
1326     images.clear();
1327
1328     string input_file_std = checkFilenamePathsep(input_file);
1329
1330     //if no directory is specified, by default search for the input file in the results directory
1331 //    if (input_file_std.find("/") == input_file_std.npos)
1332 //    {
1333 //        input_file_std = m_results_directory + input_file_std;
1334 //    }
1335
1336     vector<string> image_codes;
1337     readDetectorResultsFile(input_file_std, image_codes, scores, bounding_boxes);
1338
1339     convertImageCodesToObdImages(image_codes, images);
1340 }
1341
1342 const vector<string>& VocData::getObjectClasses()
1343 {
1344     return m_object_classes;
1345 }
1346
1347 //string VocData::getResultsDirectory()
1348 //{
1349 //    return m_results_directory;
1350 //}
1351
1352 //---------------------------------------------------------
1353 // Protected Functions ------------------------------------
1354 //---------------------------------------------------------
1355
//Returns the last component of a path, i.e. the dataset name in e.g. "/home/my/VOCdevkit/VOC2010".
//Both '/' and '\\' are accepted as separators (also mixed within one path), and trailing
//separators are ignored so that "/home/my/VOCdevkit/VOC2010/" still yields "VOC2010".
//Returns an empty string if the path is empty or consists of separators only.
static std::string getVocName( const std::string& vocPath )
{
    static const char* const separators = "/\\";

    //position of the last character which belongs to the name itself
    const std::string::size_type last = vocPath.find_last_not_of( separators );
    if( last == std::string::npos )
        return std::string();

    //the name starts right after the closest separator in front of it (or at the very beginning)
    const std::string::size_type sep = vocPath.find_last_of( separators, last );
    const std::string::size_type first = ( sep == std::string::npos ) ? 0 : sep + 1;

    return vocPath.substr( first, last - first + 1 );
}
1367
1368 void VocData::initVoc( const string& vocPath, const bool useTestDataset )
1369 {
1370     initVoc2007to2010( vocPath, useTestDataset );
1371 }
1372
1373 //Initialize file paths and settings for the VOC 2010 dataset
1374 //-----------------------------------------------------------
1375 void VocData::initVoc2007to2010( const string& vocPath, const bool useTestDataset )
1376 {
1377     //check format of root directory and modify if necessary
1378
1379     m_vocName = getVocName( vocPath );
1380
1381     CV_Assert( !m_vocName.compare("VOC2007") || !m_vocName.compare("VOC2008") ||
1382                !m_vocName.compare("VOC2009") || !m_vocName.compare("VOC2010") );
1383
1384     m_vocPath = checkFilenamePathsep( vocPath, true );
1385
1386     if (useTestDataset)
1387     {
1388         m_train_set = "trainval";
1389         m_test_set = "test";
1390     } else {
1391         m_train_set = "train";
1392         m_test_set = "val";
1393     }
1394
1395     // initialize main classification/detection challenge paths
1396     m_annotation_path = m_vocPath + "/Annotations/%s.xml";
1397     m_image_path = m_vocPath + "/JPEGImages/%s.jpg";
1398     m_imageset_path = m_vocPath + "/ImageSets/Main/%s.txt";
1399     m_class_imageset_path = m_vocPath + "/ImageSets/Main/%s_%s.txt";
1400
1401     //define available object_classes for VOC2010 dataset
1402     m_object_classes.push_back("aeroplane");
1403     m_object_classes.push_back("bicycle");
1404     m_object_classes.push_back("bird");
1405     m_object_classes.push_back("boat");
1406     m_object_classes.push_back("bottle");
1407     m_object_classes.push_back("bus");
1408     m_object_classes.push_back("car");
1409     m_object_classes.push_back("cat");
1410     m_object_classes.push_back("chair");
1411     m_object_classes.push_back("cow");
1412     m_object_classes.push_back("diningtable");
1413     m_object_classes.push_back("dog");
1414     m_object_classes.push_back("horse");
1415     m_object_classes.push_back("motorbike");
1416     m_object_classes.push_back("person");
1417     m_object_classes.push_back("pottedplant");
1418     m_object_classes.push_back("sheep");
1419     m_object_classes.push_back("sofa");
1420     m_object_classes.push_back("train");
1421     m_object_classes.push_back("tvmonitor");
1422
1423     m_min_overlap = 0.5;
1424
1425     //up until VOC 2010, ap was calculated by sampling p-r curve, not taking complete curve
1426     m_sampled_ap = ((m_vocName == "VOC2007") || (m_vocName == "VOC2008") || (m_vocName == "VOC2009"));
1427 }
1428
1429 //Read a VOC classification ground truth text file for a given object class and dataset
1430 //-------------------------------------------------------------------------------------
1431 //INPUTS:
1432 // - filename           The path of the text file to read
1433 //OUTPUTS:
1434 // - image_codes        VOC image codes extracted from the GT file in the form 20XX_XXXXXX where the first four
1435 //                          digits specify the year of the dataset, and the last group specifies a unique ID
1436 // - object_present     For each image in the 'image_codes' array, specifies whether the object class described
1437 //                          in the loaded GT file is present or not
1438 void VocData::readClassifierGroundTruth(const string& filename, vector<string>& image_codes, vector<char>& object_present)
1439 {
1440     image_codes.clear();
1441     object_present.clear();
1442
1443     std::ifstream gtfile(filename.c_str());
1444     if (!gtfile.is_open())
1445     {
1446         string err_msg = "could not open VOC ground truth textfile '" + filename + "'.";
1447         CV_Error(CV_StsError,err_msg.c_str());
1448     }
1449
1450     string line;
1451     string image;
1452     int obj_present = 0;
1453     while (!gtfile.eof())
1454     {
1455         std::getline(gtfile,line);
1456         std::istringstream iss(line);
1457         iss >> image >> obj_present;
1458         if (!iss.fail())
1459         {
1460             image_codes.push_back(image);
1461             object_present.push_back(obj_present == 1);
1462         } else {
1463             if (!gtfile.eof()) CV_Error(CV_StsParseError,"error parsing VOC ground truth textfile.");
1464         }
1465     }
1466     gtfile.close();
1467 }
1468
1469 void VocData::readClassifierResultsFile(const string& input_file, vector<string>& image_codes, vector<float>& scores)
1470 {
1471     //check if results file exists
1472     std::ifstream result_file(input_file.c_str());
1473     if (result_file.is_open())
1474     {
1475         string line;
1476         string image;
1477         float score;
1478         //read in the results file
1479         while (!result_file.eof())
1480         {
1481             std::getline(result_file,line);
1482             std::istringstream iss(line);
1483             iss >> image >> score;
1484             if (!iss.fail())
1485             {
1486                 image_codes.push_back(image);
1487                 scores.push_back(score);
1488             } else {
1489                 if(!result_file.eof()) CV_Error(CV_StsParseError,"error parsing VOC classifier results file.");
1490             }
1491         }
1492         result_file.close();
1493     } else {
1494         string err_msg = "could not open classifier results file '" + input_file + "' for reading.";
1495         CV_Error(CV_StsError,err_msg.c_str());
1496     }
1497 }
1498
1499 void VocData::readDetectorResultsFile(const string& input_file, vector<string>& image_codes, vector<vector<float> >& scores, vector<vector<Rect> >& bounding_boxes)
1500 {
1501     image_codes.clear();
1502     scores.clear();
1503     bounding_boxes.clear();
1504
1505     //check if results file exists
1506     std::ifstream result_file(input_file.c_str());
1507     if (result_file.is_open())
1508     {
1509         string line;
1510         string image;
1511         Rect bounding_box;
1512         float score;
1513         //read in the results file
1514         while (!result_file.eof())
1515         {
1516             std::getline(result_file,line);
1517             std::istringstream iss(line);
1518             iss >> image >> score >> bounding_box.x >> bounding_box.y >> bounding_box.width >> bounding_box.height;
1519             if (!iss.fail())
1520             {
1521                 //convert right and bottom positions to width and height
1522                 bounding_box.width -= bounding_box.x;
1523                 bounding_box.height -= bounding_box.y;
1524                 //convert to 0-indexing
1525                 bounding_box.x -= 1;
1526                 bounding_box.y -= 1;
1527                 //store in output vectors
1528                 /* first check if the current image code has been seen before */
1529                 vector<string>::iterator image_codes_it = std::find(image_codes.begin(),image_codes.end(),image);
1530                 if (image_codes_it == image_codes.end())
1531                 {
1532                     image_codes.push_back(image);
1533                     vector<float> score_vect(1);
1534                     score_vect[0] = score;
1535                     scores.push_back(score_vect);
1536                     vector<Rect> bounding_box_vect(1);
1537                     bounding_box_vect[0] = bounding_box;
1538                     bounding_boxes.push_back(bounding_box_vect);
1539                 } else {
1540                     /* if the image index has been seen before, add the current object below it in the 2D arrays */
1541                     int image_idx = (int)std::distance(image_codes.begin(),image_codes_it);
1542                     scores[image_idx].push_back(score);
1543                     bounding_boxes[image_idx].push_back(bounding_box);
1544                 }
1545             } else {
1546                 if(!result_file.eof()) CV_Error(CV_StsParseError,"error parsing VOC detector results file.");
1547             }
1548         }
1549         result_file.close();
1550     } else {
1551         string err_msg = "could not open detector results file '" + input_file + "' for reading.";
1552         CV_Error(CV_StsError,err_msg.c_str());
1553     }
1554 }
1555
1556
1557 //Read a VOC annotation xml file for a given image
1558 //------------------------------------------------
1559 //INPUTS:
1560 // - filename           The path of the xml file to read
1561 //OUTPUTS:
1562 // - objects            Array of VocObject describing all object instances present in the given image
1563 void VocData::extractVocObjects(const string filename, vector<ObdObject>& objects, vector<VocObjectData>& object_data)
1564 {
1565 #ifdef PR_DEBUG
1566     int block = 1;
1567     cout << "SAMPLE VOC OBJECT EXTRACTION for " << filename << ":" << endl;
1568 #endif
1569     objects.clear();
1570     object_data.clear();
1571
1572     string contents, object_contents, tag_contents;
1573
1574     readFileToString(filename, contents);
1575
1576     //keep on extracting 'object' blocks until no more can be found
1577     if (extractXMLBlock(contents, "annotation", 0, contents) != -1)
1578     {
1579         int searchpos = 0;
1580         searchpos = extractXMLBlock(contents, "object", searchpos, object_contents);
1581         while (searchpos != -1)
1582         {
1583 #ifdef PR_DEBUG
1584             cout << "SEARCHPOS:" << searchpos << endl;
1585             cout << "start block " << block << " ---------" << endl;
1586             cout << object_contents << endl;
1587             cout << "end block " << block << " -----------" << endl;
1588             ++block;
1589 #endif
1590
1591             ObdObject object;
1592             VocObjectData object_d;
1593
1594             //object class -------------
1595
1596             if (extractXMLBlock(object_contents, "name", 0, tag_contents) == -1) CV_Error(CV_StsError,"missing <name> tag in object definition of '" + filename + "'");
1597             object.object_class.swap(tag_contents);
1598
1599             //object bounding box -------------
1600
1601             int xmax, xmin, ymax, ymin;
1602
1603             if (extractXMLBlock(object_contents, "xmax", 0, tag_contents) == -1) CV_Error(CV_StsError,"missing <xmax> tag in object definition of '" + filename + "'");
1604             xmax = stringToInteger(tag_contents);
1605
1606             if (extractXMLBlock(object_contents, "xmin", 0, tag_contents) == -1) CV_Error(CV_StsError,"missing <xmin> tag in object definition of '" + filename + "'");
1607             xmin = stringToInteger(tag_contents);
1608
1609             if (extractXMLBlock(object_contents, "ymax", 0, tag_contents) == -1) CV_Error(CV_StsError,"missing <ymax> tag in object definition of '" + filename + "'");
1610             ymax = stringToInteger(tag_contents);
1611
1612             if (extractXMLBlock(object_contents, "ymin", 0, tag_contents) == -1) CV_Error(CV_StsError,"missing <ymin> tag in object definition of '" + filename + "'");
1613             ymin = stringToInteger(tag_contents);
1614
1615             object.boundingBox.x = xmin-1;      //convert to 0-based indexing
1616             object.boundingBox.width = xmax - xmin;
1617             object.boundingBox.y = ymin-1;
1618             object.boundingBox.height = ymax - ymin;
1619
1620             CV_Assert(xmin != 0);
1621             CV_Assert(xmax > xmin);
1622             CV_Assert(ymin != 0);
1623             CV_Assert(ymax > ymin);
1624
1625
1626             //object tags -------------
1627
1628             if (extractXMLBlock(object_contents, "difficult", 0, tag_contents) != -1)
1629             {
1630                 object_d.difficult = (tag_contents == "1");
1631             } else object_d.difficult = false;
1632             if (extractXMLBlock(object_contents, "occluded", 0, tag_contents) != -1)
1633             {
1634                 object_d.occluded = (tag_contents == "1");
1635             } else object_d.occluded = false;
1636             if (extractXMLBlock(object_contents, "truncated", 0, tag_contents) != -1)
1637             {
1638                 object_d.truncated = (tag_contents == "1");
1639             } else object_d.truncated = false;
1640             if (extractXMLBlock(object_contents, "pose", 0, tag_contents) != -1)
1641             {
1642                 if (tag_contents == "Frontal") object_d.pose = CV_VOC_POSE_FRONTAL;
1643                 if (tag_contents == "Rear") object_d.pose = CV_VOC_POSE_REAR;
1644                 if (tag_contents == "Left") object_d.pose = CV_VOC_POSE_LEFT;
1645                 if (tag_contents == "Right") object_d.pose = CV_VOC_POSE_RIGHT;
1646             }
1647
1648             //add to array of objects
1649             objects.push_back(object);
1650             object_data.push_back(object_d);
1651
1652             //extract next 'object' block from file if it exists
1653             searchpos = extractXMLBlock(contents, "object", searchpos, object_contents);
1654         }
1655     }
1656 }
1657
1658 //Converts an image identifier string in the format YYYY_XXXXXX to a single index integer of form XXXXXXYYYY
1659 //where Y represents a year and returns the image path
1660 //----------------------------------------------------------------------------------------------------------
1661 string VocData::getImagePath(const string& input_str)
1662 {
1663     string path = m_image_path;
1664     path.replace(path.find("%s"),2,input_str);
1665     return path;
1666 }
1667
1668 //Tests two boundary boxes for overlap (using the intersection over union metric) and returns the overlap if the objects
1669 //defined by the two bounding boxes are considered to be matched according to the criterion outlined in
1670 //the VOC documentation [namely intersection/union > some threshold] otherwise returns -1.0 (no match)
1671 //----------------------------------------------------------------------------------------------------------
1672 float VocData::testBoundingBoxesForOverlap(const Rect detection, const Rect ground_truth)
1673 {
1674     int detection_x2 = detection.x + detection.width;
1675     int detection_y2 = detection.y + detection.height;
1676     int ground_truth_x2 = ground_truth.x + ground_truth.width;
1677     int ground_truth_y2 = ground_truth.y + ground_truth.height;
1678     //first calculate the boundaries of the intersection of the rectangles
1679     int intersection_x = std::max(detection.x, ground_truth.x); //rightmost left
1680     int intersection_y = std::max(detection.y, ground_truth.y); //bottommost top
1681     int intersection_x2 = std::min(detection_x2, ground_truth_x2); //leftmost right
1682     int intersection_y2 = std::min(detection_y2, ground_truth_y2); //topmost bottom
1683     //then calculate the width and height of the intersection rect
1684     int intersection_width = intersection_x2 - intersection_x + 1;
1685     int intersection_height = intersection_y2 - intersection_y + 1;
1686     //if there is no overlap then return false straight away
1687     if ((intersection_width <= 0) || (intersection_height <= 0)) return -1.0;
1688     //otherwise calculate the intersection
1689     int intersection_area = intersection_width*intersection_height;
1690
1691     //now calculate the union
1692     int union_area = (detection.width+1)*(detection.height+1) + (ground_truth.width+1)*(ground_truth.height+1) - intersection_area;
1693
1694     //calculate the intersection over union and use as threshold as per VOC documentation
1695     float overlap = static_cast<float>(intersection_area)/static_cast<float>(union_area);
1696     if (overlap > m_min_overlap)
1697     {
1698         return overlap;
1699     } else {
1700         return -1.0;
1701     }
1702 }
1703
1704 //Extracts the object class and dataset from the filename of a VOC standard results text file, which takes
1705 //the format 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'
1706 //----------------------------------------------------------------------------------------------------------
1707 void VocData::extractDataFromResultsFilename(const string& input_file, string& class_name, string& dataset_name)
1708 {
1709     string input_file_std = checkFilenamePathsep(input_file);
1710
1711     size_t fnamestart = input_file_std.rfind("/");
1712     size_t fnameend = input_file_std.rfind(".txt");
1713
1714     if ((fnamestart == input_file_std.npos) || (fnameend == input_file_std.npos))
1715         CV_Error(CV_StsError,"Could not extract filename of results file.");
1716
1717     ++fnamestart;
1718     if (fnamestart >= fnameend)
1719         CV_Error(CV_StsError,"Could not extract filename of results file.");
1720
1721     //extract dataset and class names, triggering exception if the filename format is not correct
1722     string filename = input_file_std.substr(fnamestart, fnameend-fnamestart);
1723     size_t datasetstart = filename.find("_");
1724     datasetstart = filename.find("_",datasetstart+1);
1725     size_t classstart = filename.find("_",datasetstart+1);
1726     //allow for appended index after a further '_' by discarding this part if it exists
1727     size_t classend = filename.find("_",classstart+1);
1728     if (classend == filename.npos) classend = filename.size();
1729     if ((datasetstart == filename.npos) || (classstart == filename.npos))
1730         CV_Error(CV_StsError,"Error parsing results filename. Is it in standard format of 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'?");
1731     ++datasetstart;
1732     ++classstart;
1733     if (((datasetstart-classstart) < 1) || ((classend-datasetstart) < 1))
1734         CV_Error(CV_StsError,"Error parsing results filename. Is it in standard format of 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'?");
1735
1736     dataset_name = filename.substr(datasetstart,classstart-datasetstart-1);
1737     class_name = filename.substr(classstart,classend-classstart);
1738 }
1739
1740 bool VocData::getClassifierGroundTruthImage(const string& obj_class, const string& id)
1741 {
1742     /* if the classifier ground truth data for all images of the current class has not been loaded yet, load it now */
1743     if (m_classifier_gt_all_ids.empty() || (m_classifier_gt_class != obj_class))
1744     {
1745         m_classifier_gt_all_ids.clear();
1746         m_classifier_gt_all_present.clear();
1747         m_classifier_gt_class = obj_class;
1748         for (int i=0; i<2; ++i) //run twice (once over test set and once over training set)
1749         {
1750             //generate the filename of the classification ground-truth textfile for the object class
1751             string gtFilename = m_class_imageset_path;
1752             gtFilename.replace(gtFilename.find("%s"),2,obj_class);
1753             if (i == 0)
1754             {
1755                 gtFilename.replace(gtFilename.find("%s"),2,m_train_set);
1756             } else {
1757                 gtFilename.replace(gtFilename.find("%s"),2,m_test_set);
1758             }
1759
1760             //parse the ground truth file, storing in two separate vectors
1761             //for the image code and the ground truth value
1762             vector<string> image_codes;
1763             vector<char> object_present;
1764             readClassifierGroundTruth(gtFilename, image_codes, object_present);
1765
1766             m_classifier_gt_all_ids.insert(m_classifier_gt_all_ids.end(),image_codes.begin(),image_codes.end());
1767             m_classifier_gt_all_present.insert(m_classifier_gt_all_present.end(),object_present.begin(),object_present.end());
1768
1769             CV_Assert(m_classifier_gt_all_ids.size() == m_classifier_gt_all_present.size());
1770         }
1771     }
1772
1773
1774     //search for the image code
1775     vector<string>::iterator it = find (m_classifier_gt_all_ids.begin(), m_classifier_gt_all_ids.end(), id);
1776     if (it != m_classifier_gt_all_ids.end())
1777     {
1778         //image found, so return corresponding ground truth
1779         return m_classifier_gt_all_present[std::distance(m_classifier_gt_all_ids.begin(),it)] != 0;
1780     } else {
1781         string err_msg = "could not find classifier ground truth for image '" + id + "' and class '" + obj_class + "'";
1782         CV_Error(CV_StsError,err_msg.c_str());
1783     }
1784
1785     return true;
1786 }
1787
1788 //-------------------------------------------------------------------
1789 // Protected Functions (utility) ------------------------------------
1790 //-------------------------------------------------------------------
1791
1792 //returns a vector containing indexes of the input vector in sorted ascending/descending order
1793 void VocData::getSortOrder(const vector<float>& values, vector<size_t>& order, bool descending)
1794 {
1795     /* 1. store sorting order in 'order_pair' */
1796     vector<std::pair<size_t, vector<float>::const_iterator> > order_pair(values.size());
1797
1798     size_t n = 0;
1799     for (vector<float>::const_iterator it = values.begin(); it != values.end(); ++it, ++n)
1800         order_pair[n] = make_pair(n, it);
1801
1802     std::sort(order_pair.begin(),order_pair.end(),orderingSorter());
1803     if (descending == false) std::reverse(order_pair.begin(),order_pair.end());
1804
1805     vector<size_t>(order_pair.size()).swap(order);
1806     for (size_t i = 0; i < order_pair.size(); ++i)
1807     {
1808         order[i] = order_pair[i].first;
1809     }
1810 }
1811
1812 void VocData::readFileToString(const string filename, string& file_contents)
1813 {
1814     std::ifstream ifs(filename.c_str());
1815     if (!ifs.is_open()) CV_Error(CV_StsError,"could not open text file");
1816
1817     stringstream oss;
1818     oss << ifs.rdbuf();
1819
1820     file_contents = oss.str();
1821 }
1822
1823 int VocData::stringToInteger(const string input_str)
1824 {
1825     int result = 0;
1826
1827     stringstream ss(input_str);
1828     if ((ss >> result).fail())
1829     {
1830         CV_Error(CV_StsBadArg,"could not perform string to integer conversion");
1831     }
1832     return result;
1833 }
1834
1835 string VocData::integerToString(const int input_int)
1836 {
1837     string result;
1838
1839     stringstream ss;
1840     if ((ss << input_int).fail())
1841     {
1842         CV_Error(CV_StsBadArg,"could not perform integer to string conversion");
1843     }
1844     result = ss.str();
1845     return result;
1846 }
1847
1848 string VocData::checkFilenamePathsep( const string filename, bool add_trailing_slash )
1849 {
1850     string filename_new = filename;
1851
1852     size_t pos = filename_new.find("\\\\");
1853     while (pos != filename_new.npos)
1854     {
1855         filename_new.replace(pos,2,"/");
1856         pos = filename_new.find("\\\\", pos);
1857     }
1858     pos = filename_new.find("\\");
1859     while (pos != filename_new.npos)
1860     {
1861         filename_new.replace(pos,1,"/");
1862         pos = filename_new.find("\\", pos);
1863     }
1864     if (add_trailing_slash)
1865     {
1866         //add training slash if this is missing
1867         if (filename_new.rfind("/") != filename_new.length()-1) filename_new += "/";
1868     }
1869
1870     return filename_new;
1871 }
1872
1873 void VocData::convertImageCodesToObdImages(const vector<string>& image_codes, vector<ObdImage>& images)
1874 {
1875     images.clear();
1876     images.reserve(image_codes.size());
1877
1878     string path;
1879     //transfer to output arrays
1880     for (size_t i = 0; i < image_codes.size(); ++i)
1881     {
1882         //generate image path and indices from extracted string code
1883         path = getImagePath(image_codes[i]);
1884         images.push_back(ObdImage(image_codes[i], path));
1885     }
1886 }
1887
1888 //Extract text from within a given tag from an XML file
1889 //-----------------------------------------------------
1890 //INPUTS:
1891 // - src            XML source file
1892 // - tag            XML tag delimiting block to extract
1893 // - searchpos      position within src at which to start search
1894 //OUTPUTS:
1895 // - tag_contents   text extracted between <tag> and </tag> tags
1896 //RETURN VALUE:
1897 // - the position of the final character extracted in tag_contents within src
1898 //      (can be used to call extractXMLBlock recursively to extract multiple blocks)
1899 //      returns -1 if the tag could not be found
1900 int VocData::extractXMLBlock(const string src, const string tag, const int searchpos, string& tag_contents)
1901 {
1902     size_t startpos, next_startpos, endpos;
1903     int embed_count = 1;
1904
1905     //find position of opening tag
1906     startpos = src.find("<" + tag + ">", searchpos);
1907     if (startpos == string::npos) return -1;
1908
1909     //initialize endpos -
1910     // start searching for end tag anywhere after opening tag
1911     endpos = startpos;
1912
1913     //find position of next opening tag
1914     next_startpos = src.find("<" + tag + ">", startpos+1);
1915
1916     //match opening tags with closing tags, and only
1917     //accept final closing tag of same level as original
1918     //opening tag
1919     while (embed_count > 0)
1920     {
1921         endpos = src.find("</" + tag + ">", endpos+1);
1922         if (endpos == string::npos) return -1;
1923
1924         //the next code is only executed if there are embedded tags with the same name
1925         if (next_startpos != string::npos)
1926         {
1927             while (next_startpos<endpos)
1928             {
1929                 //counting embedded start tags
1930                 ++embed_count;
1931                 next_startpos = src.find("<" + tag + ">", next_startpos+1);
1932                 if (next_startpos == string::npos) break;
1933             }
1934         }
1935         //passing end tag so decrement nesting level
1936         --embed_count;
1937     }
1938
1939     //finally, extract the tag region
1940     startpos += tag.length() + 2;
1941     if (startpos > src.length()) return -1;
1942     if (endpos > src.length()) return -1;
1943     tag_contents = src.substr(startpos,endpos-startpos);
1944     return static_cast<int>(endpos);
1945 }
1946
1947 /****************************************************************************************\
1948 *                            Sample on image classification                             *
1949 \****************************************************************************************/
1950 //
// This part of the code has been lightly refactored.
1952 //
1953 struct DDMParams
1954 {
1955     DDMParams() : detectorType("SURF"), descriptorType("SURF"), matcherType("BruteForce") {}
1956     DDMParams( const string _detectorType, const string _descriptorType, const string& _matcherType ) :
1957         detectorType(_detectorType), descriptorType(_descriptorType), matcherType(_matcherType){}
1958     void read( const FileNode& fn )
1959     {
1960         fn["detectorType"] >> detectorType;
1961         fn["descriptorType"] >> descriptorType;
1962         fn["matcherType"] >> matcherType;
1963     }
1964     void write( FileStorage& fs ) const
1965     {
1966         fs << "detectorType" << detectorType;
1967         fs << "descriptorType" << descriptorType;
1968         fs << "matcherType" << matcherType;
1969     }
1970     void print() const
1971     {
1972         cout << "detectorType: " << detectorType << endl;
1973         cout << "descriptorType: " << descriptorType << endl;
1974         cout << "matcherType: " << matcherType << endl;
1975     }
1976
1977     string detectorType;
1978     string descriptorType;
1979     string matcherType;
1980 };
1981
1982 struct VocabTrainParams
1983 {
1984     VocabTrainParams() : trainObjClass("chair"), vocabSize(1000), memoryUse(200), descProportion(0.3f) {}
1985     VocabTrainParams( const string _trainObjClass, size_t _vocabSize, size_t _memoryUse, float _descProportion ) :
1986             trainObjClass(_trainObjClass), vocabSize((int)_vocabSize), memoryUse((int)_memoryUse), descProportion(_descProportion) {}
1987     void read( const FileNode& fn )
1988     {
1989         fn["trainObjClass"] >> trainObjClass;
1990         fn["vocabSize"] >> vocabSize;
1991         fn["memoryUse"] >> memoryUse;
1992         fn["descProportion"] >> descProportion;
1993     }
1994     void write( FileStorage& fs ) const
1995     {
1996         fs << "trainObjClass" << trainObjClass;
1997         fs << "vocabSize" << vocabSize;
1998         fs << "memoryUse" << memoryUse;
1999         fs << "descProportion" << descProportion;
2000     }
2001     void print() const
2002     {
2003         cout << "trainObjClass: " << trainObjClass << endl;
2004         cout << "vocabSize: " << vocabSize << endl;
2005         cout << "memoryUse: " << memoryUse << endl;
2006         cout << "descProportion: " << descProportion << endl;
2007     }
2008
2009
2010     string trainObjClass; // Object class used for training visual vocabulary.
2011                           // It shouldn't matter which object class is specified here - visual vocab will still be the same.
2012     int vocabSize; //number of visual words in vocabulary to train
2013     int memoryUse; // Memory to preallocate (in MB) when training vocab.
2014                    // Change this depending on the size of the dataset/available memory.
2015     float descProportion; // Specifies the number of descriptors to use from each image as a proportion of the total num descs.
2016 };
2017
2018 struct SVMTrainParamsExt
2019 {
2020     SVMTrainParamsExt() : descPercent(0.5f), targetRatio(0.4f), balanceClasses(true) {}
2021     SVMTrainParamsExt( float _descPercent, float _targetRatio, bool _balanceClasses ) :
2022             descPercent(_descPercent), targetRatio(_targetRatio), balanceClasses(_balanceClasses) {}
2023     void read( const FileNode& fn )
2024     {
2025         fn["descPercent"] >> descPercent;
2026         fn["targetRatio"] >> targetRatio;
2027         fn["balanceClasses"] >> balanceClasses;
2028     }
2029     void write( FileStorage& fs ) const
2030     {
2031         fs << "descPercent" << descPercent;
2032         fs << "targetRatio" << targetRatio;
2033         fs << "balanceClasses" << balanceClasses;
2034     }
2035     void print() const
2036     {
2037         cout << "descPercent: " << descPercent << endl;
2038         cout << "targetRatio: " << targetRatio << endl;
2039         cout << "balanceClasses: " << balanceClasses << endl;
2040     }
2041
2042     float descPercent; // Percentage of extracted descriptors to use for training.
2043     float targetRatio; // Try to get this ratio of positive to negative samples (minimum).
2044     bool balanceClasses;    // Balance class weights by number of samples in each (if true cSvmTrainTargetRatio is ignored).
2045 };
2046
2047 static void readUsedParams( const FileNode& fn, string& vocName, DDMParams& ddmParams, VocabTrainParams& vocabTrainParams, SVMTrainParamsExt& svmTrainParamsExt )
2048 {
2049     fn["vocName"] >> vocName;
2050
2051     FileNode currFn = fn;
2052
2053     currFn = fn["ddmParams"];
2054     ddmParams.read( currFn );
2055
2056     currFn = fn["vocabTrainParams"];
2057     vocabTrainParams.read( currFn );
2058
2059     currFn = fn["svmTrainParamsExt"];
2060     svmTrainParamsExt.read( currFn );
2061 }
2062
2063 static void writeUsedParams( FileStorage& fs, const string& vocName, const DDMParams& ddmParams, const VocabTrainParams& vocabTrainParams, const SVMTrainParamsExt& svmTrainParamsExt )
2064 {
2065     fs << "vocName" << vocName;
2066
2067     fs << "ddmParams" << "{";
2068     ddmParams.write(fs);
2069     fs << "}";
2070
2071     fs << "vocabTrainParams" << "{";
2072     vocabTrainParams.write(fs);
2073     fs << "}";
2074
2075     fs << "svmTrainParamsExt" << "{";
2076     svmTrainParamsExt.write(fs);
2077     fs << "}";
2078 }
2079
2080 static void printUsedParams( const string& vocPath, const string& resDir,
2081                       const DDMParams& ddmParams, const VocabTrainParams& vocabTrainParams,
2082                       const SVMTrainParamsExt& svmTrainParamsExt )
2083 {
2084     cout << "CURRENT CONFIGURATION" << endl;
2085     cout << "----------------------------------------------------------------" << endl;
2086     cout << "vocPath: " << vocPath << endl;
2087     cout << "resDir: " << resDir << endl;
2088     cout << endl; ddmParams.print();
2089     cout << endl; vocabTrainParams.print();
2090     cout << endl; svmTrainParamsExt.print();
2091     cout << "----------------------------------------------------------------" << endl << endl;
2092 }
2093
2094 static bool readVocabulary( const string& filename, Mat& vocabulary )
2095 {
2096     cout << "Reading vocabulary...";
2097     FileStorage fs( filename, FileStorage::READ );
2098     if( fs.isOpened() )
2099     {
2100         fs["vocabulary"] >> vocabulary;
2101         cout << "done" << endl;
2102         return true;
2103     }
2104     return false;
2105 }
2106
2107 static bool writeVocabulary( const string& filename, const Mat& vocabulary )
2108 {
2109     cout << "Saving vocabulary..." << endl;
2110     FileStorage fs( filename, FileStorage::WRITE );
2111     if( fs.isOpened() )
2112     {
2113         fs << "vocabulary" << vocabulary;
2114         return true;
2115     }
2116     return false;
2117 }
2118
2119 static Mat trainVocabulary( const string& filename, VocData& vocData, const VocabTrainParams& trainParams,
2120                      const Ptr<FeatureDetector>& fdetector, const Ptr<DescriptorExtractor>& dextractor )
2121 {
2122     Mat vocabulary;
2123     if( !readVocabulary( filename, vocabulary) )
2124     {
2125         CV_Assert( dextractor->descriptorType() == CV_32FC1 );
2126         const int elemSize = CV_ELEM_SIZE(dextractor->descriptorType());
2127         const int descByteSize = dextractor->descriptorSize() * elemSize;
2128         const int bytesInMB = 1048576;
2129         const int maxDescCount = (trainParams.memoryUse * bytesInMB) / descByteSize; // Total number of descs to use for training.
2130
2131         cout << "Extracting VOC data..." << endl;
2132         vector<ObdImage> images;
2133         vector<char> objectPresent;
2134         vocData.getClassImages( trainParams.trainObjClass, CV_OBD_TRAIN, images, objectPresent );
2135
2136         cout << "Computing descriptors..." << endl;
2137         RNG& rng = theRNG();
2138         TermCriteria terminate_criterion;
2139         terminate_criterion.epsilon = FLT_EPSILON;
2140         BOWKMeansTrainer bowTrainer( trainParams.vocabSize, terminate_criterion, 3, KMEANS_PP_CENTERS );
2141
2142         while( images.size() > 0 )
2143         {
2144             if( bowTrainer.descripotorsCount() > maxDescCount )
2145             {
2146 #ifdef DEBUG_DESC_PROGRESS
2147                 cout << "Breaking due to full memory ( descriptors count = " << bowTrainer.descripotorsCount()
2148                         << "; descriptor size in bytes = " << descByteSize << "; all used memory = "
2149                         << bowTrainer.descripotorsCount()*descByteSize << endl;
2150 #endif
2151                 break;
2152             }
2153
2154             // Randomly pick an image from the dataset which hasn't yet been seen
2155             // and compute the descriptors from that image.
2156             int randImgIdx = rng( (unsigned)images.size() );
2157             Mat colorImage = imread( images[randImgIdx].path );
2158             vector<KeyPoint> imageKeypoints;
2159             fdetector->detect( colorImage, imageKeypoints );
2160             Mat imageDescriptors;
2161             dextractor->compute( colorImage, imageKeypoints, imageDescriptors );
2162
2163             //check that there were descriptors calculated for the current image
2164             if( !imageDescriptors.empty() )
2165             {
2166                 int descCount = imageDescriptors.rows;
2167                 // Extract trainParams.descProportion descriptors from the image, breaking if the 'allDescriptors' matrix becomes full
2168                 int descsToExtract = static_cast<int>(trainParams.descProportion * static_cast<float>(descCount));
2169                 // Fill mask of used descriptors
2170                 vector<char> usedMask( descCount, false );
2171                 fill( usedMask.begin(), usedMask.begin() + descsToExtract, true );
2172                 for( int i = 0; i < descCount; i++ )
2173                 {
2174                     int i1 = rng(descCount), i2 = rng(descCount);
2175                     char tmp = usedMask[i1]; usedMask[i1] = usedMask[i2]; usedMask[i2] = tmp;
2176                 }
2177
2178                 for( int i = 0; i < descCount; i++ )
2179                 {
2180                     if( usedMask[i] && bowTrainer.descripotorsCount() < maxDescCount )
2181                         bowTrainer.add( imageDescriptors.row(i) );
2182                 }
2183             }
2184
2185 #ifdef DEBUG_DESC_PROGRESS
2186             cout << images.size() << " images left, " << images[randImgIdx].id << " processed - "
2187                     <</* descs_extracted << "/" << image_descriptors.rows << " extracted - " << */
2188                     cvRound((static_cast<double>(bowTrainer.descripotorsCount())/static_cast<double>(maxDescCount))*100.0)
2189                     << " % memory used" << ( imageDescriptors.empty() ? " -> no descriptors extracted, skipping" : "") << endl;
2190 #endif
2191
2192             // Delete the current element from images so it is not added again
2193             images.erase( images.begin() + randImgIdx );
2194         }
2195
2196         cout << "Maximum allowed descriptor count: " << maxDescCount << ", Actual descriptor count: " << bowTrainer.descripotorsCount() << endl;
2197
2198         cout << "Training vocabulary..." << endl;
2199         vocabulary = bowTrainer.cluster();
2200
2201         if( !writeVocabulary(filename, vocabulary) )
2202         {
2203             cout << "Error: file " << filename << " can not be opened to write" << endl;
2204             exit(-1);
2205         }
2206     }
2207     return vocabulary;
2208 }
2209
2210 static bool readBowImageDescriptor( const string& file, Mat& bowImageDescriptor )
2211 {
2212     FileStorage fs( file, FileStorage::READ );
2213     if( fs.isOpened() )
2214     {
2215         fs["imageDescriptor"] >> bowImageDescriptor;
2216         return true;
2217     }
2218     return false;
2219 }
2220
2221 static bool writeBowImageDescriptor( const string& file, const Mat& bowImageDescriptor )
2222 {
2223     FileStorage fs( file, FileStorage::WRITE );
2224     if( fs.isOpened() )
2225     {
2226         fs << "imageDescriptor" << bowImageDescriptor;
2227         return true;
2228     }
2229     return false;
2230 }
2231
2232 // Load in the bag of words vectors for a set of images, from file if possible
2233 static void calculateImageDescriptors( const vector<ObdImage>& images, vector<Mat>& imageDescriptors,
2234                                 Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
2235                                 const string& resPath )
2236 {
2237     CV_Assert( !bowExtractor->getVocabulary().empty() );
2238     imageDescriptors.resize( images.size() );
2239
2240     for( size_t i = 0; i < images.size(); i++ )
2241     {
2242         string filename = resPath + bowImageDescriptorsDir + "/" + images[i].id + ".xml.gz";
2243         if( readBowImageDescriptor( filename, imageDescriptors[i] ) )
2244         {
2245 #ifdef DEBUG_DESC_PROGRESS
2246             cout << "Loaded bag of word vector for image " << i+1 << " of " << images.size() << " (" << images[i].id << ")" << endl;
2247 #endif
2248         }
2249         else
2250         {
2251             Mat colorImage = imread( images[i].path );
2252 #ifdef DEBUG_DESC_PROGRESS
2253             cout << "Computing descriptors for image " << i+1 << " of " << images.size() << " (" << images[i].id << ")" << flush;
2254 #endif
2255             vector<KeyPoint> keypoints;
2256             fdetector->detect( colorImage, keypoints );
2257 #ifdef DEBUG_DESC_PROGRESS
2258                 cout << " + generating BoW vector" << std::flush;
2259 #endif
2260             bowExtractor->compute( colorImage, keypoints, imageDescriptors[i] );
2261 #ifdef DEBUG_DESC_PROGRESS
2262             cout << " ...DONE " << static_cast<int>(static_cast<float>(i+1)/static_cast<float>(images.size())*100.0)
2263                  << " % complete" << endl;
2264 #endif
2265             if( !imageDescriptors[i].empty() )
2266             {
2267                 if( !writeBowImageDescriptor( filename, imageDescriptors[i] ) )
2268                 {
2269                     cout << "Error: file " << filename << "can not be opened to write bow image descriptor" << endl;
2270                     exit(-1);
2271                 }
2272             }
2273         }
2274     }
2275 }
2276
2277 static void removeEmptyBowImageDescriptors( vector<ObdImage>& images, vector<Mat>& bowImageDescriptors,
2278                                      vector<char>& objectPresent )
2279 {
2280     CV_Assert( !images.empty() );
2281     for( int i = (int)images.size() - 1; i >= 0; i-- )
2282     {
2283         bool res = bowImageDescriptors[i].empty();
2284         if( res )
2285         {
2286             cout << "Removing image " << images[i].id << " due to no descriptors..." << endl;
2287             images.erase( images.begin() + i );
2288             bowImageDescriptors.erase( bowImageDescriptors.begin() + i );
2289             objectPresent.erase( objectPresent.begin() + i );
2290         }
2291     }
2292 }
2293
2294 static void removeBowImageDescriptorsByCount( vector<ObdImage>& images, vector<Mat> bowImageDescriptors, vector<char> objectPresent,
2295                                        const SVMTrainParamsExt& svmParamsExt, int descsToDelete )
2296 {
2297     RNG& rng = theRNG();
2298     int pos_ex = (int)std::count( objectPresent.begin(), objectPresent.end(), (char)1 );
2299     int neg_ex = (int)std::count( objectPresent.begin(), objectPresent.end(), (char)0 );
2300
2301     while( descsToDelete != 0 )
2302     {
2303         int randIdx = rng((unsigned)images.size());
2304
2305         // Prefer positive training examples according to svmParamsExt.targetRatio if required
2306         if( objectPresent[randIdx] )
2307         {
2308             if( (static_cast<float>(pos_ex)/static_cast<float>(neg_ex+pos_ex)  < svmParamsExt.targetRatio) &&
2309                 (neg_ex > 0) && (svmParamsExt.balanceClasses == false) )
2310             { continue; }
2311             else
2312             { pos_ex--; }
2313         }
2314         else
2315         { neg_ex--; }
2316
2317         images.erase( images.begin() + randIdx );
2318         bowImageDescriptors.erase( bowImageDescriptors.begin() + randIdx );
2319         objectPresent.erase( objectPresent.begin() + randIdx );
2320
2321         descsToDelete--;
2322     }
2323     CV_Assert( bowImageDescriptors.size() == objectPresent.size() );
2324 }
2325
2326 static void setSVMParams( CvSVMParams& svmParams, CvMat& class_wts_cv, const Mat& responses, bool balanceClasses )
2327 {
2328     int pos_ex = countNonZero(responses == 1);
2329     int neg_ex = countNonZero(responses == -1);
2330     cout << pos_ex << " positive training samples; " << neg_ex << " negative training samples" << endl;
2331
2332     svmParams.svm_type = CvSVM::C_SVC;
2333     svmParams.kernel_type = CvSVM::RBF;
2334     if( balanceClasses )
2335     {
2336         Mat class_wts( 2, 1, CV_32FC1 );
2337         // The first training sample determines the '+1' class internally, even if it is negative,
2338         // so store whether this is the case so that the class weights can be reversed accordingly.
2339         bool reversed_classes = (responses.at<float>(0) < 0.f);
2340         if( reversed_classes == false )
2341         {
2342             class_wts.at<float>(0) = static_cast<float>(pos_ex)/static_cast<float>(pos_ex+neg_ex); // weighting for costs of positive class + 1 (i.e. cost of false positive - larger gives greater cost)
2343             class_wts.at<float>(1) = static_cast<float>(neg_ex)/static_cast<float>(pos_ex+neg_ex); // weighting for costs of negative class - 1 (i.e. cost of false negative)
2344         }
2345         else
2346         {
2347             class_wts.at<float>(0) = static_cast<float>(neg_ex)/static_cast<float>(pos_ex+neg_ex);
2348             class_wts.at<float>(1) = static_cast<float>(pos_ex)/static_cast<float>(pos_ex+neg_ex);
2349         }
2350         class_wts_cv = class_wts;
2351         svmParams.class_weights = &class_wts_cv;
2352     }
2353 }
2354
2355 static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid,
2356                             CvParamGrid& p_grid, CvParamGrid& nu_grid,
2357                             CvParamGrid& coef_grid, CvParamGrid& degree_grid )
2358 {
2359     c_grid = CvSVM::get_default_grid(CvSVM::C);
2360
2361     gamma_grid = CvSVM::get_default_grid(CvSVM::GAMMA);
2362
2363     p_grid = CvSVM::get_default_grid(CvSVM::P);
2364     p_grid.step = 0;
2365
2366     nu_grid = CvSVM::get_default_grid(CvSVM::NU);
2367     nu_grid.step = 0;
2368
2369     coef_grid = CvSVM::get_default_grid(CvSVM::COEF);
2370     coef_grid.step = 0;
2371
2372     degree_grid = CvSVM::get_default_grid(CvSVM::DEGREE);
2373     degree_grid.step = 0;
2374 }
2375
2376 static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
2377                          Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
2378                          const string& resPath )
2379 {
2380     /* first check if a previously trained svm for the current class has been saved to file */
2381     string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz";
2382
2383     FileStorage fs( svmFilename, FileStorage::READ);
2384     if( fs.isOpened() )
2385     {
2386         cout << "*** LOADING SVM CLASSIFIER FOR CLASS " << objClassName << " ***" << endl;
2387         svm.load( svmFilename.c_str() );
2388     }
2389     else
2390     {
2391         cout << "*** TRAINING CLASSIFIER FOR CLASS " << objClassName << " ***" << endl;
2392         cout << "CALCULATING BOW VECTORS FOR TRAINING SET OF " << objClassName << "..." << endl;
2393
2394         // Get classification ground truth for images in the training set
2395         vector<ObdImage> images;
2396         vector<Mat> bowImageDescriptors;
2397         vector<char> objectPresent;
2398         vocData.getClassImages( objClassName, CV_OBD_TRAIN, images, objectPresent );
2399
2400         // Compute the bag of words vector for each image in the training set.
2401         calculateImageDescriptors( images, bowImageDescriptors, bowExtractor, fdetector, resPath );
2402
2403         // Remove any images for which descriptors could not be calculated
2404         removeEmptyBowImageDescriptors( images, bowImageDescriptors, objectPresent );
2405
2406         CV_Assert( svmParamsExt.descPercent > 0.f && svmParamsExt.descPercent <= 1.f );
2407         if( svmParamsExt.descPercent < 1.f )
2408         {
2409             int descsToDelete = static_cast<int>(static_cast<float>(images.size())*(1.0-svmParamsExt.descPercent));
2410
2411             cout << "Using " << (images.size() - descsToDelete) << " of " << images.size() <<
2412                     " descriptors for training (" << svmParamsExt.descPercent*100.0 << " %)" << endl;
2413             removeBowImageDescriptorsByCount( images, bowImageDescriptors, objectPresent, svmParamsExt, descsToDelete );
2414         }
2415
2416         // Prepare the input matrices for SVM training.
2417         Mat trainData( (int)images.size(), bowExtractor->getVocabulary().rows, CV_32FC1 );
2418         Mat responses( (int)images.size(), 1, CV_32SC1 );
2419
2420         // Transfer bag of words vectors and responses across to the training data matrices
2421         for( size_t imageIdx = 0; imageIdx < images.size(); imageIdx++ )
2422         {
2423             // Transfer image descriptor (bag of words vector) to training data matrix
2424             Mat submat = trainData.row((int)imageIdx);
2425             if( bowImageDescriptors[imageIdx].cols != bowExtractor->descriptorSize() )
2426             {
2427                 cout << "Error: computed bow image descriptor size " << bowImageDescriptors[imageIdx].cols
2428                      << " differs from vocabulary size" << bowExtractor->getVocabulary().cols << endl;
2429                 exit(-1);
2430             }
2431             bowImageDescriptors[imageIdx].copyTo( submat );
2432
2433             // Set response value
2434             responses.at<int>((int)imageIdx) = objectPresent[imageIdx] ? 1 : -1;
2435         }
2436
2437         cout << "TRAINING SVM FOR CLASS ..." << objClassName << "..." << endl;
2438         CvSVMParams svmParams;
2439         CvMat class_wts_cv;
2440         setSVMParams( svmParams, class_wts_cv, responses, svmParamsExt.balanceClasses );
2441         CvParamGrid c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid;
2442         setSVMTrainAutoParams( c_grid, gamma_grid,  p_grid, nu_grid, coef_grid, degree_grid );
2443         svm.train_auto( trainData, responses, Mat(), Mat(), svmParams, 10, c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid );
2444         cout << "SVM TRAINING FOR CLASS " << objClassName << " COMPLETED" << endl;
2445
2446         svm.save( svmFilename.c_str() );
2447         cout << "SAVED CLASSIFIER TO FILE" << endl;
2448     }
2449 }
2450
2451 static void computeConfidences( CvSVM& svm, const string& objClassName, VocData& vocData,
2452                          Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
2453                          const string& resPath )
2454 {
2455     cout << "*** CALCULATING CONFIDENCES FOR CLASS " << objClassName << " ***" << endl;
2456     cout << "CALCULATING BOW VECTORS FOR TEST SET OF " << objClassName << "..." << endl;
2457     // Get classification ground truth for images in the test set
2458     vector<ObdImage> images;
2459     vector<Mat> bowImageDescriptors;
2460     vector<char> objectPresent;
2461     vocData.getClassImages( objClassName, CV_OBD_TEST, images, objectPresent );
2462
2463     // Compute the bag of words vector for each image in the test set
2464     calculateImageDescriptors( images, bowImageDescriptors, bowExtractor, fdetector, resPath );
2465     // Remove any images for which descriptors could not be calculated
2466     removeEmptyBowImageDescriptors( images, bowImageDescriptors, objectPresent);
2467
2468     // Use the bag of words vectors to calculate classifier output for each image in test set
2469     cout << "CALCULATING CONFIDENCE SCORES FOR CLASS " << objClassName << "..." << endl;
2470     vector<float> confidences( images.size() );
2471     float signMul = 1.f;
2472     for( size_t imageIdx = 0; imageIdx < images.size(); imageIdx++ )
2473     {
2474         if( imageIdx == 0 )
2475         {
2476             // In the first iteration, determine the sign of the positive class
2477             float classVal = confidences[imageIdx] = svm.predict( bowImageDescriptors[imageIdx], false );
2478             float scoreVal = confidences[imageIdx] = svm.predict( bowImageDescriptors[imageIdx], true );
2479             signMul = (classVal < 0) == (scoreVal < 0) ? 1.f : -1.f;
2480         }
2481         // svm output of decision function
2482         confidences[imageIdx] = signMul * svm.predict( bowImageDescriptors[imageIdx], true );
2483     }
2484
2485     cout << "WRITING QUERY RESULTS TO VOC RESULTS FILE FOR CLASS " << objClassName << "..." << endl;
2486     vocData.writeClassifierResultsFile( resPath + plotsDir, objClassName, CV_OBD_TEST, images, confidences, 1, true );
2487
2488     cout << "DONE - " << objClassName << endl;
2489     cout << "---------------------------------------------------------------" << endl;
2490 }
2491
2492 static void computeGnuPlotOutput( const string& resPath, const string& objClassName, VocData& vocData )
2493 {
2494     vector<float> precision, recall;
2495     float ap;
2496
2497     const string resultFile = vocData.getResultsFilename( objClassName, CV_VOC_TASK_CLASSIFICATION, CV_OBD_TEST);
2498     const string plotFile = resultFile.substr(0, resultFile.size()-4) + ".plt";
2499
2500     cout << "Calculating precision recall curve for class '" <<objClassName << "'" << endl;
2501     vocData.calcClassifierPrecRecall( resPath + plotsDir + "/" + resultFile, precision, recall, ap, true );
2502     cout << "Outputting to GNUPlot file..." << endl;
2503     vocData.savePrecRecallToGnuplot( resPath + plotsDir + "/" + plotFile, precision, recall, ap, objClassName, CV_VOC_PLOT_PNG );
2504 }
2505
2506
2507
2508
2509 int main(int argc, char** argv)
2510 {
2511     if( argc != 3 && argc != 6 )
2512     {
2513         help(argv);
2514         return -1;
2515     }
2516
2517     cv::initModule_nonfree();
2518
2519     const string vocPath = argv[1], resPath = argv[2];
2520
2521     // Read or set default parameters
2522     string vocName;
2523     DDMParams ddmParams;
2524     VocabTrainParams vocabTrainParams;
2525     SVMTrainParamsExt svmTrainParamsExt;
2526
2527     makeUsedDirs( resPath );
2528
2529     FileStorage paramsFS( resPath + "/" + paramsFile, FileStorage::READ );
2530     if( paramsFS.isOpened() )
2531     {
2532        readUsedParams( paramsFS.root(), vocName, ddmParams, vocabTrainParams, svmTrainParamsExt );
2533        CV_Assert( vocName == getVocName(vocPath) );
2534     }
2535     else
2536     {
2537         vocName = getVocName(vocPath);
2538         if( argc!= 6 )
2539         {
2540             cout << "Feature detector, descriptor extractor, descriptor matcher must be set" << endl;
2541             return -1;
2542         }
2543         ddmParams = DDMParams( argv[3], argv[4], argv[5] ); // from command line
2544         // vocabTrainParams and svmTrainParamsExt is set by defaults
2545         paramsFS.open( resPath + "/" + paramsFile, FileStorage::WRITE );
2546         if( paramsFS.isOpened() )
2547         {
2548             writeUsedParams( paramsFS, vocName, ddmParams, vocabTrainParams, svmTrainParamsExt );
2549             paramsFS.release();
2550         }
2551         else
2552         {
2553             cout << "File " << (resPath + "/" + paramsFile) << "can not be opened to write" << endl;
2554             return -1;
2555         }
2556     }
2557
2558     // Create detector, descriptor, matcher.
2559     Ptr<FeatureDetector> featureDetector = FeatureDetector::create( ddmParams.detectorType );
2560     Ptr<DescriptorExtractor> descExtractor = DescriptorExtractor::create( ddmParams.descriptorType );
2561     Ptr<BOWImgDescriptorExtractor> bowExtractor;
2562     if( !featureDetector || !descExtractor )
2563     {
2564         cout << "featureDetector or descExtractor was not created" << endl;
2565         return -1;
2566     }
2567     {
2568         Ptr<DescriptorMatcher> descMatcher = DescriptorMatcher::create( ddmParams.matcherType );
2569         if( !featureDetector || !descExtractor || !descMatcher )
2570         {
2571             cout << "descMatcher was not created" << endl;
2572             return -1;
2573         }
2574         bowExtractor = makePtr<BOWImgDescriptorExtractor>( descExtractor, descMatcher );
2575     }
2576
2577     // Print configuration to screen
2578     printUsedParams( vocPath, resPath, ddmParams, vocabTrainParams, svmTrainParamsExt );
2579     // Create object to work with VOC
2580     VocData vocData( vocPath, false );
2581
2582     // 1. Train visual word vocabulary if a pre-calculated vocabulary file doesn't already exist from previous run
2583     Mat vocabulary = trainVocabulary( resPath + "/" + vocabularyFile, vocData, vocabTrainParams,
2584                                       featureDetector, descExtractor );
2585     bowExtractor->setVocabulary( vocabulary );
2586
2587     // 2. Train a classifier and run a sample query for each object class
2588     const vector<string>& objClasses = vocData.getObjectClasses(); // object class list
2589     for( size_t classIdx = 0; classIdx < objClasses.size(); ++classIdx )
2590     {
2591         // Train a classifier on train dataset
2592         CvSVM svm;
2593         trainSVMClassifier( svm, svmTrainParamsExt, objClasses[classIdx], vocData,
2594                             bowExtractor, featureDetector, resPath );
2595
2596         // Now use the classifier over all images on the test dataset and rank according to score order
2597         // also calculating precision-recall etc.
2598         computeConfidences( svm, objClasses[classIdx], vocData,
2599                             bowExtractor, featureDetector, resPath );
2600         // Calculate precision/recall/ap and use GNUPlot to output to a pdf file
2601         computeGnuPlotOutput( resPath, objClasses[classIdx], vocData );
2602     }
2603     return 0;
2604 }