From 44ebe29015a7be0adccb3a99eb79368477609f9c Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 19 Mar 2014 23:08:51 +0800 Subject: [PATCH] Removing feature binarization and image retrieval examples --- examples/feature_extraction/binarize_features.cpp | 161 -------------------- examples/feature_extraction/retrieve_images.cpp | 176 ---------------------- 2 files changed, 337 deletions(-) delete mode 100644 examples/feature_extraction/binarize_features.cpp delete mode 100644 examples/feature_extraction/retrieve_images.cpp diff --git a/examples/feature_extraction/binarize_features.cpp b/examples/feature_extraction/binarize_features.cpp deleted file mode 100644 index e15e125..0000000 --- a/examples/feature_extraction/binarize_features.cpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2014 kloudkl@github - -#include -#include -#include // for std::signbit -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/vision_layers.hpp" -#include "caffe/net.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/io.hpp" - -using namespace caffe; // NOLINT(build/namespaces) - -template -void binarize(const vector > >& feature_blob_vector, - shared_ptr > binary_codes); - -template -int features_binarization_pipeline(int argc, char** argv); - -int main(int argc, char** argv) { - return features_binarization_pipeline(argc, argv); -// return features_binarization_pipeline(argc, argv); -} - -template -int features_binarization_pipeline(int argc, char** argv) { - const int num_required_args = 5; - if (argc < num_required_args) { - LOG(ERROR)<< - "This program compresses real valued features into compact binary codes.\n" - "Usage: demo_binarize_features real_valued_feature_prototxt" - " feature_blob_name save_binarized_feature_binaryproto_file" - " num_mini_batches [CPU/GPU] [DEVICE_ID=0]"; - return 1; - } - int arg_pos = num_required_args; - - arg_pos = num_required_args; - if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { - LOG(ERROR)<< "Using GPU"; - uint device_id = 0; - if (argc > arg_pos + 1) { - device_id = atoi(argv[arg_pos + 1]); - } - LOG(ERROR) << "Using Device_id=" << device_id; - Caffe::SetDevice(device_id); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(ERROR) << "Using CPU"; - Caffe::set_mode(Caffe::CPU); - } - Caffe::set_phase(Caffe::TEST); - - NetParameter pretrained_net_param; - - arg_pos = 0; // the name of the executable - - // Expected prototxt contains at least one data layer as the real valued - // features. - /* - layers { - layer { - name: "real_valued_features" - type: "data" - source: "/path/to/your/real/valued/features_leveldb" - batchsize: 256 - } - top: "real_valued_features" - top: "label" - } - */ - string real_valued_feature_prototxt(argv[++arg_pos]); - NetParameter real_valued_feature_net_param; - ReadProtoFromTextFile(real_valued_feature_prototxt, - &real_valued_feature_net_param); - shared_ptr > real_valued_feature_net( - new Net(real_valued_feature_net_param)); - - string feature_blob_name(argv[++arg_pos]); - CHECK(real_valued_feature_net->has_blob(feature_blob_name)) - << "Unknown feature blob name " << feature_blob_name - << " in the network " << real_valued_feature_prototxt; - - string save_binarized_feature_binaryproto_file(argv[++arg_pos]); - - int num_mini_batches = atoi(argv[++arg_pos]); - - LOG(ERROR)<< "Binarizing features"; - vector*> input_vec; - vector > > feature_blob_vector; - for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { - real_valued_feature_net->Forward(input_vec); - const shared_ptr > feature_blob = real_valued_feature_net - ->blob_by_name(feature_blob_name); - feature_blob_vector.push_back(feature_blob); - } - shared_ptr > feature_binary_codes(new Blob()); - binarize(feature_blob_vector, feature_binary_codes); - - BlobProto blob_proto; - feature_binary_codes->ToProto(&blob_proto); - WriteProtoToBinaryFile(blob_proto, save_binarized_feature_binaryproto_file); - LOG(ERROR) << "Successfully binarized " << feature_binary_codes->num() - << " features!"; - return 0; -} - -// http://scikit-learn.org/stable/modules/preprocessing.html -// #feature-binarization -template -void binarize(const vector > >& feature_blob_vector, - shared_ptr > binary_codes) { - CHECK_GT(feature_blob_vector.size(), 0); - Dtype sum; - size_t count = 0; - size_t num_features = 0; - for (int i = 0; i < feature_blob_vector.size(); ++i) { - num_features += feature_blob_vector[i]->num(); - const Dtype* data = feature_blob_vector[i]->cpu_data(); - for (int j = 0; j < feature_blob_vector[i]->count(); ++j) { - sum += data[j]; - ++count; - } - } - Dtype mean = sum / count; - int dim = feature_blob_vector[0]->count() / feature_blob_vector[0]->num(); - int size_of_code = sizeof(Dtype) * 8; - binary_codes->Reshape(num_features, (dim + size_of_code - 1) / size_of_code, - 1, 1); - uint64_t code; - count = 0; - for (int i = 0; i < feature_blob_vector.size(); ++i) { - for (int j = 0; j < feature_blob_vector[i]->num(); ++j) { - const Dtype* data = feature_blob_vector[i]->cpu_data() - + feature_blob_vector[i]->offset(j); - Dtype* binary_data = binary_codes->mutable_cpu_data() - + binary_codes->offset(count++); - code = 0; - int k; - for (k = 0; k < dim;) { - code |= std::signbit(mean - data[k]); - ++k; - if (k % size_of_code == 0) { - binary_data[(k + size_of_code - 1) / size_of_code] = code; - code = 0; - } else { - code <<= 1; - } - } // for k - if (k % size_of_code != 0) { - code <<= (size_of_code - 1 - k % size_of_code); - binary_data[(k + size_of_code - 1) / size_of_code] = code; - } - } // for j - } // for i -} diff --git a/examples/feature_extraction/retrieve_images.cpp b/examples/feature_extraction/retrieve_images.cpp deleted file mode 100644 index dddff69..0000000 --- a/examples/feature_extraction/retrieve_images.cpp +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2014 kloudkl@github - -#include -#include -#include -#include // for std::priority_queue -#include -#include // for pair -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/vision_layers.hpp" -#include "caffe/net.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" - -using namespace caffe; // NOLINT(build/namespaces) - -template -void similarity_search( - const shared_ptr > sample_binary_feature_blobs, - const shared_ptr > query_binary_feature, - const int top_k_results, - vector > >* retrieval_results); - -template -int image_retrieval_pipeline(int argc, char** argv); - -int main(int argc, char** argv) { - return image_retrieval_pipeline(argc, argv); -// return image_retrieval_pipeline(argc, argv); -} - -template -int image_retrieval_pipeline(int argc, char** argv) { - const int num_required_args = 4; - if (argc < num_required_args) { - LOG(ERROR)<< - "This program takes in binarized features of query images and sample images" - " extracted by Caffe to retrieve similar images.\n" - "Usage: demo_retrieve_images sample_binary_features_binaryproto_file" - " query_binary_features_binaryproto_file save_retrieval_result_filename" - " [top_k_results=1] [CPU/GPU] [DEVICE_ID=0]"; - return 1; - } - int arg_pos = num_required_args; - - int top_k_results; - if (argc <= num_required_args) { - top_k_results = 1; - } else { - top_k_results = atoi(argv[arg_pos]); - CHECK_GE(top_k_results, 0); - } - - arg_pos = num_required_args + 1; - if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { - LOG(ERROR)<< "Using GPU"; - uint device_id = 0; - if (argc > arg_pos + 1) { - device_id = atoi(argv[arg_pos + 1]); - } - LOG(ERROR) << "Using Device_id=" << device_id; - Caffe::SetDevice(device_id); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(ERROR) << "Using CPU"; - Caffe::set_mode(Caffe::CPU); - } - Caffe::set_phase(Caffe::TEST); - - arg_pos = 0; // the name of the executable - - LOG(ERROR)<< "Loading sample binary features"; - string sample_binary_features_binaryproto_file(argv[++arg_pos]); - BlobProto sample_binary_features; - ReadProtoFromBinaryFile(sample_binary_features_binaryproto_file, - &sample_binary_features); - shared_ptr > sample_binary_feature_blob(new Blob()); - sample_binary_feature_blob->FromProto(sample_binary_features); - int num_samples = sample_binary_feature_blob->num(); - if (top_k_results > num_samples) { - top_k_results = num_samples; - } - - LOG(ERROR)<< "Loading query binary features"; - string query_images_feature_blob_binaryproto(argv[++arg_pos]); - BlobProto query_images_features; - ReadProtoFromBinaryFile(query_images_feature_blob_binaryproto, - &query_images_features); - shared_ptr > query_binary_feature_blob(new Blob()); - query_binary_feature_blob->FromProto(query_images_features); - - string save_retrieval_result_filename(argv[++arg_pos]); - LOG(ERROR)<< "Opening result file " << save_retrieval_result_filename; - FILE * result_fileid = fopen(save_retrieval_result_filename.c_str(), - "w"); - - LOG(ERROR)<< "Retrieving images"; - vector > > retrieval_results; - int query_image_index = 0; - - similarity_search(sample_binary_feature_blob, - query_binary_feature_blob, top_k_results, - &retrieval_results); - int num_results = retrieval_results.size(); - for (int i = 0; i < num_results; ++i) { - fprintf(result_fileid, "%d", query_image_index++); - for (int j = 0; j < retrieval_results[i].size(); ++j) { - fprintf(result_fileid, " %d:%d", retrieval_results[i][j].first, - retrieval_results[i][j].second); - } - fprintf(result_fileid, "\n"); - } - if (result_fileid != NULL) { - fclose(result_fileid); - } - LOG(ERROR) << "Successfully retrieved similar images for " << num_results - << " queries!"; - return 0; -} - -class MinHeapComparison { - public: - bool operator()(const std::pair& lhs, - const std::pair&rhs) const { - return (lhs.first > rhs.first); - } -}; - -template -void similarity_search( - const shared_ptr > sample_images_feature_blob, - const shared_ptr > query_binary_feature_blob, - const int top_k_results, - vector > >* retrieval_results) { - int num_samples = sample_images_feature_blob->num(); - int num_queries = query_binary_feature_blob->num(); - int dim = query_binary_feature_blob->count() / num_queries; - LOG(ERROR)<< "num_samples " << num_samples << ", num_queries " << - num_queries << ", dim " << dim; - int hamming_dist; - int neighbor_index; - retrieval_results->resize(num_queries); - std::priority_queue, std::vector >, - MinHeapComparison> results; - for (int i = 0; i < num_queries; ++i) { - while (!results.empty()) { - results.pop(); - } - const Dtype* query_data = query_binary_feature_blob->cpu_data() - + query_binary_feature_blob->offset(i); - for (int k = 0; k < num_samples; ++k) { - const Dtype* sample_data = sample_images_feature_blob->cpu_data() - + sample_images_feature_blob->offset(k); - hamming_dist = caffe_hamming_distance(dim, query_data, sample_data); - if (results.size() < top_k_results) { - results.push(std::make_pair(-hamming_dist, k)); - } else if (-hamming_dist > results.top().first) { - // smaller hamming dist, nearer neighbor - results.pop(); - results.push(std::make_pair(-hamming_dist, k)); - } - } // for (int k = 0; k < num_samples; ++k) { - retrieval_results->at(i).resize(results.size()); - for (int k = results.size() - 1; k >= 0; --k) { - hamming_dist = -results.top().first; - neighbor_index = results.top().second; - retrieval_results->at(i)[k] = std::make_pair(neighbor_index, - hamming_dist); - results.pop(); - } - } // for (int i = 0; i < num_queries; ++i) { -} -- 2.7.4