--- /dev/null
+// Copyright 2013 Yangqing Jia
+// This program converts a set of images to a leveldb by storing them as Datum
+// proto buffers.
+// Usage:
+// convert_dataset ROOTFOLDER LISTFILE DB_NAME [0/1]
+// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
+// should be a list of files as well as their labels, in the format as
+// subfolder1/file1.JPEG 7
+// ....
+// If the last argument is 1, the lines of LISTFILE are randomly shuffled before
+// they are processed. Otherwise the files are stored in the order listed, and
+// you are responsible for shuffling them yourself.
+
+#include <glog/logging.h>
+#include <leveldb/db.h>
+#include <leveldb/write_batch.h>
+
+#include <algorithm>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/util/io.hpp"
+
+using namespace caffe;
+using std::pair;
+using std::string;
+using std::stringstream;
+
+// Converts the images named in LISTFILE (argv[2]) into Datum protos and stores
+// them in a newly created leveldb at DB_NAME (argv[3]).
+//
+// Each image path is formed by appending the list entry directly to ROOTFOLDER
+// (argv[1]); no path separator is inserted, so ROOTFOLDER should end with one.
+// Every record is keyed as "<8-digit line index>_<list entry>", so keys sort in
+// processing order. If a fifth argument starting with '1' is given, the list
+// is shuffled before processing.
+//
+// Returns 0 on success and 1 when too few arguments are given. Failing to open
+// the list file or the database, or failing to write a batch, aborts via CHECK.
+int main(int argc, char** argv) {
+  ::google::InitGoogleLogging(argv[0]);
+  if (argc < 4) {
+    LOG(ERROR) << "Usage: " << argv[0]
+        << " ROOTFOLDER LISTFILE DB_NAME [0/1]";
+    return 1;
+  }
+
+  // Read all "filename label" pairs up front so they can be shuffled.
+  std::ifstream infile(argv[2]);
+  CHECK(infile.is_open()) << "Failed to open list file " << argv[2];
+  vector<pair<string, int> > lines;
+  string filename;
+  int label;
+  while (infile >> filename >> label) {
+    lines.push_back(std::make_pair(filename, label));
+  }
+  if (argc == 5 && argv[4][0] == '1') {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling data";
+    std::random_shuffle(lines.begin(), lines.end());
+  }
+  LOG(INFO) << "A total of " << lines.size() << " images.";
+
+  leveldb::DB* db;
+  leveldb::Options options;
+  // Refuse to touch an existing database; always start from an empty one.
+  options.error_if_exists = true;
+  options.create_if_missing = true;
+  options.write_buffer_size = 268435456;  // 256 MiB
+  LOG(INFO) << "Opening leveldb " << argv[3];
+  leveldb::Status status = leveldb::DB::Open(
+      options, argv[3], &db);
+  CHECK(status.ok()) << "Failed to open leveldb " << argv[3];
+
+  // Number of records accumulated before they are flushed to the database.
+  const int kBatchSize = 1000;
+  const string root_folder(argv[1]);
+  const int num_lines = static_cast<int>(lines.size());
+  Datum datum;
+  int count = 0;
+  // Holds only the "%08d_" prefix (at most 12 characters plus the terminator);
+  // the file name is appended as a string so long names cannot overflow it.
+  char index_cstr[16];
+  leveldb::WriteBatch batch;
+  for (int line_id = 0; line_id < num_lines; ++line_id) {
+    const string& image_name = lines[line_id].first;
+    ReadImageToDatum(root_folder + image_name, lines[line_id].second, &datum);
+    // sequential
+    snprintf(index_cstr, sizeof(index_cstr), "%08d_", line_id);
+    const string key = string(index_cstr) + image_name;
+    string value;
+    // get the value
+    datum.SerializeToString(&value);
+    batch.Put(key, value);
+    if (++count % kBatchSize == 0) {
+      status = db->Write(leveldb::WriteOptions(), &batch);
+      CHECK(status.ok()) << "Failed to write to leveldb: "
+          << status.ToString();
+      LOG(ERROR) << "Processed " << count << " files.";
+      batch.Clear();
+    }
+  }
+  // Flush the final, partially filled batch; without this the trailing
+  // (count % kBatchSize) records would never reach the database.
+  if (count % kBatchSize != 0) {
+    status = db->Write(leveldb::WriteOptions(), &batch);
+    CHECK(status.ok()) << "Failed to write to leveldb: "
+        << status.ToString();
+    LOG(ERROR) << "Processed " << count << " files.";
+  }
+
+  delete db;
+  return 0;
+}
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/io/coded_stream.h>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
#include <algorithm>
#include <string>
CHECK(proto.SerializeToOstream(&output));
}
+
+// Loads the image file `filename` as a 3-channel color image and stores it,
+// together with `label`, in `*datum`.
+//
+// The datum's channels/height/width/label fields are set and any previous
+// data / float_data contents are cleared. Pixel bytes are written to the data
+// field channel by channel, each channel in row-major order (index order:
+// channel, row, column). Channels appear in the order OpenCV stores them,
+// which its imread documentation gives as B, G, R for color images.
+//
+// Aborts via CHECK if the image cannot be read.
+void ReadImageToDatum(const string& filename, const int label, Datum* datum) {
+  Mat cv_img;
+  cv_img = cv::imread(filename, CV_LOAD_IMAGE_COLOR);
+  CHECK(cv_img.data) << "Could not open or find the image: " << filename;
+  datum->set_channels(3);
+  datum->set_height(cv_img.rows);
+  datum->set_width(cv_img.cols);
+  datum->set_label(label);
+  datum->clear_data();
+  datum->clear_float_data();
+  string* datum_string = datum->mutable_data();
+  // The final size is known, so allocate once instead of growing per byte.
+  datum_string->reserve(static_cast<size_t>(3) * cv_img.rows * cv_img.cols);
+  for (int c = 0; c < 3; ++c) {
+    for (int h = 0; h < cv_img.rows; ++h) {
+      for (int w = 0; w < cv_img.cols; ++w) {
+        datum_string->push_back(static_cast<char>(cv_img.at<Vec3b>(h, w)[c]));
+      }
+    }
+  }
+}
+
} // namespace caffe