1 #ifdef WITH_PYTHON_LAYER
2 #include "boost/python.hpp"
3 namespace bp = boost::python;
6 #include <gflags/gflags.h>
7 #include <glog/logging.h>
14 #include "boost/algorithm/string.hpp"
15 #include "caffe/caffe.hpp"
16 #include "caffe/util/signal_handler.h"
23 using caffe::shared_ptr;
27 using std::ostringstream;
29 DEFINE_string(gpu, "",
30 "Optional; run in GPU mode on given device IDs separated by ','."
31 "Use '-gpu all' to run on all available GPUs. The effective training "
32 "batch size is multiplied by the number of devices.");
// --solver: path to the solver definition; the train command requires it.
33 DEFINE_string(solver, "",
34 "The solver definition protocol buffer text file.");
// --model: path to the network definition; the test and time commands
// require it.
35 DEFINE_string(model, "",
36 "The model definition protocol buffer text file.");
// --phase: "TRAIN", "TEST" or empty; interpreted by get_phase_from_flags().
37 DEFINE_string(phase, "",
38 "Optional; network phase (TRAIN or TEST). Only used for 'time'.");
// --level: integer network level (default 0), forwarded to the solver's train
// state and to the Net constructor.
39 DEFINE_int32(level, 0,
40 "Optional; network level.");
// --stage: network stages; split on ',' by get_stages_from_flags().
41 DEFINE_string(stage, "",
42 "Optional; network stages (not to be confused with phase), "
// --snapshot: solver state file that the train command restores from.
44 DEFINE_string(snapshot, "",
45 "Optional; the snapshot solver state to resume training.");
// --weights: pretrained weights. Required by the test command; the train
// command rejects it when --snapshot is also given.
46 DEFINE_string(weights, "",
47 "Optional; the pretrained weights to initialize finetuning, "
48 "separated by ','. Cannot be set simultaneously with snapshot.");
// --iterations: loop count for the test and time commands (default 50).
49 DEFINE_int32(iterations, 50,
50 "The number of iterations to run.");
// --sigint_effect / --sighup_effect: textual signal actions ("snapshot",
// "stop" or "none") that GetRequestedAction() translates for the solver.
51 DEFINE_string(sigint_effect, "stop",
52 "Optional; action to take when a SIGINT signal is received: "
53 "snapshot, stop or none.");
54 DEFINE_string(sighup_effect, "snapshot",
55 "Optional; action to take when a SIGHUP signal is received: "
56 "snapshot, stop or none.");
58 // A simple registry for caffe commands.
// BrewFunction is a pointer to a command entry point: no arguments, returns
// int (main() returns that value directly).
59 typedef int (*BrewFunction)();
// BrewMap associates a command name with the function that implements it.
60 typedef std::map<caffe::string, BrewFunction> BrewMap;
// RegisterBrewFunction(func): declares a class __Registerer_<func> whose
// constructor stores &func in g_brew_map under the stringized name "<func>",
// and defines an object g_registerer_<func> of that class so the registration
// happens when that object is constructed.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; consider renaming the helper class.
63 #define RegisterBrewFunction(func) \
65 class __Registerer_##func { \
66 public: /* NOLINT */ \
67 __Registerer_##func() { \
68 g_brew_map[#func] = &func; \
71 __Registerer_##func g_registerer_##func; \
74 static BrewFunction GetBrewFunction(const caffe::string& name) {
75 if (g_brew_map.count(name)) {
76 return g_brew_map[name];
78 LOG(ERROR) << "Available caffe actions:";
79 for (BrewMap::iterator it = g_brew_map.begin();
80 it != g_brew_map.end(); ++it) {
81 LOG(ERROR) << "\t" << it->first;
83 LOG(FATAL) << "Unknown action: " << name;
84 return NULL; // not reachable, just to suppress old compiler warnings.
88 // Parse GPU ids or use all available devices
// Fills *gpus from the --gpu flag. The caller supplies the output vector.
89 static void get_gpus(vector<int>* gpus) {
// "all": query the CUDA device count and loop over indices 0..count-1
// (presumably appending each index to *gpus — the loop body is not shown
// here; confirm).
90 if (FLAGS_gpu == "all") {
93 CUDA_CHECK(cudaGetDeviceCount(&count));
97 for (int i = 0; i < count; ++i) {
// Any other non-empty value is treated as a ','-separated list; each token is
// converted with boost::lexical_cast<int> and appended in order.
// NOTE(review): no handling of a non-numeric token is visible here — confirm
// how a lexical_cast failure is meant to surface.
100 } else if (FLAGS_gpu.size()) {
101 vector<string> strings;
102 boost::split(strings, FLAGS_gpu, boost::is_any_of(","));
103 for (int i = 0; i < strings.size(); ++i) {
104 gpus->push_back(boost::lexical_cast<int>(strings[i]));
// Checks that the output vector is empty (presumably on the path where no
// --gpu value was given — confirm against the full function).
107 CHECK_EQ(gpus->size(), 0);
111 // Parse phase from flags
// Converts the --phase flag into a caffe::Phase.
//   default_value: returned unchanged when the flag is empty.
// The flag is compared against the exact strings "TRAIN" and "TEST"; any
// other non-empty value ends in LOG(FATAL).
112 caffe::Phase get_phase_from_flags(caffe::Phase default_value) {
113 if (FLAGS_phase == "")
114 return default_value;
115 if (FLAGS_phase == "TRAIN")
117 if (FLAGS_phase == "TEST")
119 LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\"";
// Placed after LOG(FATAL) only so the function formally returns on every path.
120 return caffe::TRAIN; // Avoid warning
123 // Parse stages from flags
// Splits the --stage flag on ',' into a list of stage names.
// NOTE(review): boost::split is believed to yield a single empty token for an
// empty input string, so an unset --stage would produce one empty stage name
// rather than an empty list — confirm whether callers rely on that.
124 vector<string> get_stages_from_flags() {
125 vector<string> stages;
126 boost::split(stages, FLAGS_stage, boost::is_any_of(","));
130 // caffe commands, invoked from the command line as
131 //     caffe <command> <args>
133 // To add a command, define a function "int command()" and register it with
134 // RegisterBrewFunction(command);
136 // Device Query: show diagnostic information for a GPU device.
// Logs the raw --gpu flag, then for each selected device id makes that device
// current and calls Caffe::DeviceQuery().
138 LOG(INFO) << "Querying GPUs " << FLAGS_gpu;
141 for (int i = 0; i < gpus.size(); ++i) {
142 caffe::Caffe::SetDevice(gpus[i]);
143 caffe::Caffe::DeviceQuery();
// Makes the function reachable as the "device_query" command.
147 RegisterBrewFunction(device_query);
149 // Load the weights from the specified caffemodel(s) into the train and
151 void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
152 std::vector<std::string> model_names;
153 boost::split(model_names, model_list, boost::is_any_of(",") );
154 for (int i = 0; i < model_names.size(); ++i) {
155 LOG(INFO) << "Finetuning from " << model_names[i];
156 solver->net()->CopyTrainedLayersFrom(model_names[i]);
157 for (int j = 0; j < solver->test_nets().size(); ++j) {
158 solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]);
163 // Translate the signal effect the user specified on the command-line to the
164 // corresponding enumeration.
165 caffe::SolverAction::Enum GetRequestedAction(
166 const std::string& flag_value) {
167 if (flag_value == "stop") {
168 return caffe::SolverAction::STOP;
170 if (flag_value == "snapshot") {
171 return caffe::SolverAction::SNAPSHOT;
173 if (flag_value == "none") {
174 return caffe::SolverAction::NONE;
176 LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified";
179 // Train / Finetune a model.
// Preconditions: --solver must be given, and --snapshot and --weights are
// mutually exclusive.
181 CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train.";
182 CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())
183 << "Give a snapshot to resume training or weights to finetune "
185 vector<string> stages = get_stages_from_flags();
// Load the solver configuration from the file named by --solver.
187 caffe::SolverParameter solver_param;
188 caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
// Copy --level and every parsed stage into the solver's train state.
190 solver_param.mutable_train_state()->set_level(FLAGS_level);
191 for (int i = 0; i < stages.size(); i++) {
192 solver_param.mutable_train_state()->add_stage(stages[i]);
195 // If the gpus flag is not provided, allow the mode and device to be set
196 // in the solver prototxt.
197 if (FLAGS_gpu.size() == 0
198 && solver_param.has_solver_mode()
199 && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
200 if (solver_param.has_device_id()) {
202 boost::lexical_cast<string>(solver_param.device_id());
203 } else { // Set default GPU if unspecified
204 FLAGS_gpu = "" + boost::lexical_cast<string>(0);
// No device selected: run on the CPU.
210 if (gpus.size() == 0) {
211 LOG(INFO) << "Use CPU.";
212 Caffe::set_mode(Caffe::CPU);
// Otherwise log the selected ids as a ", "-separated list.
215 for (int i = 0; i < gpus.size(); ++i) {
216 s << (i ? ", " : "") << gpus[i];
218 LOG(INFO) << "Using GPUs " << s.str();
// Log each device's name.
// NOTE(review): the result of cudaGetDeviceProperties is not checked here.
220 cudaDeviceProp device_prop;
221 for (int i = 0; i < gpus.size(); ++i) {
222 cudaGetDeviceProperties(&device_prop, gpus[i]);
223 LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
// The first listed GPU becomes the solver's device and the current device;
// the solver count is set to the number of selected GPUs.
226 solver_param.set_device_id(gpus[0]);
227 Caffe::SetDevice(gpus[0]);
228 Caffe::set_mode(Caffe::GPU);
229 Caffe::set_solver_count(gpus.size());
// Build the signal handler from the two --sig*_effect flags.
232 caffe::SignalHandler signal_handler(
233 GetRequestedAction(FLAGS_sigint_effect),
234 GetRequestedAction(FLAGS_sighup_effect));
// Create the solver from the (possibly adjusted) parameters.
236 shared_ptr<caffe::Solver<float> >
237 solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
// Install the signal handler's action function on the solver.
239 solver->SetActionFunction(signal_handler.GetActionFunction());
// Either resume from a snapshot or start from pretrained weights.
241 if (FLAGS_snapshot.size()) {
242 LOG(INFO) << "Resuming from " << FLAGS_snapshot;
243 solver->Restore(FLAGS_snapshot.c_str());
244 } else if (FLAGS_weights.size()) {
245 CopyLayers(solver.get(), FLAGS_weights);
248 LOG(INFO) << "Starting Optimization";
// More than one GPU: training is driven through caffe::NCCL, which receives
// the snapshot path (or NULL when none was given). The fatal message below
// presumably belongs to a build without USE_NCCL — the preprocessor lines are
// not visible here; confirm.
249 if (gpus.size() > 1) {
251 caffe::NCCL<float> nccl(solver);
252 nccl.Run(gpus, FLAGS_snapshot.size() > 0 ? FLAGS_snapshot.c_str() : NULL);
254 LOG(FATAL) << "Multi-GPU execution not available - rebuild with USE_NCCL";
259 LOG(INFO) << "Optimization Done.";
// Makes the function reachable as the "train" command.
262 RegisterBrewFunction(train);
265 // Test: score a model.
// Preconditions: both --model and --weights must be given.
267 CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score.";
268 CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score.";
269 vector<string> stages = get_stages_from_flags();
271 // Set device id and mode
// Only the first selected GPU is used; with no GPU selected the CPU is used.
// NOTE(review): the result of cudaGetDeviceProperties is not checked here.
274 if (gpus.size() != 0) {
275 LOG(INFO) << "Use GPU with device ID " << gpus[0];
277 cudaDeviceProp device_prop;
278 cudaGetDeviceProperties(&device_prop, gpus[0]);
279 LOG(INFO) << "GPU device name: " << device_prop.name;
281 Caffe::SetDevice(gpus[0]);
282 Caffe::set_mode(Caffe::GPU);
284 LOG(INFO) << "Use CPU.";
285 Caffe::set_mode(Caffe::CPU);
287 // Instantiate the caffe net.
// The net is always built in the TEST phase.
// NOTE(review): --weights is passed through unsplit here, whereas train()
// goes through CopyLayers(), which splits the value on ','.
288 Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages);
289 caffe_net.CopyTrainedLayersFrom(FLAGS_weights);
290 LOG(INFO) << "Running for " << FLAGS_iterations << " iterations.";
// test_score holds one running sum per output value, in the order the values
// are visited; test_score_output_id records the index of the output blob each
// of those values came from.
292 vector<int> test_score_output_id;
293 vector<float> test_score;
295 for (int i = 0; i < FLAGS_iterations; ++i) {
297 const vector<Blob<float>*>& result =
298 caffe_net.Forward(&iter_loss);
// Walk every value of every output blob; idx advances with each value.
301 for (int j = 0; j < result.size(); ++j) {
302 const float* result_vec = result[j]->cpu_data();
303 for (int k = 0; k < result[j]->count(); ++k, ++idx) {
304 const float score = result_vec[k];
// A slot is either created for this value or added to (presumably created on
// the first iteration only — the selecting condition is not shown; confirm).
306 test_score.push_back(score);
307 test_score_output_id.push_back(j);
309 test_score[idx] += score;
311 const std::string& output_name = caffe_net.blob_names()[
312 caffe_net.output_blob_indices()[j]];
313 LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
// Report averages over the number of iterations.
// NOTE(review): --iterations 0 makes these divisions by zero.
317 loss /= FLAGS_iterations;
318 LOG(INFO) << "Loss: " << loss;
319 for (int i = 0; i < test_score.size(); ++i) {
320 const std::string& output_name = caffe_net.blob_names()[
321 caffe_net.output_blob_indices()[test_score_output_id[i]]];
322 const float loss_weight = caffe_net.blob_loss_weights()[
323 caffe_net.output_blob_indices()[test_score_output_id[i]]];
324 std::ostringstream loss_msg_stream;
325 const float mean_score = test_score[i] / FLAGS_iterations;
// Suffix showing the output's weighted contribution to the loss.
327 loss_msg_stream << " (* " << loss_weight
328 << " = " << loss_weight * mean_score << " loss)";
330 LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
// Makes the function reachable as the "test" command.
335 RegisterBrewFunction(test);
338 // Time: benchmark the execution time of a model.
// Precondition: --model must be given. The phase defaults to TRAIN when
// --phase is empty.
340 CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time.";
341 caffe::Phase phase = get_phase_from_flags(caffe::TRAIN);
342 vector<string> stages = get_stages_from_flags();
344 // Set device id and mode
// Only the first selected GPU is used; with no GPU selected the CPU is used.
347 if (gpus.size() != 0) {
348 LOG(INFO) << "Use GPU with device ID " << gpus[0];
349 Caffe::SetDevice(gpus[0]);
350 Caffe::set_mode(Caffe::GPU);
352 LOG(INFO) << "Use CPU.";
353 Caffe::set_mode(Caffe::CPU);
355 // Instantiate the caffe net.
356 Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages);
358 // Do a clean forward and backward pass, so that memory allocations are done
359 // and future iterations will be more stable.
360 LOG(INFO) << "Performing Forward";
361 // Note that for the speed benchmark, we will assume that the network does
362 // not take any input blobs.
364 caffe_net.Forward(&initial_loss);
365 LOG(INFO) << "Initial loss: " << initial_loss;
366 LOG(INFO) << "Performing Backward";
367 caffe_net.Backward();
// Per-layer handles used to drive each layer individually in the timed loops.
369 const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
370 const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
371 const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
372 const vector<vector<bool> >& bottom_need_backward =
373 caffe_net.bottom_need_backward();
374 LOG(INFO) << "*** Benchmark begins ***";
375 LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
379 Timer backward_timer;
// Accumulators are in microseconds (they sum Timer::MicroSeconds() readings);
// the per-layer vectors have one slot per layer.
381 std::vector<double> forward_time_per_layer(layers.size(), 0.0);
382 std::vector<double> backward_time_per_layer(layers.size(), 0.0);
383 double forward_time = 0.0;
384 double backward_time = 0.0;
385 for (int j = 0; j < FLAGS_iterations; ++j) {
// Forward pass: layers run in index order, each timed individually.
388 forward_timer.Start();
389 for (int i = 0; i < layers.size(); ++i) {
391 layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
392 forward_time_per_layer[i] += timer.MicroSeconds();
394 forward_time += forward_timer.MicroSeconds();
// Backward pass: layers run in reverse index order.
395 backward_timer.Start();
396 for (int i = layers.size() - 1; i >= 0; --i) {
398 layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
400 backward_time_per_layer[i] += timer.MicroSeconds();
402 backward_time += backward_timer.MicroSeconds();
403 LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
404 << iter_timer.MilliSeconds() << " ms.";
// Report averages: microsecond sums are divided by 1000 to get milliseconds
// and by the iteration count to get the per-iteration mean.
// NOTE(review): --iterations 0 makes these divisions by zero.
406 LOG(INFO) << "Average time per layer: ";
407 for (int i = 0; i < layers.size(); ++i) {
408 const caffe::string& layername = layers[i]->layer_param().name();
409 LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
410 "\tforward: " << forward_time_per_layer[i] / 1000 /
411 FLAGS_iterations << " ms.";
412 LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
413 "\tbackward: " << backward_time_per_layer[i] / 1000 /
414 FLAGS_iterations << " ms.";
417 LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
418 FLAGS_iterations << " ms.";
419 LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
420 FLAGS_iterations << " ms.";
421 LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
422 FLAGS_iterations << " ms.";
423 LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
424 LOG(INFO) << "*** Benchmark ends ***";
// Makes the function reachable as the "time" command.
427 RegisterBrewFunction(time);
// Program entry point: configures logging and gflags usage/version text,
// initialises caffe, then dispatches to the command named by argv[1].
429 int main(int argc, char** argv) {
430 // Print output to stderr (while still logging).
431 FLAGS_alsologtostderr = 1;
// Version string reported by gflags, taken from the CAFFE_VERSION macro.
433 gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
// Usage text listing the available commands.
435 gflags::SetUsageMessage("command line brew\n"
436 "usage: caffe <command> <args>\n\n"
438 " train train or finetune a model\n"
439 " test score a model\n"
440 " device_query show GPU diagnostic information\n"
441 " time benchmark model execution time");
442 // Run tool or show usage.
443 caffe::GlobalInit(&argc, &argv);
445 #ifdef WITH_PYTHON_LAYER
// Look up the command by name and return its result as the exit status.
448 return GetBrewFunction(caffe::string(argv[1]))();
449 #ifdef WITH_PYTHON_LAYER
// NOTE(review): the exception is caught by value; catching by const reference
// is the usual practice.
450 } catch (bp::error_already_set) {
// Prints usage restricted to the flags defined under "tools/caffe".
456 gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");