tools/caffe.cpp

   1 #ifdef WITH_PYTHON_LAYER
   2 #include "boost/python.hpp"
   3 namespace bp = boost::python;
   4 #endif
   5
   6 #include <gflags/gflags.h>
   7 #include <glog/logging.h>
   8
   9 #include <cstring>
  10 #include <map>
  11 #include <string>
  12 #include <vector>
  13
  14 #include "boost/algorithm/string.hpp"
  15 #include "caffe/caffe.hpp"
  16 #include "caffe/util/signal_handler.h"
  17
  18 using caffe::Blob;
  19 using caffe::Caffe;
  20 using caffe::Net;
  21 using caffe::Layer;
  22 using caffe::Solver;
  23 using caffe::shared_ptr;
  24 using caffe::string;
  25 using caffe::Timer;
  26 using caffe::vector;
  27 using std::ostringstream;
  28
  29 DEFINE_string(gpu, "",
  30     "Optional; run in GPU mode on given device IDs separated by ','."
  31     "Use '-gpu all' to run on all available GPUs. The effective training "
  32     "batch size is multiplied by the number of devices.");
  33 DEFINE_string(solver, "",
  34     "The solver definition protocol buffer text file.");
  35 DEFINE_string(model, "",
  36     "The model definition protocol buffer text file.");
  37 DEFINE_string(phase, "",
  38     "Optional; network phase (TRAIN or TEST). Only used for 'time'.");
  39 DEFINE_int32(level, 0,
  40     "Optional; network level.");
  41 DEFINE_string(stage, "",
  42     "Optional; network stages (not to be confused with phase), "
  43     "separated by ','.");
  44 DEFINE_string(snapshot, "",
  45     "Optional; the snapshot solver state to resume training.");
  46 DEFINE_string(weights, "",
  47     "Optional; the pretrained weights to initialize finetuning, "
  48     "separated by ','. Cannot be set simultaneously with snapshot.");
  49 DEFINE_int32(iterations, 50,
  50     "The number of iterations to run.");
  51 DEFINE_string(sigint_effect, "stop",
  52              "Optional; action to take when a SIGINT signal is received: "
  53               "snapshot, stop or none.");
  54 DEFINE_string(sighup_effect, "snapshot",
  55              "Optional; action to take when a SIGHUP signal is received: "
  56              "snapshot, stop or none.");
  57
  58 // A simple registry for caffe commands.
  59 typedef int (*BrewFunction)();
  60 typedef std::map<caffe::string, BrewFunction> BrewMap;
  61 BrewMap g_brew_map;
  62
  63 #define RegisterBrewFunction(func) \
  64 namespace { \
  65 class __Registerer_##func { \
  66  public: /* NOLINT */ \
  67   __Registerer_##func() { \
  68     g_brew_map[#func] = &func; \
  69   } \
  70 }; \
  71 __Registerer_##func g_registerer_##func; \
  72 }
  73
  74 static BrewFunction GetBrewFunction(const caffe::string& name) {
  75   if (g_brew_map.count(name)) {
  76     return g_brew_map[name];
  77   } else {
  78     LOG(ERROR) << "Available caffe actions:";
  79     for (BrewMap::iterator it = g_brew_map.begin();
  80          it != g_brew_map.end(); ++it) {
  81       LOG(ERROR) << "\t" << it->first;
  82     }
  83     LOG(FATAL) << "Unknown action: " << name;
  84     return NULL;  // not reachable, just to suppress old compiler warnings.
  85   }
  86 }
  87
  88 // Parse GPU ids or use all available devices
  89 static void get_gpus(vector<int>* gpus) {
  90   if (FLAGS_gpu == "all") {
  91     int count = 0;
  92 #ifndef CPU_ONLY
  93     CUDA_CHECK(cudaGetDeviceCount(&count));
  94 #else
  95     NO_GPU;
  96 #endif
  97     for (int i = 0; i < count; ++i) {
  98       gpus->push_back(i);
  99     }
 100   } else if (FLAGS_gpu.size()) {
 101     vector<string> strings;
 102     boost::split(strings, FLAGS_gpu, boost::is_any_of(","));
 103     for (int i = 0; i < strings.size(); ++i) {
 104       gpus->push_back(boost::lexical_cast<int>(strings[i]));
 105     }
 106   } else {
 107     CHECK_EQ(gpus->size(), 0);
 108   }
 109 }
 110
 111 // Parse phase from flags
 112 caffe::Phase get_phase_from_flags(caffe::Phase default_value) {
 113   if (FLAGS_phase == "")
 114     return default_value;
 115   if (FLAGS_phase == "TRAIN")
 116     return caffe::TRAIN;
 117   if (FLAGS_phase == "TEST")
 118     return caffe::TEST;
 119   LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\"";
 120   return caffe::TRAIN;  // Avoid warning
 121 }
 122
 123 // Parse stages from flags
 124 vector<string> get_stages_from_flags() {
 125   vector<string> stages;
 126   boost::split(stages, FLAGS_stage, boost::is_any_of(","));
 127   return stages;
 128 }
 129
// caffe commands are invoked as
//     caffe <command> <args>
//
// To add a command, define a function "int command()" and register it with
// RegisterBrewFunction(command);
 135
 136 // Device Query: show diagnostic information for a GPU device.
 137 int device_query() {
 138   LOG(INFO) << "Querying GPUs " << FLAGS_gpu;
 139   vector<int> gpus;
 140   get_gpus(&gpus);
 141   for (int i = 0; i < gpus.size(); ++i) {
 142     caffe::Caffe::SetDevice(gpus[i]);
 143     caffe::Caffe::DeviceQuery();
 144   }
 145   return 0;
 146 }
 147 RegisterBrewFunction(device_query);
 148
 149 // Load the weights from the specified caffemodel(s) into the train and
 150 // test nets.
 151 void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
 152   std::vector<std::string> model_names;
 153   boost::split(model_names, model_list, boost::is_any_of(",") );
 154   for (int i = 0; i < model_names.size(); ++i) {
 155     LOG(INFO) << "Finetuning from " << model_names[i];
 156     solver->net()->CopyTrainedLayersFrom(model_names[i]);
 157     for (int j = 0; j < solver->test_nets().size(); ++j) {
 158       solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]);
 159     }
 160   }
 161 }
 162
 163 // Translate the signal effect the user specified on the command-line to the
 164 // corresponding enumeration.
 165 caffe::SolverAction::Enum GetRequestedAction(
 166     const std::string& flag_value) {
 167   if (flag_value == "stop") {
 168     return caffe::SolverAction::STOP;
 169   }
 170   if (flag_value == "snapshot") {
 171     return caffe::SolverAction::SNAPSHOT;
 172   }
 173   if (flag_value == "none") {
 174     return caffe::SolverAction::NONE;
 175   }
 176   LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified";
 177 }
 178
 179 // Train / Finetune a model.
 180 int train() {
 181   CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train.";
 182   CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())
 183       << "Give a snapshot to resume training or weights to finetune "
 184       "but not both.";
 185   vector<string> stages = get_stages_from_flags();
 186
 187   caffe::SolverParameter solver_param;
 188   caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
 189
 190   solver_param.mutable_train_state()->set_level(FLAGS_level);
 191   for (int i = 0; i < stages.size(); i++) {
 192     solver_param.mutable_train_state()->add_stage(stages[i]);
 193   }
 194
 195   // If the gpus flag is not provided, allow the mode and device to be set
 196   // in the solver prototxt.
 197   if (FLAGS_gpu.size() == 0
 198       && solver_param.has_solver_mode()
 199       && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
 200       if (solver_param.has_device_id()) {
 201           FLAGS_gpu = "" +
 202               boost::lexical_cast<string>(solver_param.device_id());
 203       } else {  // Set default GPU if unspecified
 204           FLAGS_gpu = "" + boost::lexical_cast<string>(0);
 205       }
 206   }
 207
 208   vector<int> gpus;
 209   get_gpus(&gpus);
 210   if (gpus.size() == 0) {
 211     LOG(INFO) << "Use CPU.";
 212     Caffe::set_mode(Caffe::CPU);
 213   } else {
 214     ostringstream s;
 215     for (int i = 0; i < gpus.size(); ++i) {
 216       s << (i ? ", " : "") << gpus[i];
 217     }
 218     LOG(INFO) << "Using GPUs " << s.str();
 219 #ifndef CPU_ONLY
 220     cudaDeviceProp device_prop;
 221     for (int i = 0; i < gpus.size(); ++i) {
 222       cudaGetDeviceProperties(&device_prop, gpus[i]);
 223       LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
 224     }
 225 #endif
 226     solver_param.set_device_id(gpus[0]);
 227     Caffe::SetDevice(gpus[0]);
 228     Caffe::set_mode(Caffe::GPU);
 229     Caffe::set_solver_count(gpus.size());
 230   }
 231
 232   caffe::SignalHandler signal_handler(
 233         GetRequestedAction(FLAGS_sigint_effect),
 234         GetRequestedAction(FLAGS_sighup_effect));
 235
 236   shared_ptr<caffe::Solver<float> >
 237       solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
 238
 239   solver->SetActionFunction(signal_handler.GetActionFunction());
 240
 241   if (FLAGS_snapshot.size()) {
 242     LOG(INFO) << "Resuming from " << FLAGS_snapshot;
 243     solver->Restore(FLAGS_snapshot.c_str());
 244   } else if (FLAGS_weights.size()) {
 245     CopyLayers(solver.get(), FLAGS_weights);
 246   }
 247
 248   LOG(INFO) << "Starting Optimization";
 249   if (gpus.size() > 1) {
 250 #ifdef USE_NCCL
 251     caffe::NCCL<float> nccl(solver);
 252     nccl.Run(gpus, FLAGS_snapshot.size() > 0 ? FLAGS_snapshot.c_str() : NULL);
 253 #else
 254     LOG(FATAL) << "Multi-GPU execution not available - rebuild with USE_NCCL";
 255 #endif
 256   } else {
 257     solver->Solve();
 258   }
 259   LOG(INFO) << "Optimization Done.";
 260   return 0;
 261 }
 262 RegisterBrewFunction(train);
 263
 264
 265 // Test: score a model.
 266 int test() {
 267   CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score.";
 268   CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score.";
 269   vector<string> stages = get_stages_from_flags();
 270
 271   // Set device id and mode
 272   vector<int> gpus;
 273   get_gpus(&gpus);
 274   if (gpus.size() != 0) {
 275     LOG(INFO) << "Use GPU with device ID " << gpus[0];
 276 #ifndef CPU_ONLY
 277     cudaDeviceProp device_prop;
 278     cudaGetDeviceProperties(&device_prop, gpus[0]);
 279     LOG(INFO) << "GPU device name: " << device_prop.name;
 280 #endif
 281     Caffe::SetDevice(gpus[0]);
 282     Caffe::set_mode(Caffe::GPU);
 283   } else {
 284     LOG(INFO) << "Use CPU.";
 285     Caffe::set_mode(Caffe::CPU);
 286   }
 287   // Instantiate the caffe net.
 288   Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages);
 289   caffe_net.CopyTrainedLayersFrom(FLAGS_weights);
 290   LOG(INFO) << "Running for " << FLAGS_iterations << " iterations.";
 291
 292   vector<int> test_score_output_id;
 293   vector<float> test_score;
 294   float loss = 0;
 295   for (int i = 0; i < FLAGS_iterations; ++i) {
 296     float iter_loss;
 297     const vector<Blob<float>*>& result =
 298         caffe_net.Forward(&iter_loss);
 299     loss += iter_loss;
 300     int idx = 0;
 301     for (int j = 0; j < result.size(); ++j) {
 302       const float* result_vec = result[j]->cpu_data();
 303       for (int k = 0; k < result[j]->count(); ++k, ++idx) {
 304         const float score = result_vec[k];
 305         if (i == 0) {
 306           test_score.push_back(score);
 307           test_score_output_id.push_back(j);
 308         } else {
 309           test_score[idx] += score;
 310         }
 311         const std::string& output_name = caffe_net.blob_names()[
 312             caffe_net.output_blob_indices()[j]];
 313         LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
 314       }
 315     }
 316   }
 317   loss /= FLAGS_iterations;
 318   LOG(INFO) << "Loss: " << loss;
 319   for (int i = 0; i < test_score.size(); ++i) {
 320     const std::string& output_name = caffe_net.blob_names()[
 321         caffe_net.output_blob_indices()[test_score_output_id[i]]];
 322     const float loss_weight = caffe_net.blob_loss_weights()[
 323         caffe_net.output_blob_indices()[test_score_output_id[i]]];
 324     std::ostringstream loss_msg_stream;
 325     const float mean_score = test_score[i] / FLAGS_iterations;
 326     if (loss_weight) {
 327       loss_msg_stream << " (* " << loss_weight
 328                       << " = " << loss_weight * mean_score << " loss)";
 329     }
 330     LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
 331   }
 332
 333   return 0;
 334 }
 335 RegisterBrewFunction(test);
 336
 337
 338 // Time: benchmark the execution time of a model.
 339 int time() {
 340   CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time.";
 341   caffe::Phase phase = get_phase_from_flags(caffe::TRAIN);
 342   vector<string> stages = get_stages_from_flags();
 343
 344   // Set device id and mode
 345   vector<int> gpus;
 346   get_gpus(&gpus);
 347   if (gpus.size() != 0) {
 348     LOG(INFO) << "Use GPU with device ID " << gpus[0];
 349     Caffe::SetDevice(gpus[0]);
 350     Caffe::set_mode(Caffe::GPU);
 351   } else {
 352     LOG(INFO) << "Use CPU.";
 353     Caffe::set_mode(Caffe::CPU);
 354   }
 355   // Instantiate the caffe net.
 356   Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages);
 357
 358   // Do a clean forward and backward pass, so that memory allocation are done
 359   // and future iterations will be more stable.
 360   LOG(INFO) << "Performing Forward";
 361   // Note that for the speed benchmark, we will assume that the network does
 362   // not take any input blobs.
 363   float initial_loss;
 364   caffe_net.Forward(&initial_loss);
 365   LOG(INFO) << "Initial loss: " << initial_loss;
 366   LOG(INFO) << "Performing Backward";
 367   caffe_net.Backward();
 368
 369   const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
 370   const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
 371   const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
 372   const vector<vector<bool> >& bottom_need_backward =
 373       caffe_net.bottom_need_backward();
 374   LOG(INFO) << "*** Benchmark begins ***";
 375   LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
 376   Timer total_timer;
 377   total_timer.Start();
 378   Timer forward_timer;
 379   Timer backward_timer;
 380   Timer timer;
 381   std::vector<double> forward_time_per_layer(layers.size(), 0.0);
 382   std::vector<double> backward_time_per_layer(layers.size(), 0.0);
 383   double forward_time = 0.0;
 384   double backward_time = 0.0;
 385   for (int j = 0; j < FLAGS_iterations; ++j) {
 386     Timer iter_timer;
 387     iter_timer.Start();
 388     forward_timer.Start();
 389     for (int i = 0; i < layers.size(); ++i) {
 390       timer.Start();
 391       layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
 392       forward_time_per_layer[i] += timer.MicroSeconds();
 393     }
 394     forward_time += forward_timer.MicroSeconds();
 395     backward_timer.Start();
 396     for (int i = layers.size() - 1; i >= 0; --i) {
 397       timer.Start();
 398       layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
 399                           bottom_vecs[i]);
 400       backward_time_per_layer[i] += timer.MicroSeconds();
 401     }
 402     backward_time += backward_timer.MicroSeconds();
 403     LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
 404       << iter_timer.MilliSeconds() << " ms.";
 405   }
 406   LOG(INFO) << "Average time per layer: ";
 407   for (int i = 0; i < layers.size(); ++i) {
 408     const caffe::string& layername = layers[i]->layer_param().name();
 409     LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
 410       "\tforward: " << forward_time_per_layer[i] / 1000 /
 411       FLAGS_iterations << " ms.";
 412     LOG(INFO) << std::setfill(' ') << std::setw(10) << layername  <<
 413       "\tbackward: " << backward_time_per_layer[i] / 1000 /
 414       FLAGS_iterations << " ms.";
 415   }
 416   total_timer.Stop();
 417   LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
 418     FLAGS_iterations << " ms.";
 419   LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
 420     FLAGS_iterations << " ms.";
 421   LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
 422     FLAGS_iterations << " ms.";
 423   LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
 424   LOG(INFO) << "*** Benchmark ends ***";
 425   return 0;
 426 }
 427 RegisterBrewFunction(time);
 428
 429 int main(int argc, char** argv) {
 430   // Print output to stderr (while still logging).
 431   FLAGS_alsologtostderr = 1;
 432   // Set version
 433   gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
 434   // Usage message.
 435   gflags::SetUsageMessage("command line brew\n"
 436       "usage: caffe <command> <args>\n\n"
 437       "commands:\n"
 438       "  train           train or finetune a model\n"
 439       "  test            score a model\n"
 440       "  device_query    show GPU diagnostic information\n"
 441       "  time            benchmark model execution time");
 442   // Run tool or show usage.
 443   caffe::GlobalInit(&argc, &argv);
 444   if (argc == 2) {
 445 #ifdef WITH_PYTHON_LAYER
 446     try {
 447 #endif
 448       return GetBrewFunction(caffe::string(argv[1]))();
 449 #ifdef WITH_PYTHON_LAYER
 450     } catch (bp::error_already_set) {
 451       PyErr_Print();
 452       return 1;
 453     }
 454 #endif
 455   } else {
 456     gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
 457   }
 458 }