Merge pull request #3186 from lionleaf/makefile_opencv_imgcodecs
author Ronghang Hu <huronghang@hotmail.com>
Thu, 22 Oct 2015 22:17:26 +0000 (15:17 -0700)
committer Ronghang Hu <huronghang@hotmail.com>
Thu, 22 Oct 2015 22:17:26 +0000 (15:17 -0700)
Add opencv_imgcodecs to library path in Makefile

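The change named in the title amounts to a conditional linker entry; a minimal sketch of the Makefile logic (the exact surrounding context in the tree may differ):

    ifeq ($(USE_OPENCV), 1)
        LIBRARIES += opencv_core opencv_highgui opencv_imgproc

        # OpenCV 3 split imread/imwrite out of highgui into a separate
        # imgcodecs module, so it must be linked explicitly
        ifeq ($(OPENCV_VERSION), 3)
            LIBRARIES += opencv_imgcodecs
        endif
    endif
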
217 files changed:
CMakeLists.txt
INSTALL.md
Makefile
Makefile.config.example
cmake/ConfigGen.cmake
cmake/Dependencies.cmake
cmake/Summary.cmake
cmake/Templates/caffe_config.h.in
docs/installation.md
docs/tutorial/solver.md
examples/00-classification.ipynb
examples/mnist/lenet_adadelta_solver.prototxt
examples/mnist/lenet_solver_adam.prototxt
examples/mnist/lenet_solver_rmsprop.prototxt
examples/mnist/mnist_autoencoder_solver_adadelta.prototxt
examples/mnist/mnist_autoencoder_solver_adagrad.prototxt
examples/mnist/mnist_autoencoder_solver_nesterov.prototxt
examples/web_demo/requirements.txt
include/caffe/blob.hpp
include/caffe/caffe.hpp
include/caffe/common_layers.hpp
include/caffe/data_layers.hpp
include/caffe/filler.hpp
include/caffe/layer.hpp
include/caffe/loss_layers.hpp
include/caffe/neuron_layers.hpp
include/caffe/sgd_solvers.hpp [new file with mode: 0644]
include/caffe/solver.hpp
include/caffe/solver_factory.hpp [new file with mode: 0644]
include/caffe/syncedmem.hpp
include/caffe/util/blocking_queue.hpp
include/caffe/util/im2col.hpp
include/caffe/util/io.hpp
include/caffe/util/upgrade_proto.hpp
include/caffe/vision_layers.hpp
matlab/+caffe/+test/test_io.m [new file with mode: 0644]
matlab/+caffe/private/caffe_.cpp
matlab/+caffe/run_tests.m
matlab/hdf5creation/store2hdf5.m
models/bvlc_reference_caffenet/train_val.prototxt
python/caffe/_caffe.cpp
python/caffe/draw.py
python/caffe/io.py
python/caffe/pycaffe.py
python/caffe/test/test_io.py [new file with mode: 0644]
src/caffe/layer_factory.cpp
src/caffe/layers/absval_layer.cpp
src/caffe/layers/absval_layer.cu
src/caffe/layers/accuracy_layer.cpp
src/caffe/layers/argmax_layer.cpp
src/caffe/layers/base_conv_layer.cpp
src/caffe/layers/base_data_layer.cpp
src/caffe/layers/batch_reindex_layer.cpp [new file with mode: 0644]
src/caffe/layers/batch_reindex_layer.cu [new file with mode: 0644]
src/caffe/layers/bnll_layer.cpp
src/caffe/layers/bnll_layer.cu
src/caffe/layers/concat_layer.cpp
src/caffe/layers/concat_layer.cu
src/caffe/layers/contrastive_loss_layer.cpp
src/caffe/layers/contrastive_loss_layer.cu
src/caffe/layers/conv_layer.cpp
src/caffe/layers/conv_layer.cu
src/caffe/layers/cudnn_conv_layer.cpp
src/caffe/layers/cudnn_conv_layer.cu
src/caffe/layers/cudnn_lcn_layer.cpp [new file with mode: 0644]
src/caffe/layers/cudnn_lcn_layer.cu [new file with mode: 0644]
src/caffe/layers/cudnn_lrn_layer.cpp [new file with mode: 0644]
src/caffe/layers/cudnn_lrn_layer.cu [new file with mode: 0644]
src/caffe/layers/cudnn_pooling_layer.cpp
src/caffe/layers/cudnn_pooling_layer.cu
src/caffe/layers/cudnn_relu_layer.cpp
src/caffe/layers/cudnn_relu_layer.cu
src/caffe/layers/cudnn_sigmoid_layer.cpp
src/caffe/layers/cudnn_sigmoid_layer.cu
src/caffe/layers/cudnn_softmax_layer.cpp
src/caffe/layers/cudnn_softmax_layer.cu
src/caffe/layers/cudnn_tanh_layer.cpp
src/caffe/layers/cudnn_tanh_layer.cu
src/caffe/layers/data_layer.cpp
src/caffe/layers/deconv_layer.cpp
src/caffe/layers/deconv_layer.cu
src/caffe/layers/dropout_layer.cpp
src/caffe/layers/dropout_layer.cu
src/caffe/layers/dummy_data_layer.cpp
src/caffe/layers/eltwise_layer.cpp
src/caffe/layers/eltwise_layer.cu
src/caffe/layers/embed_layer.cpp
src/caffe/layers/embed_layer.cu
src/caffe/layers/euclidean_loss_layer.cpp
src/caffe/layers/euclidean_loss_layer.cu
src/caffe/layers/exp_layer.cpp
src/caffe/layers/exp_layer.cu
src/caffe/layers/filter_layer.cpp
src/caffe/layers/filter_layer.cu
src/caffe/layers/flatten_layer.cpp
src/caffe/layers/hdf5_data_layer.cpp
src/caffe/layers/hdf5_data_layer.cu
src/caffe/layers/hdf5_output_layer.cpp
src/caffe/layers/hdf5_output_layer.cu
src/caffe/layers/hinge_loss_layer.cpp
src/caffe/layers/im2col_layer.cpp
src/caffe/layers/im2col_layer.cu
src/caffe/layers/image_data_layer.cpp
src/caffe/layers/infogain_loss_layer.cpp
src/caffe/layers/inner_product_layer.cpp
src/caffe/layers/inner_product_layer.cu
src/caffe/layers/log_layer.cpp
src/caffe/layers/log_layer.cu
src/caffe/layers/loss_layer.cpp
src/caffe/layers/lrn_layer.cpp
src/caffe/layers/lrn_layer.cu
src/caffe/layers/memory_data_layer.cpp
src/caffe/layers/multinomial_logistic_loss_layer.cpp
src/caffe/layers/mvn_layer.cpp
src/caffe/layers/mvn_layer.cu
src/caffe/layers/neuron_layer.cpp
src/caffe/layers/pooling_layer.cpp
src/caffe/layers/pooling_layer.cu
src/caffe/layers/power_layer.cpp
src/caffe/layers/power_layer.cu
src/caffe/layers/prelu_layer.cpp
src/caffe/layers/prelu_layer.cu
src/caffe/layers/reduction_layer.cpp
src/caffe/layers/reduction_layer.cu
src/caffe/layers/relu_layer.cpp
src/caffe/layers/relu_layer.cu
src/caffe/layers/reshape_layer.cpp
src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
src/caffe/layers/sigmoid_layer.cpp
src/caffe/layers/sigmoid_layer.cu
src/caffe/layers/silence_layer.cpp
src/caffe/layers/silence_layer.cu
src/caffe/layers/slice_layer.cpp
src/caffe/layers/slice_layer.cu
src/caffe/layers/softmax_layer.cpp
src/caffe/layers/softmax_layer.cu
src/caffe/layers/softmax_loss_layer.cpp
src/caffe/layers/softmax_loss_layer.cu
src/caffe/layers/split_layer.cpp
src/caffe/layers/split_layer.cu
src/caffe/layers/spp_layer.cpp
src/caffe/layers/tanh_layer.cpp
src/caffe/layers/tanh_layer.cu
src/caffe/layers/threshold_layer.cpp
src/caffe/layers/threshold_layer.cu
src/caffe/layers/tile_layer.cpp
src/caffe/layers/tile_layer.cu
src/caffe/layers/window_data_layer.cpp
src/caffe/net.cpp
src/caffe/parallel.cpp
src/caffe/proto/caffe.proto
src/caffe/solver.cpp
src/caffe/solvers/adadelta_solver.cpp [new file with mode: 0644]
src/caffe/solvers/adagrad_solver.cpp [new file with mode: 0644]
src/caffe/solvers/adam_solver.cpp [new file with mode: 0644]
src/caffe/solvers/nesterov_solver.cpp [new file with mode: 0644]
src/caffe/solvers/rmsprop_solver.cpp [new file with mode: 0644]
src/caffe/solvers/sgd_solver.cpp [new file with mode: 0644]
src/caffe/syncedmem.cpp
src/caffe/test/test_accuracy_layer.cpp
src/caffe/test/test_argmax_layer.cpp
src/caffe/test/test_batch_reindex_layer.cpp [new file with mode: 0644]
src/caffe/test/test_blob.cpp
src/caffe/test/test_common.cpp
src/caffe/test/test_concat_layer.cpp
src/caffe/test/test_contrastive_loss_layer.cpp
src/caffe/test/test_convolution_layer.cpp
src/caffe/test/test_deconvolution_layer.cpp
src/caffe/test/test_eltwise_layer.cpp
src/caffe/test/test_embed_layer.cpp
src/caffe/test/test_euclidean_loss_layer.cpp
src/caffe/test/test_filler.cpp
src/caffe/test/test_filter_layer.cpp
src/caffe/test/test_flatten_layer.cpp
src/caffe/test/test_gradient_based_solver.cpp
src/caffe/test/test_hdf5_output_layer.cpp
src/caffe/test/test_hdf5data_layer.cpp
src/caffe/test/test_hinge_loss_layer.cpp
src/caffe/test/test_im2col_kernel.cu
src/caffe/test/test_im2col_layer.cpp
src/caffe/test/test_image_data_layer.cpp
src/caffe/test/test_infogain_loss_layer.cpp
src/caffe/test/test_inner_product_layer.cpp
src/caffe/test/test_lrn_layer.cpp
src/caffe/test/test_math_functions.cpp
src/caffe/test/test_maxpool_dropout_layers.cpp
src/caffe/test/test_multinomial_logistic_loss_layer.cpp
src/caffe/test/test_mvn_layer.cpp
src/caffe/test/test_neuron_layer.cpp
src/caffe/test/test_pooling_layer.cpp
src/caffe/test/test_power_layer.cpp
src/caffe/test/test_random_number_generator.cpp
src/caffe/test/test_reduction_layer.cpp
src/caffe/test/test_reshape_layer.cpp
src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
src/caffe/test/test_slice_layer.cpp
src/caffe/test/test_softmax_layer.cpp
src/caffe/test/test_softmax_with_loss_layer.cpp
src/caffe/test/test_solver.cpp
src/caffe/test/test_solver_factory.cpp [new file with mode: 0644]
src/caffe/test/test_split_layer.cpp
src/caffe/test/test_spp_layer.cpp
src/caffe/test/test_stochastic_pooling.cpp
src/caffe/test/test_syncedmem.cpp
src/caffe/test/test_tanh_layer.cpp
src/caffe/test/test_threshold_layer.cpp
src/caffe/test/test_tile_layer.cpp
src/caffe/test/test_upgrade_proto.cpp
src/caffe/test/test_util_blas.cpp
src/caffe/util/db_lmdb.cpp
src/caffe/util/im2col.cpp
src/caffe/util/im2col.cu
src/caffe/util/math_functions.cu
src/caffe/util/upgrade_proto.cpp
tools/caffe.cpp
tools/upgrade_solver_proto_text.cpp [new file with mode: 0644]

index 37f937f..f8f7530 100644 (file)
@@ -23,9 +23,10 @@ set(python_version "2" CACHE STRING "Specify which Python version to use")
 caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE)
 caffe_option(BUILD_docs   "Build documentation" ON IF UNIX OR APPLE)
 caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON)
-caffe_option(USE_LMDB "Build with lmdb" ON)
-caffe_option(USE_LEVELDB "Build with levelDB" ON)
 caffe_option(USE_OPENCV "Build with OpenCV support" ON)
+caffe_option(USE_LEVELDB "Build with levelDB" ON)
+caffe_option(USE_LMDB "Build with lmdb" ON)
+caffe_option(ALLOW_LMDB_NOLOCK "Allow MDB_NOLOCK when reading LMDB files (only if necessary)" OFF)
 
 # ---[ Dependencies
 include(cmake/Dependencies.cmake)
index 42fcf02..05c714d 100644 (file)
@@ -3,5 +3,5 @@
 See http://caffe.berkeleyvision.org/installation.html for the latest
 installation instructions.
 
-Check the issue tracker in case you need help:
-https://github.com/BVLC/caffe/issues
+Check the users group in case you need help:
+https://groups.google.com/forum/#!forum/caffe-users
index 43cb15f..4a1d41d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -318,6 +318,9 @@ ifeq ($(USE_LEVELDB), 1)
 endif
 ifeq ($(USE_LMDB), 1)
        COMMON_FLAGS += -DUSE_LMDB
+ifeq ($(ALLOW_LMDB_NOLOCK), 1)
+       COMMON_FLAGS += -DALLOW_LMDB_NOLOCK
+endif
 endif
 
 # CPU-only configuration
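The new ALLOW_LMDB_NOLOCK define is consumed in src/caffe/util/db_lmdb.cpp (also touched by this commit); a rough sketch of the guarded open path, assuming the retry-on-EACCES behavior described in the config comment rather than the verbatim implementation:

    // inside LMDB::Open(const string& source, Mode mode) -- sketch only
    int flags = (mode == READ) ? (MDB_RDONLY | MDB_NOTLS) : 0;
    int rc = mdb_env_open(mdb_env_, source.c_str(), flags, 0664);
    #ifndef ALLOW_LMDB_NOLOCK
    MDB_CHECK(rc);
    #else
    if (rc == EACCES) {
      // the lock file cannot be created (e.g. read-only mount); retry
      // without locking -- unsafe if anything may write the LMDB concurrently
      LOG(WARNING) << "Permission denied. Trying with MDB_NOLOCK ...";
      mdb_env_close(mdb_env_);
      MDB_CHECK(mdb_env_create(&mdb_env_));
      flags |= MDB_NOLOCK;
      MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664));
    } else {
      MDB_CHECK(rc);
    }
    #endif
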
index 8e2c4fb..1dd6a8f 100644 (file)
@@ -8,9 +8,14 @@
 # CPU_ONLY := 1
 
 # uncomment to disable IO dependencies and corresponding data layers
+# USE_OPENCV := 0
 # USE_LEVELDB := 0
 # USE_LMDB := 0
-# USE_OPENCV := 0
+
+# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
+#      You should not set this flag if you will be reading LMDBs with any
+#      possibility of simultaneous read and write
+# ALLOW_LMDB_NOLOCK := 1
 
 # Uncomment if you're using OpenCV 3
 # OPENCV_VERSION := 3
index 8b25996..0563711 100644 (file)
@@ -62,6 +62,9 @@ function(caffe_generate_export_configs)
 
   if(USE_LMDB)
     list(APPEND Caffe_DEFINITIONS -DUSE_LMDB)
+    if (ALLOW_LMDB_NOLOCK)
+        list(APPEND Caffe_DEFINITIONS -DALLOW_LMDB_NOLOCK)
+    endif()
   endif()
 
   if(USE_LEVELDB)
index d68d7bf..5651e2b 100644 (file)
@@ -34,6 +34,9 @@ if(USE_LMDB)
   include_directories(SYSTEM ${LMDB_INCLUDE_DIR})
   list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES})
   add_definitions(-DUSE_LMDB)
+  if(ALLOW_LMDB_NOLOCK)
+    add_definitions(-DALLOW_LMDB_NOLOCK)
+  endif()
 endif()
 
 # ---[ LevelDB
@@ -55,9 +58,9 @@ endif()
 include(cmake/Cuda.cmake)
 if(NOT HAVE_CUDA)
   if(CPU_ONLY)
-    message("-- CUDA is disabled. Building without it...")
+    message(STATUS "-- CUDA is disabled. Building without it...")
   else()
-    message("-- CUDA is not detected by cmake. Building without it...")
+    message(WARNING "-- CUDA is not detected by cmake. Building without it...")
   endif()
 
   # TODO: remove this not cross platform define in future. Use caffe_config.h instead.
index 3d12e81..6984f41 100644 (file)
@@ -114,9 +114,10 @@ function(caffe_print_configuration_summary)
   caffe_status("  BUILD_matlab      :   ${BUILD_matlab}")
   caffe_status("  BUILD_docs        :   ${BUILD_docs}")
   caffe_status("  CPU_ONLY          :   ${CPU_ONLY}")
-  caffe_status("  USE_LMDB          :   ${USE_LMDB}")
-  caffe_status("  USE_LEVELDB       :   ${USE_LEVELDB}")
   caffe_status("  USE_OPENCV        :   ${USE_OPENCV}")
+  caffe_status("  USE_LEVELDB       :   ${USE_LEVELDB}")
+  caffe_status("  USE_LMDB          :   ${USE_LMDB}")
+  caffe_status("  ALLOW_LMDB_NOLOCK :   ${ALLOW_LMDB_NOLOCK}")
   caffe_status("")
   caffe_status("Dependencies:")
   caffe_status("  BLAS              : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})")
index 9302022..8a31b43 100644 (file)
@@ -33,5 +33,6 @@
 
 /* IO libraries */
 #cmakedefine USE_OPENCV
-#cmakedefine USE_LMDB
 #cmakedefine USE_LEVELDB
+#cmakedefine USE_LMDB
+#cmakedefine ALLOW_LMDB_NOLOCK
index 89a8c71..cce7ec3 100644 (file)
@@ -30,13 +30,14 @@ Optional dependencies:
 
 * [OpenCV](http://opencv.org/) >= 2.4 including 3.0
 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`)
+* cuDNN for GPU acceleration (v3)
 
 Pycaffe and Matcaffe interfaces have their own natural needs.
 
 * For Python Caffe:  `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python`
 * For MATLAB Caffe: MATLAB with the `mex` compiler.
 
-**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. For now cuDNN v1 is integrated but see [PR #1731](https://github.com/BVLC/caffe/pull/1731) for v2.
+**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v3; older versions are supported in older Caffe.
 
 **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment.
 
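In Makefile.config terms, enabling the acceleration described above means uncommenting the flag the text names (the flag itself predates this commit):

    # build with drop-in cuDNN acceleration (cuDNN v3 as of this commit)
    USE_CUDNN := 1
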
index b150f64..b719f71 100644 (file)
@@ -8,12 +8,12 @@ The responsibilities of learning are divided between the Solver for overseeing t
 
 The Caffe solvers are:
 
-- Stochastic Gradient Descent (`SGD`), 
-- AdaDelta (`ADADELTA`),
-- Adaptive Gradient (`ADAGRAD`),
-- Adam (`ADAM`),
-- Nesterov's Accelerated Gradient (`NESTEROV`) and
-- RMSprop (`RMSPROP`)
+- Stochastic Gradient Descent (`type: "SGD"`),
+- AdaDelta (`type: "AdaDelta"`),
+- Adaptive Gradient (`type: "AdaGrad"`),
+- Adam (`type: "Adam"`),
+- Nesterov's Accelerated Gradient (`type: "Nesterov"`) and
+- RMSprop (`type: "RMSProp"`)
 
 The solver
 
@@ -51,7 +51,7 @@ The parameter update $$\Delta W$$ is formed by the solver from the error gradien
 
 ### SGD
 
-**Stochastic gradient descent** (`solver_type: SGD`) updates the weights $$ W $$ by a linear combination of the negative gradient $$ \nabla L(W) $$ and the previous weight update $$ V_t $$.
+**Stochastic gradient descent** (`type: "SGD"`) updates the weights $$ W $$ by a linear combination of the negative gradient $$ \nabla L(W) $$ and the previous weight update $$ V_t $$.
 The **learning rate** $$ \alpha $$ is the weight of the negative gradient.
 The **momentum** $$ \mu $$ is the weight of the previous update.
 
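For reference, the momentum update this section goes on to state in full (standard form, consistent with the rest of the document):

$$
V_{t+1} = \mu V_t - \alpha \nabla L(W_t) \\
W_{t+1} = W_t + V_{t+1}
$$
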
@@ -113,7 +113,7 @@ If learning diverges (e.g., you start to see very large or `NaN` or `inf` loss v
 
 ### AdaDelta
 
-The **AdaDelta** (`solver_type: ADADELTA`) method (M. Zeiler [1]) is a "robust learning rate method". It is a gradient-based optimization method (like SGD). The update formulas are
+The **AdaDelta** (`type: "AdaDelta"`) method (M. Zeiler [1]) is a "robust learning rate method". It is a gradient-based optimization method (like SGD). The update formulas are
 
 $$
 \begin{align}
@@ -125,7 +125,7 @@ E[g^2]_t &= \delta{E[g^2]_{t-1} } + (1-\delta)g_{t}^2
 \end{align}
 $$
 
-and 
+and
 
 $$
 (W_{t+1})_i =
@@ -139,7 +139,7 @@ $$
 
 ### AdaGrad
 
-The **adaptive gradient** (`solver_type: ADAGRAD`) method (Duchi et al. [1]) is a gradient-based optimization method (like SGD) that attempts to "find needles in haystacks in the form of very predictive but rarely seen features," in Duchi et al.'s words.
+The **adaptive gradient** (`type: "AdaGrad"`) method (Duchi et al. [1]) is a gradient-based optimization method (like SGD) that attempts to "find needles in haystacks in the form of very predictive but rarely seen features," in Duchi et al.'s words.
 Given the update information from all previous iterations $$ \left( \nabla L(W) \right)_{t'} $$ for $$ t' \in \{1, 2, ..., t\} $$,
 the update formulas proposed by [1] are as follows, specified for each component $$i$$ of the weights $$W$$:
 
@@ -159,7 +159,7 @@ Note that in practice, for weights $$ W \in \mathcal{R}^d $$, AdaGrad implementa
 
 ### Adam
 
-The **Adam** (`solver_type: ADAM`), proposed in Kingma et al. [1], is a gradient-based optimization method (like SGD). This includes an "adaptive moment estimation" ($$m_t, v_t$$) and can be regarded as a generalization of AdaGrad. The update formulas are
+The **Adam** (`type: "Adam"`), proposed in Kingma et al. [1], is a gradient-based optimization method (like SGD). This includes an "adaptive moment estimation" ($$m_t, v_t$$) and can be regarded as a generalization of AdaGrad. The update formulas are
 
 $$
 (m_t)_i = \beta_1 (m_{t-1})_i + (1-\beta_1)(\nabla L(W_t))_i,\\
@@ -181,7 +181,7 @@ Kingma et al. [1] proposed to use $$\beta_1 = 0.9, \beta_2 = 0.999, \varepsilon
 
 ### NAG
 
-**Nesterov's accelerated gradient** (`solver_type: NESTEROV`) was proposed by Nesterov [1] as an "optimal" method of convex optimization, achieving a convergence rate of $$ \mathcal{O}(1/t^2) $$ rather than the $$ \mathcal{O}(1/t) $$.
+**Nesterov's accelerated gradient** (`type: "Nesterov"`) was proposed by Nesterov [1] as an "optimal" method of convex optimization, achieving a convergence rate of $$ \mathcal{O}(1/t^2) $$ rather than the $$ \mathcal{O}(1/t) $$.
 Though the required assumptions to achieve the $$ \mathcal{O}(1/t^2) $$ convergence typically will not hold for deep networks trained with Caffe (e.g., due to non-smoothness and non-convexity), in practice NAG can be a very effective method for optimizing certain types of deep learning architectures, as demonstrated for deep MNIST autoencoders by Sutskever et al. [2].
 
 The weight update formulas look very similar to the SGD updates given above:
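Those formulas, for reference (standard NAG; evaluating the gradient at the momentum-advanced point is the only difference from the SGD update above):

$$
V_{t+1} = \mu V_t - \alpha \nabla L(W_t + \mu V_t) \\
W_{t+1} = W_t + V_{t+1}
$$
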
@@ -206,10 +206,10 @@ What distinguishes the method from SGD is the weight setting $$ W $$ on which we
 
 ### RMSprop
 
-The **RMSprop** (`solver_type: RMSPROP`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are
+The **RMSprop** (`type: "RMSProp"`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are
 
 $$
-(v_t)_i = 
+(v_t)_i =
 \begin{cases}
 (v_{t-1})_i + \delta, &(\nabla L(W_t))_i(\nabla L(W_{t-1}))_i > 0\\
 (v_{t-1})_i \cdot (1-\delta), & \text{else}
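
Across the prototxt updates that follow, the old enum field gives way to this string-valued type; a minimal solver definition in the new style (values hypothetical, mirroring the MNIST examples in this commit):

    net: "examples/mnist/lenet_train_test.prototxt"
    base_lr: 0.01
    momentum: 0.9
    max_iter: 10000
    solver_mode: GPU
    type: "SGD"    # previously: solver_type: SGD
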
index 46bbb19..89b7dd3 100644 (file)
    "source": [
     "net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))\n",
     "out = net.forward()\n",
-    "print(\"Predicted class is #{}.\".format(out['prob'].argmax()))"
+    "print(\"Predicted class is #{}.\".format(out['prob'][0].argmax()))"
    ]
   },
   {
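
The fix matters whenever the batch axis holds more than one item; a sketch of the intent (shapes assumed for an ImageNet-style 1000-class net):

    # out['prob'] has shape (batch_size, 1000). A flat argmax over the whole
    # blob would run across batch items, so index one image first.
    print("Predicted class is #{}.".format(out['prob'][0].argmax()))
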
index 776d1e0..16176c0 100644 (file)
@@ -20,5 +20,5 @@ snapshot: 5000
 snapshot_prefix: "examples/mnist/lenet_adadelta"
 # solver mode: CPU or GPU
 solver_mode: GPU
-solver_type: ADADELTA
+type: "AdaDelta"
 delta: 1e-6
index d22c571..4b5336b 100644 (file)
@@ -22,5 +22,5 @@ max_iter: 10000
 snapshot: 5000
 snapshot_prefix: "examples/mnist/lenet"
 # solver mode: CPU or GPU
-solver_type: ADAM
+type: "Adam"
 solver_mode: GPU
index 74dadc5..924b72d 100644 (file)
@@ -23,5 +23,5 @@ snapshot: 5000
 snapshot_prefix: "examples/mnist/lenet_rmsprop"
 # solver mode: CPU or GPU
 solver_mode: GPU
-solver_type: RMSPROP
+type: "RMSProp"
 rms_decay: 0.98
index 065647d..26c4084 100644 (file)
@@ -16,4 +16,4 @@ snapshot: 10000
 snapshot_prefix: "examples/mnist/mnist_autoencoder_adadelta_train"
 # solver mode: CPU or GPU
 solver_mode: GPU
-solver_type: ADADELTA
+type: "AdaDelta"
index cc0ed9e..065cdb2 100644 (file)
@@ -14,4 +14,4 @@ snapshot: 10000
 snapshot_prefix: "examples/mnist/mnist_autoencoder_adagrad_train"
 # solver mode: CPU or GPU
 solver_mode: GPU
-solver_type: ADAGRAD
+type: "AdaGrad"
index 2a59fd4..c95e3fe 100644 (file)
@@ -17,4 +17,4 @@ snapshot_prefix: "examples/mnist/mnist_autoencoder_nesterov_train"
 momentum: 0.95
 # solver mode: CPU or GPU
 solver_mode: GPU
-solver_type: NESTEROV
+type: "Nesterov"
index fea5117..af360ac 100644 (file)
@@ -8,7 +8,6 @@
 #include "caffe/common.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/syncedmem.hpp"
-#include "caffe/util/math_functions.hpp"
 
 const int kMaxBlobAxes = 32;
 
index 68a5e1d..a339efb 100644 (file)
 #include "caffe/parallel.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/solver.hpp"
+#include "caffe/solver_factory.hpp"
 #include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
+#include "caffe/util/upgrade_proto.hpp"
 #include "caffe/vision_layers.hpp"
 
 #endif  // CAFFE_CAFFE_HPP_
index d1ddaee..95358d4 100644 (file)
@@ -1,16 +1,11 @@
 #ifndef CAFFE_COMMON_LAYERS_HPP_
 #define CAFFE_COMMON_LAYERS_HPP_
 
-#include <string>
 #include <utility>
 #include <vector>
 
 #include "caffe/blob.hpp"
-#include "caffe/common.hpp"
-#include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
-#include "caffe/loss_layers.hpp"
-#include "caffe/neuron_layers.hpp"
 #include "caffe/proto/caffe.pb.h"
 
 namespace caffe {
@@ -79,6 +74,75 @@ class ArgMaxLayer : public Layer<Dtype> {
 };
 
 /**
+ * @brief Index into the input blob along its first axis.
+ *
+ * This layer can be used to select, reorder, and even replicate examples in a
+ * batch.  The second blob is cast to int and treated as an index into the
+ * first axis of the first blob.
+ */
+template <typename Dtype>
+class BatchReindexLayer : public Layer<Dtype> {
+ public:
+  explicit BatchReindexLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "BatchReindex"; }
+  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  /**
+   * @param bottom input Blob vector (length 2+)
+   *   -# @f$ (N \times ...) @f$
+   *      the inputs @f$ x_1 @f$
+   *   -# @f$ (M) @f$
+   *      the inputs @f$ x_2 @f$
+   * @param top output Blob vector (length 1)
+   *   -# @f$ (M \times ...) @f$:
+   *      the reindexed array @f$
+   *        y = x_1[x_2]
+   *      @f$
+   */
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  /**
+   * @brief Computes the error gradient w.r.t. the reordered input.
+   *
+   * @param top output Blob vector (length 1), providing the error gradient
+   *        with respect to the outputs
+   *   -# @f$ (M \times ...) @f$:
+   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+   *      with respect to concatenated outputs @f$ y @f$
+   * @param propagate_down see Layer::Backward.
+   * @param bottom input Blob vector (length 2):
+   *   - @f$ \frac{\partial E}{\partial y} @f$ is de-indexed (summing where
+   *     required) back to the input x_1
+   *   - This layer cannot backprop to x_2, i.e. propagate_down[1] must be
+   *     false.
+   */
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ private:
+  struct pair_sort_first {
+    bool operator()(const std::pair<int, int> &left,
+                    const std::pair<int, int> &right) {
+      return left.first < right.first;
+    }
+  };
+  void check_batch_reindex(int initial_num, int final_num,
+                           const Dtype* ridx_data);
+};
+
+
+/**
  * @brief Takes at least two Blob%s and concatenates them along either the num
  *        or channel dimension, outputting the result.
  */
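
A hypothetical prototxt use of the new BatchReindex layer, selecting (and possibly repeating) items along the batch axis; blob names are illustrative:

    layer {
      name: "reindex"
      type: "BatchReindex"
      bottom: "data"     # N x C x H x W
      bottom: "indices"  # M integer-valued entries in [0, N)
      top: "reindexed"   # M x C x H x W
    }
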
index 90fd0d1..aa0ab7d 100644 (file)
@@ -17,6 +17,9 @@
 #include "caffe/util/blocking_queue.hpp"
 #include "caffe/util/db.hpp"
 
+#define HDF5_DATA_DATASET_NAME "data"
+#define HDF5_DATA_LABEL_NAME "label"
+
 namespace caffe {
 
 /**
index 888f4a4..dad9ad4 100644 (file)
@@ -8,7 +8,6 @@
 #include <string>
 
 #include "caffe/blob.hpp"
-#include "caffe/common.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/syncedmem.hpp"
 #include "caffe/util/math_functions.hpp"
index a0d1d4e..10f353f 100644 (file)
@@ -9,7 +9,7 @@
 #include "caffe/common.hpp"
 #include "caffe/layer_factory.hpp"
 #include "caffe/proto/caffe.pb.h"
-#include "caffe/util/device_alternate.hpp"
+#include "caffe/util/math_functions.hpp"
 
 /**
  Forward declare boost::thread instead of including boost/thread.hpp
index 8d41af3..d08ad9b 100644 (file)
@@ -6,7 +6,6 @@
 #include <vector>
 
 #include "caffe/blob.hpp"
-#include "caffe/common.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/neuron_layers.hpp"
 #include "caffe/proto/caffe.pb.h"
index c2e0774..4fa330e 100644 (file)
@@ -10,9 +10,6 @@
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 
-#define HDF5_DATA_DATASET_NAME "data"
-#define HDF5_DATA_LABEL_NAME "label"
-
 namespace caffe {
 
 /**
diff --git a/include/caffe/sgd_solvers.hpp b/include/caffe/sgd_solvers.hpp
new file mode 100644 (file)
index 0000000..1fc52d8
--- /dev/null
@@ -0,0 +1,148 @@
+#ifndef CAFFE_SGD_SOLVERS_HPP_
+#define CAFFE_SGD_SOLVERS_HPP_
+
+#include <string>
+#include <vector>
+
+#include "caffe/solver.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Optimizes the parameters of a Net using
+ *        stochastic gradient descent (SGD) with momentum.
+ */
+template <typename Dtype>
+class SGDSolver : public Solver<Dtype> {
+ public:
+  explicit SGDSolver(const SolverParameter& param)
+      : Solver<Dtype>(param) { PreSolve(); }
+  explicit SGDSolver(const string& param_file)
+      : Solver<Dtype>(param_file) { PreSolve(); }
+  virtual inline const char* type() const { return "SGD"; }
+
+  const vector<shared_ptr<Blob<Dtype> > >& history() { return history_; }
+
+ protected:
+  void PreSolve();
+  Dtype GetLearningRate();
+  virtual void ApplyUpdate();
+  virtual void Normalize(int param_id);
+  virtual void Regularize(int param_id);
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+  virtual void ClipGradients();
+  virtual void SnapshotSolverState(const string& model_filename);
+  virtual void SnapshotSolverStateToBinaryProto(const string& model_filename);
+  virtual void SnapshotSolverStateToHDF5(const string& model_filename);
+  virtual void RestoreSolverStateFromHDF5(const string& state_file);
+  virtual void RestoreSolverStateFromBinaryProto(const string& state_file);
+  // history maintains the historical momentum data.
+  // update maintains update related data and is not needed in snapshots.
+  // temp maintains other information that might be needed in computation
+  //   of gradients/updates and is not needed in snapshots
+  vector<shared_ptr<Blob<Dtype> > > history_, update_, temp_;
+
+  DISABLE_COPY_AND_ASSIGN(SGDSolver);
+};
+
+template <typename Dtype>
+class NesterovSolver : public SGDSolver<Dtype> {
+ public:
+  explicit NesterovSolver(const SolverParameter& param)
+      : SGDSolver<Dtype>(param) {}
+  explicit NesterovSolver(const string& param_file)
+      : SGDSolver<Dtype>(param_file) {}
+  virtual inline const char* type() const { return "Nesterov"; }
+
+ protected:
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+
+  DISABLE_COPY_AND_ASSIGN(NesterovSolver);
+};
+
+template <typename Dtype>
+class AdaGradSolver : public SGDSolver<Dtype> {
+ public:
+  explicit AdaGradSolver(const SolverParameter& param)
+      : SGDSolver<Dtype>(param) { constructor_sanity_check(); }
+  explicit AdaGradSolver(const string& param_file)
+      : SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
+  virtual inline const char* type() const { return "AdaGrad"; }
+
+ protected:
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+  void constructor_sanity_check() {
+    CHECK_EQ(0, this->param_.momentum())
+        << "Momentum cannot be used with AdaGrad.";
+  }
+
+  DISABLE_COPY_AND_ASSIGN(AdaGradSolver);
+};
+
+
+template <typename Dtype>
+class RMSPropSolver : public SGDSolver<Dtype> {
+ public:
+  explicit RMSPropSolver(const SolverParameter& param)
+      : SGDSolver<Dtype>(param) { constructor_sanity_check(); }
+  explicit RMSPropSolver(const string& param_file)
+      : SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
+  virtual inline const char* type() const { return "RMSProp"; }
+
+ protected:
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+  void constructor_sanity_check() {
+    CHECK_EQ(0, this->param_.momentum())
+        << "Momentum cannot be used with RMSProp.";
+    CHECK_GE(this->param_.rms_decay(), 0)
+        << "rms_decay should lie between 0 and 1.";
+    CHECK_LT(this->param_.rms_decay(), 1)
+        << "rms_decay should lie between 0 and 1.";
+  }
+
+  DISABLE_COPY_AND_ASSIGN(RMSPropSolver);
+};
+
+template <typename Dtype>
+class AdaDeltaSolver : public SGDSolver<Dtype> {
+ public:
+  explicit AdaDeltaSolver(const SolverParameter& param)
+      : SGDSolver<Dtype>(param) { AdaDeltaPreSolve(); }
+  explicit AdaDeltaSolver(const string& param_file)
+      : SGDSolver<Dtype>(param_file) { AdaDeltaPreSolve(); }
+  virtual inline const char* type() const { return "AdaDelta"; }
+
+ protected:
+  void AdaDeltaPreSolve();
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+
+  DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver);
+};
+
+/**
+ * @brief AdamSolver, an algorithm for first-order gradient-based optimization
+ *        of stochastic objective functions, based on adaptive estimates of
+ *        lower-order moments. Described in [1].
+ *
+ * [1] D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization."
+ *     arXiv preprint arXiv:1412.6980v8 (2014).
+ */
+template <typename Dtype>
+class AdamSolver : public SGDSolver<Dtype> {
+ public:
+  explicit AdamSolver(const SolverParameter& param)
+      : SGDSolver<Dtype>(param) { AdamPreSolve();}
+  explicit AdamSolver(const string& param_file)
+      : SGDSolver<Dtype>(param_file) { AdamPreSolve(); }
+  virtual inline const char* type() const { return "Adam"; }
+
+ protected:
+  void AdamPreSolve();
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
+
+  DISABLE_COPY_AND_ASSIGN(AdamSolver);
+};
+
+}  // namespace caffe
+
+#endif  // CAFFE_SGD_SOLVERS_HPP_
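
Each of the new src/caffe/solvers/*.cpp files (listed above; their bodies are not shown on this page) presumably ends by instantiating and registering its class, mirroring the layer-factory convention; a sketch for SGD:

    // tail of src/caffe/solvers/sgd_solver.cpp -- sketch, assuming the
    // layer-factory registration convention carries over
    INSTANTIATE_CLASS(SGDSolver);
    REGISTER_SOLVER_CLASS(SGD);
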
index 2ecf539..298a68f 100644 (file)
@@ -1,10 +1,11 @@
-#ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_
-#define CAFFE_OPTIMIZATION_SOLVER_HPP_
+#ifndef CAFFE_SOLVER_HPP_
+#define CAFFE_SOLVER_HPP_
 #include <boost/function.hpp>
 #include <string>
 #include <vector>
 
 #include "caffe/net.hpp"
+#include "caffe/solver_factory.hpp"
 
 namespace caffe {
 
@@ -83,6 +84,10 @@ class Solver {
   }
 
   void CheckSnapshotWritePermissions();
+  /**
+   * @brief Returns the solver type.
+   */
+  virtual inline const char* type() const { return ""; }
 
  protected:
   // Make and apply the update value for the current iteration.
@@ -148,158 +153,6 @@ class WorkerSolver : public Solver<Dtype> {
   }
 };
 
-/**
- * @brief Optimizes the parameters of a Net using
- *        stochastic gradient descent (SGD) with momentum.
- */
-template <typename Dtype>
-class SGDSolver : public Solver<Dtype> {
- public:
-  explicit SGDSolver(const SolverParameter& param)
-      : Solver<Dtype>(param) { PreSolve(); }
-  explicit SGDSolver(const string& param_file)
-      : Solver<Dtype>(param_file) { PreSolve(); }
-
-  const vector<shared_ptr<Blob<Dtype> > >& history() { return history_; }
-
- protected:
-  void PreSolve();
-  Dtype GetLearningRate();
-  virtual void ApplyUpdate();
-  virtual void Normalize(int param_id);
-  virtual void Regularize(int param_id);
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-  virtual void ClipGradients();
-  virtual void SnapshotSolverState(const string& model_filename);
-  virtual void SnapshotSolverStateToBinaryProto(const string& model_filename);
-  virtual void SnapshotSolverStateToHDF5(const string& model_filename);
-  virtual void RestoreSolverStateFromHDF5(const string& state_file);
-  virtual void RestoreSolverStateFromBinaryProto(const string& state_file);
-  // history maintains the historical momentum data.
-  // update maintains update related data and is not needed in snapshots.
-  // temp maintains other information that might be needed in computation
-  //   of gradients/updates and is not needed in snapshots
-  vector<shared_ptr<Blob<Dtype> > > history_, update_, temp_;
-
-  DISABLE_COPY_AND_ASSIGN(SGDSolver);
-};
-
-template <typename Dtype>
-class NesterovSolver : public SGDSolver<Dtype> {
- public:
-  explicit NesterovSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) {}
-  explicit NesterovSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) {}
-
- protected:
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-
-  DISABLE_COPY_AND_ASSIGN(NesterovSolver);
-};
-
-template <typename Dtype>
-class AdaGradSolver : public SGDSolver<Dtype> {
- public:
-  explicit AdaGradSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) { constructor_sanity_check(); }
-  explicit AdaGradSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
-
- protected:
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-  void constructor_sanity_check() {
-    CHECK_EQ(0, this->param_.momentum())
-        << "Momentum cannot be used with AdaGrad.";
-  }
-
-  DISABLE_COPY_AND_ASSIGN(AdaGradSolver);
-};
-
-
-template <typename Dtype>
-class RMSPropSolver : public SGDSolver<Dtype> {
- public:
-  explicit RMSPropSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) { constructor_sanity_check(); }
-  explicit RMSPropSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
-
- protected:
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-  void constructor_sanity_check() {
-    CHECK_EQ(0, this->param_.momentum())
-        << "Momentum cannot be used with RMSProp.";
-    CHECK_GE(this->param_.rms_decay(), 0)
-        << "rms_decay should lie between 0 and 1.";
-    CHECK_LT(this->param_.rms_decay(), 1)
-        << "rms_decay should lie between 0 and 1.";
-  }
-
-  DISABLE_COPY_AND_ASSIGN(RMSPropSolver);
-};
-
-template <typename Dtype>
-class AdaDeltaSolver : public SGDSolver<Dtype> {
- public:
-  explicit AdaDeltaSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) { AdaDeltaPreSolve(); }
-  explicit AdaDeltaSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) { AdaDeltaPreSolve(); }
-
- protected:
-  void AdaDeltaPreSolve();
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-
-  DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver);
-};
-
-/**
- * @brief AdamSolver, an algorithm for first-order gradient-based optimization
- *        of stochastic objective functions, based on adaptive estimates of
- *        lower-order moments. Described in [1].
- *
- * [1] D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization."
- *     arXiv preprint arXiv:1412.6980v8 (2014).
- */
-template <typename Dtype>
-class AdamSolver : public SGDSolver<Dtype> {
- public:
-  explicit AdamSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) { AdamPreSolve();}
-  explicit AdamSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) { AdamPreSolve(); }
-
- protected:
-  void AdamPreSolve();
-  virtual void ComputeUpdateValue(int param_id, Dtype rate);
-
-  DISABLE_COPY_AND_ASSIGN(AdamSolver);
-};
-
-template <typename Dtype>
-Solver<Dtype>* GetSolver(const SolverParameter& param) {
-  SolverParameter_SolverType type = param.solver_type();
-
-  switch (type) {
-  case SolverParameter_SolverType_SGD:
-    return new SGDSolver<Dtype>(param);
-  case SolverParameter_SolverType_NESTEROV:
-    return new NesterovSolver<Dtype>(param);
-  case SolverParameter_SolverType_ADAGRAD:
-    return new AdaGradSolver<Dtype>(param);
-  case SolverParameter_SolverType_RMSPROP:
-    return new RMSPropSolver<Dtype>(param);
-  case SolverParameter_SolverType_ADADELTA:
-    return new AdaDeltaSolver<Dtype>(param);
-  case SolverParameter_SolverType_ADAM:
-    return new AdamSolver<Dtype>(param);
-  default:
-    LOG(FATAL) << "Unknown SolverType: " << type;
-  }
-  return (Solver<Dtype>*) NULL;
-}
-
 }  // namespace caffe
 
-#endif  // CAFFE_OPTIMIZATION_SOLVER_HPP_
+#endif  // CAFFE_SOLVER_HPP_
diff --git a/include/caffe/solver_factory.hpp b/include/caffe/solver_factory.hpp
new file mode 100644 (file)
index 0000000..cfff721
--- /dev/null
@@ -0,0 +1,137 @@
+/**
+ * @brief A solver factory that allows one to register solvers, similar to
+ * layer factory. During runtime, registered solvers could be called by passing
+ * a SolverParameter protobuffer to the CreateSolver function:
+ *
+ *     SolverRegistry<Dtype>::CreateSolver(param);
+ *
+ * There are two ways to register a solver. Assuming that we have a solver like:
+ *
+ *   template <typename Dtype>
+ *   class MyAwesomeSolver : public Solver<Dtype> {
+ *     // your implementations
+ *   };
+ *
+ * and its type is its C++ class name, but without the "Solver" at the end
+ * ("MyAwesomeSolver" -> "MyAwesome").
+ *
+ * If the solver is going to be created simply by its constructor, in your c++
+ * file, add the following line:
+ *
+ *    REGISTER_SOLVER_CLASS(MyAwesome);
+ *
+ * Or, if the solver is going to be created by another creator function, in the
+ * format of:
+ *
+ *    template <typename Dtype>
+ *    Solver<Dtype*> GetMyAwesomeSolver(const SolverParameter& param) {
+ *      // your implementation
+ *    }
+ *
+ * then you can register the creator function instead, like
+ *
+ * REGISTER_SOLVER_CREATOR(MyAwesome, GetMyAwesomeSolver)
+ *
+ * Note that each solver type should only be registered once.
+ */
+
+#ifndef CAFFE_SOLVER_FACTORY_H_
+#define CAFFE_SOLVER_FACTORY_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "caffe/common.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+template <typename Dtype>
+class Solver;
+
+template <typename Dtype>
+class SolverRegistry {
+ public:
+  typedef Solver<Dtype>* (*Creator)(const SolverParameter&);
+  typedef std::map<string, Creator> CreatorRegistry;
+
+  static CreatorRegistry& Registry() {
+    static CreatorRegistry* g_registry_ = new CreatorRegistry();
+    return *g_registry_;
+  }
+
+  // Adds a creator.
+  static void AddCreator(const string& type, Creator creator) {
+    CreatorRegistry& registry = Registry();
+    CHECK_EQ(registry.count(type), 0)
+        << "Solver type " << type << " already registered.";
+    registry[type] = creator;
+  }
+
+  // Get a solver using a SolverParameter.
+  static Solver<Dtype>* CreateSolver(const SolverParameter& param) {
+    const string& type = param.type();
+    CreatorRegistry& registry = Registry();
+    CHECK_EQ(registry.count(type), 1) << "Unknown solver type: " << type
+        << " (known types: " << SolverTypeListString() << ")";
+    return registry[type](param);
+  }
+
+  static vector<string> SolverTypeList() {
+    CreatorRegistry& registry = Registry();
+    vector<string> solver_types;
+    for (typename CreatorRegistry::iterator iter = registry.begin();
+         iter != registry.end(); ++iter) {
+      solver_types.push_back(iter->first);
+    }
+    return solver_types;
+  }
+
+ private:
+  // Solver registry should never be instantiated - everything is done with its
+  // static variables.
+  SolverRegistry() {}
+
+  static string SolverTypeListString() {
+    vector<string> solver_types = SolverTypeList();
+    string solver_types_str;
+    for (vector<string>::iterator iter = solver_types.begin();
+         iter != solver_types.end(); ++iter) {
+      if (iter != solver_types.begin()) {
+        solver_types_str += ", ";
+      }
+      solver_types_str += *iter;
+    }
+    return solver_types_str;
+  }
+};
+
+
+template <typename Dtype>
+class SolverRegisterer {
+ public:
+  SolverRegisterer(const string& type,
+      Solver<Dtype>* (*creator)(const SolverParameter&)) {
+    // LOG(INFO) << "Registering solver type: " << type;
+    SolverRegistry<Dtype>::AddCreator(type, creator);
+  }
+};
+
+
+#define REGISTER_SOLVER_CREATOR(type, creator)                                 \
+  static SolverRegisterer<float> g_creator_f_##type(#type, creator<float>);    \
+  static SolverRegisterer<double> g_creator_d_##type(#type, creator<double>)   \
+
+#define REGISTER_SOLVER_CLASS(type)                                            \
+  template <typename Dtype>                                                    \
+  Solver<Dtype>* Creator_##type##Solver(                                       \
+      const SolverParameter& param)                                            \
+  {                                                                            \
+    return new type##Solver<Dtype>(param);                                     \
+  }                                                                            \
+  REGISTER_SOLVER_CREATOR(type, Creator_##type##Solver)
+
+}  // namespace caffe
+
+#endif  // CAFFE_SOLVER_FACTORY_H_
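
A sketch of how callers now obtain a solver through the factory, mirroring the matcaffe and pycaffe changes later in this commit (the prototxt path is hypothetical):

    #include <boost/shared_ptr.hpp>
    #include "caffe/caffe.hpp"

    caffe::SolverParameter param;
    caffe::ReadSolverParamsFromTextFileOrDie("solver.prototxt", &param);
    // looks up the creator registered for param.type(), e.g. "SGD" or "Adam"
    boost::shared_ptr<caffe::Solver<float> >
        solver(caffe::SolverRegistry<float>::CreateSolver(param));
    solver->Solve();
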
index 3d92a0e..38ee466 100644 (file)
@@ -4,7 +4,6 @@
 #include <cstdlib>
 
 #include "caffe/common.hpp"
-#include "caffe/util/math_functions.hpp"
 
 namespace caffe {
 
index 955e12c..d3de2e5 100644 (file)
@@ -4,8 +4,6 @@
 #include <queue>
 #include <string>
 
-#include "caffe/common.hpp"
-
 namespace caffe {
 
 template<typename T>
index 531fd29..d3eb6cc 100644 (file)
@@ -23,7 +23,7 @@ void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
 
 template <typename Dtype>
 void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, Dtype* data_im);
 
@@ -47,7 +47,7 @@ void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
 
 template <typename Dtype>
 void col2im_gpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, Dtype* data_im);
 
index 6070b4c..d6cfa44 100644 (file)
@@ -6,7 +6,6 @@
 
 #include "google/protobuf/message.h"
 
-#include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/proto/caffe.pb.h"
 
index c1f21a0..c94bb3c 100644 (file)
@@ -10,6 +10,15 @@ namespace caffe {
 // Return true iff the net is not the current version.
 bool NetNeedsUpgrade(const NetParameter& net_param);
 
+// Check for deprecations and upgrade the NetParameter as needed.
+bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param);
+
+// Read parameters from a file into a NetParameter proto message.
+void ReadNetParamsFromTextFileOrDie(const string& param_file,
+                                    NetParameter* param);
+void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
+                                      NetParameter* param);
+
 // Return true iff any layer contains parameters specified using
 // deprecated V0LayerParameter.
 bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param);
@@ -50,14 +59,17 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param,
 
 const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type);
 
-// Check for deprecations and upgrade the NetParameter as needed.
-bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param);
+// Return true iff the solver contains any old solver_type specified as enums
+bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param);
 
-// Read parameters from a file into a NetParameter proto message.
-void ReadNetParamsFromTextFileOrDie(const string& param_file,
-                                    NetParameter* param);
-void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
-                                      NetParameter* param);
+bool UpgradeSolverType(SolverParameter* solver_param);
+
+// Check for deprecations and upgrade the SolverParameter as needed.
+bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param);
+
+// Read parameters from a file into a SolverParameter proto message.
+void ReadSolverParamsFromTextFileOrDie(const string& param_file,
+                                       SolverParameter* param);
 
 }  // namespace caffe
 
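The matching command-line tool added by this commit (tools/upgrade_solver_proto_text.cpp) presumably follows the existing net-upgrade tools' convention of reading one prototxt and writing the upgraded copy:

    # hypothetical invocation; argument names assumed
    ./build/tools/upgrade_solver_proto_text old_solver.prototxt upgraded_solver.prototxt
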
index 06bc045..237b05d 100644 (file)
@@ -304,13 +304,24 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
   bool handles_setup_;
   cudnnHandle_t* handle_;
   cudaStream_t*  stream_;
+
+  // algorithms for forward and backwards convolutions
+  cudnnConvolutionFwdAlgo_t *fwd_algo_;
+  cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_;
+  cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_;
+
   vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
   cudnnTensorDescriptor_t    bias_desc_;
   cudnnFilterDescriptor_t      filter_desc_;
   vector<cudnnConvolutionDescriptor_t> conv_descs_;
   int bottom_offset_, top_offset_, bias_offset_;
-  size_t workspaceSizeInBytes;
-  void *workspace;
+
+  size_t *workspace_fwd_sizes_;
+  size_t *workspace_bwd_data_sizes_;
+  size_t *workspace_bwd_filter_sizes_;
+  size_t workspaceSizeInBytes;  // size of underlying storage
+  void *workspaceData;  // underlying storage
+  void **workspace;  // aliases into workspaceData
 };
 #endif
 
@@ -442,6 +453,65 @@ class LRNLayer : public Layer<Dtype> {
   vector<Blob<Dtype>*> product_bottom_vec_;
 };
 
+#ifdef USE_CUDNN
+
+template <typename Dtype>
+class CuDNNLRNLayer : public LRNLayer<Dtype> {
+ public:
+  explicit CuDNNLRNLayer(const LayerParameter& param)
+      : LRNLayer<Dtype>(param), handles_setup_(false) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual ~CuDNNLRNLayer();
+
+ protected:
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  bool handles_setup_;
+  cudnnHandle_t             handle_;
+  cudnnLRNDescriptor_t norm_desc_;
+  cudnnTensorDescriptor_t bottom_desc_, top_desc_;
+
+  int size_;
+  Dtype alpha_, beta_, k_;
+};
+
+template <typename Dtype>
+class CuDNNLCNLayer : public LRNLayer<Dtype> {
+ public:
+  explicit CuDNNLCNLayer(const LayerParameter& param)
+      : LRNLayer<Dtype>(param), handles_setup_(false), tempDataSize(0),
+        tempData1(NULL), tempData2(NULL) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual ~CuDNNLCNLayer();
+
+ protected:
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  bool handles_setup_;
+  cudnnHandle_t             handle_;
+  cudnnLRNDescriptor_t norm_desc_;
+  cudnnTensorDescriptor_t bottom_desc_, top_desc_;
+
+  int size_, pre_pad_;
+  Dtype alpha_, beta_, k_;
+
+  size_t tempDataSize;
+  void *tempData1, *tempData2;
+};
+
+#endif
 
 /**
  * @brief Pools the input image by taking the max, average, etc. within regions.
diff --git a/matlab/+caffe/+test/test_io.m b/matlab/+caffe/+test/test_io.m
new file mode 100644 (file)
index 0000000..2c34bd1
--- /dev/null
@@ -0,0 +1,18 @@
+classdef test_io < matlab.unittest.TestCase
+  methods (Test)
+    function test_read_write_mean(self)
+      % randomly generate mean data
+      width = 200;
+      height = 300;
+      channels = 3;
+      mean_data_write = 255 * rand(width, height, channels, 'single');
+      % write mean data to binary proto
+      mean_proto_file = tempname();
+      caffe.io.write_mean(mean_data_write, mean_proto_file);
+      % read mean data from saved binary proto and test whether they are equal
+      mean_data_read = caffe.io.read_mean(mean_proto_file);
+      self.verifyEqual(mean_data_write, mean_data_read)
+      delete(mean_proto_file);
+    end
+  end
+end
index 7883f79..1641e14 100644 (file)
@@ -188,7 +188,10 @@ static void get_solver(MEX_ARGS) {
       "Usage: caffe_('get_solver', solver_file)");
   char* solver_file = mxArrayToString(prhs[0]);
   mxCHECK_FILE_EXIST(solver_file);
-  shared_ptr<Solver<float> > solver(new caffe::SGDSolver<float>(solver_file));
+  SolverParameter solver_param;
+  ReadSolverParamsFromTextFileOrDie(solver_file, &solver_param);
+  shared_ptr<Solver<float> > solver(
+      SolverRegistry<float>::CreateSolver(solver_param));
   solvers_.push_back(solver);
   plhs[0] = ptr_to_handle<Solver<float> >(solver.get());
   mxFree(solver_file);
index 9389685..6dbf6b2 100644 (file)
@@ -11,7 +11,8 @@ caffe.reset_all();
 % put all test cases here
 results = [...
   run(caffe.test.test_net) ...
-  run(caffe.test.test_solver) ];
+  run(caffe.test.test_solver) ...
+  run(caffe.test.test_io) ];
 
 % reset caffe after testing
 caffe.reset_all();
index 0a0016d..4e8c81d 100644 (file)
@@ -39,8 +39,8 @@ function [curr_dat_sz, curr_lab_sz] = store2hdf5(filename, data, labels, create,
       info=h5info(filename);
       prev_dat_sz=info.Datasets(1).Dataspace.Size;
       prev_lab_sz=info.Datasets(2).Dataspace.Size;
-      assert(prev_dat_sz(1:end-1)==dat_dims(1:end-1), 'Data dimensions must match existing dimensions in dataset');
-      assert(prev_lab_sz(1:end-1)==lab_dims(1:end-1), 'Label dimensions must match existing dimensions in dataset');
+      assert(all(prev_dat_sz(1:end-1)==dat_dims(1:end-1)), 'Data dimensions must match existing dimensions in dataset');
+      assert(all(prev_lab_sz(1:end-1)==lab_dims(1:end-1)), 'Label dimensions must match existing dimensions in dataset');
       startloc.dat=[ones(1,length(dat_dims)-1), prev_dat_sz(end)+1];
       startloc.lab=[ones(1,length(lab_dims)-1), prev_lab_sz(end)+1];
     end
index c79472e..e3e4279 100644 (file)
@@ -45,7 +45,7 @@ layer {
 #    mean_value: 104
 #    mean_value: 117
 #    mean_value: 123
-#    mirror: true
+#    mirror: false
 #  }
   data_param {
     source: "examples/imagenet/ilsvrc12_val_lmdb"
index ccd5776..8687dd8 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "caffe/caffe.hpp"
 #include "caffe/python_layer.hpp"
+#include "caffe/sgd_solvers.hpp"
 
 // Temporary solution for numpy < 1.7 versions: old macro, no promises.
 // You're strongly advised to upgrade to >= 1.7.
@@ -133,8 +134,8 @@ void Net_SetInputArrays(Net<Dtype>* net, bp::object data_obj,
 
 Solver<Dtype>* GetSolverFromFile(const string& filename) {
   SolverParameter param;
-  ReadProtoFromTextFileOrDie(filename, &param);
-  return GetSolver<Dtype>(param);
+  ReadSolverParamsFromTextFileOrDie(filename, &param);
+  return SolverRegistry<Dtype>::CreateSolver(param);
 }
 
 struct NdarrayConverterGenerator {
index a002b60..f8bf572 100644 (file)
@@ -82,11 +82,11 @@ def get_layer_label(layer, rankdir):
                       separator,
                       layer.type,
                       separator,
-                      layer.convolution_param.kernel_size,
+                      layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size._values) else 1,
                       separator,
-                      layer.convolution_param.stride,
+                      layer.convolution_param.stride[0] if len(layer.convolution_param.stride._values) else 1,
                       separator,
-                      layer.convolution_param.pad)
+                      layer.convolution_param.pad[0] if len(layer.convolution_param.pad._values) else 0)
     elif layer.type == 'Pooling':
         pooling_types_dict = get_pooling_types_dict()
         node_label = '"%s%s(%s %s)%skernel size: %d%sstride: %d%spad: %d"' %\
index 40b7ac1..11c8426 100644 (file)
@@ -20,11 +20,18 @@ def blobproto_to_array(blob, return_diff=False):
     Convert a blob proto to an array. In default, we will just return the data,
     unless return_diff is True, in which case we will return the diff.
     """
+    # Read the data into an array
     if return_diff:
-        return np.array(blob.diff).reshape(*blob.shape.dim)
+        data = np.array(blob.diff)
     else:
-        return np.array(blob.data).reshape(*blob.shape.dim)
+        data = np.array(blob.data)
 
+    # Reshape the array
+    if blob.HasField('num') or blob.HasField('channels') or blob.HasField('height') or blob.HasField('width'):
+        # Use legacy 4D shape
+        return data.reshape(blob.num, blob.channels, blob.height, blob.width)
+    else:
+        return data.reshape(blob.shape.dim)
 
 def array_to_blobproto(arr, diff=None):
     """Converts a N-dimensional array to blob proto. If diff is given, also
index 8ea24da..7bd4f41 100644 (file)
@@ -146,8 +146,6 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs):
         # Set top diffs according to defined shapes and make arrays single and
         # C-contiguous as Caffe expects.
         for top, diff in kwargs.iteritems():
-            if diff.ndim != 4:
-                raise Exception('{} diff is not 4-d'.format(top))
             if diff.shape[0] != self.blobs[top].num:
                 raise Exception('Diff is not batch sized')
             self.blobs[top].diff[...] = diff
diff --git a/python/caffe/test/test_io.py b/python/caffe/test/test_io.py
new file mode 100644 (file)
index 0000000..8c86ef7
--- /dev/null
@@ -0,0 +1,41 @@
+import numpy as np
+import unittest
+
+import caffe
+
+class TestBlobProtoToArray(unittest.TestCase):
+
+    def test_old_format(self):
+        data = np.zeros((10,10))
+        blob = caffe.proto.caffe_pb2.BlobProto()
+        blob.data.extend(list(data.flatten()))
+        shape = (1,1,10,10)
+        blob.num, blob.channels, blob.height, blob.width = shape
+
+        arr = caffe.io.blobproto_to_array(blob)
+        self.assertEqual(arr.shape, shape)
+
+    def test_new_format(self):
+        data = np.zeros((10,10))
+        blob = caffe.proto.caffe_pb2.BlobProto()
+        blob.data.extend(list(data.flatten()))
+        blob.shape.dim.extend(list(data.shape))
+
+        arr = caffe.io.blobproto_to_array(blob)
+        self.assertEqual(arr.shape, data.shape)
+
+    def test_no_shape(self):
+        data = np.zeros((10,10))
+        blob = caffe.proto.caffe_pb2.BlobProto()
+        blob.data.extend(list(data.flatten()))
+
+        with self.assertRaises(ValueError):
+            caffe.io.blobproto_to_array(blob)
+
+    def test_scalar(self):
+        data = np.ones((1)) * 123
+        blob = caffe.proto.caffe_pb2.BlobProto()
+        blob.data.extend(list(data.flatten()))
+
+        arr = caffe.io.blobproto_to_array(blob)
+        self.assertEqual(arr, 123)
index 926c7d8..417ffe9 100644 (file)
@@ -54,10 +54,8 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
     return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
 #ifdef USE_CUDNN
   } else if (engine == PoolingParameter_Engine_CUDNN) {
-    PoolingParameter p_param = param.pooling_param();
-    if (p_param.pad() || p_param.pad_h() || p_param.pad_w() ||
-        param.top_size() > 1) {
-      LOG(INFO) << "CUDNN does not support padding or multiple tops. "
+    if (param.top_size() > 1) {
+      LOG(INFO) << "cuDNN does not support multiple tops. "
                 << "Using Caffe's own pooling layer.";
       return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
     }
@@ -70,6 +68,43 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
 
 REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer);
 
+// Get LRN layer according to engine
+template <typename Dtype>
+shared_ptr<Layer<Dtype> > GetLRNLayer(const LayerParameter& param) {
+  LRNParameter_Engine engine = param.lrn_param().engine();
+
+  if (engine == LRNParameter_Engine_DEFAULT) {
+#ifdef USE_CUDNN
+    engine = LRNParameter_Engine_CUDNN;
+#else
+    engine = LRNParameter_Engine_CAFFE;
+#endif
+  }
+
+  if (engine == LRNParameter_Engine_CAFFE) {
+    return shared_ptr<Layer<Dtype> >(new LRNLayer<Dtype>(param));
+#ifdef USE_CUDNN
+  } else if (engine == LRNParameter_Engine_CUDNN) {
+    LRNParameter lrn_param = param.lrn_param();
+
+    if (lrn_param.norm_region() == LRNParameter_NormRegion_WITHIN_CHANNEL) {
+      return shared_ptr<Layer<Dtype> >(new CuDNNLCNLayer<Dtype>(param));
+    } else {
+      // local size is too big to be handled through cuDNN
+      if (param.lrn_param().local_size() > CUDNN_LRN_MAX_N) {
+        return shared_ptr<Layer<Dtype> >(new LRNLayer<Dtype>(param));
+      } else {
+        return shared_ptr<Layer<Dtype> >(new CuDNNLRNLayer<Dtype>(param));
+      }
+    }
+#endif
+  } else {
+    LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+  }
+}
+
+REGISTER_LAYER_CREATOR(LRN, GetLRNLayer);
+
 // Get relu layer according to engine.
 template <typename Dtype>
 shared_ptr<Layer<Dtype> > GetReLULayer(const LayerParameter& param) {
index 5ce28c9..7e55235 100644 (file)
@@ -1,6 +1,5 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
 
index bb310e1..b5a6c25 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index e2d8d9f..ae2df1f 100644 (file)
@@ -1,12 +1,9 @@
-#include <algorithm>
 #include <functional>
 #include <utility>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 0c0a932..44df8d4 100644 (file)
@@ -3,8 +3,7 @@
 #include <utility>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/common_layers.hpp"
 
 namespace caffe {
 
index c6b4755..316cb0f 100644 (file)
@@ -2,7 +2,6 @@
 #include <vector>
 
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/im2col.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
index b90bd4e..d77f91c 100644 (file)
@@ -1,10 +1,7 @@
 #include <boost/thread.hpp>
-#include <string>
 #include <vector>
 
 #include "caffe/data_layers.hpp"
-#include "caffe/net.hpp"
-#include "caffe/util/io.hpp"
 
 namespace caffe {
 
diff --git a/src/caffe/layers/batch_reindex_layer.cpp b/src/caffe/layers/batch_reindex_layer.cpp
new file mode 100644 (file)
index 0000000..3d3ce32
--- /dev/null
@@ -0,0 +1,78 @@
+#include <vector>
+
+#include "caffe/common_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+                                       const vector<Blob<Dtype>*>& top) {
+  CHECK_EQ(1, bottom[1]->num_axes());
+  vector<int> newshape;
+  newshape.push_back(bottom[1]->shape(0));
+  for (int i = 1; i < bottom[0]->shape().size(); ++i) {
+    newshape.push_back(bottom[0]->shape()[i]);
+  }
+  top[0]->Reshape(newshape);
+}
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::check_batch_reindex(int initial_num,
+                                                   int final_num,
+                                                   const Dtype* ridx_data) {
+  for (int i = 0; i < final_num; ++i) {
+    CHECK_GE(ridx_data[i], 0)
+        << "Index specified for reindex layer was negative.";
+    CHECK_LT(ridx_data[i], initial_num)
+        << "Index specified for reindex layer was greater than batch size.";
+  }
+}
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+                                           const vector<Blob<Dtype>*>& top) {
+  check_batch_reindex(bottom[0]->shape(0), bottom[1]->count(),
+                      bottom[1]->cpu_data());
+  if (top[0]->count() == 0) {
+    return;
+  }
+  int inner_dim = bottom[0]->count() / bottom[0]->shape(0);
+  const Dtype* in = bottom[0]->cpu_data();
+  const Dtype* permut = bottom[1]->cpu_data();
+  Dtype* out = top[0]->mutable_cpu_data();
+  for (int index = 0; index < top[0]->count(); ++index) {
+    int n = index / (inner_dim);
+    int in_n = static_cast<int>(permut[n]);
+    out[index] = in[in_n * (inner_dim) + index % (inner_dim)];
+  }
+}
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::Backward_cpu(
+    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
+    const vector<Blob<Dtype>*>& bottom) {
+  CHECK(!propagate_down[1]) << "Cannot backprop to index.";
+  if (!propagate_down[0]) {
+    return;
+  }
+  int inner_dim = bottom[0]->count() / bottom[0]->shape(0);
+  Dtype* bot_diff = bottom[0]->mutable_cpu_diff();
+  const Dtype* permut = bottom[1]->cpu_data();
+  const Dtype* top_diff = top[0]->cpu_diff();
+  caffe_set(bottom[0]->count(), Dtype(0), bot_diff);
+  for (int index = 0; index < top[0]->count(); ++index) {
+    int n = index / (inner_dim);
+    int in_n = static_cast<int>(permut[n]);
+    bot_diff[in_n * (inner_dim) + index % (inner_dim)] += top_diff[index];
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(BatchReindexLayer);
+#endif
+
+INSTANTIATE_CLASS(BatchReindexLayer);
+REGISTER_LAYER_CLASS(BatchReindex);
+
+}  // namespace caffe
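
Semantically, the CPU path above is a gather of whole examples along the
batch axis, with backward as the matching scatter-add: an input row picked
several times receives the sum of the corresponding top diffs. A numpy sketch
of the same behavior, with illustrative helper names that are not part of
Caffe:

    import numpy as np

    def batch_reindex_forward(bottom, permut):
        return bottom[permut.astype(int)]

    def batch_reindex_backward(top_diff, permut, initial_num):
        bot_diff = np.zeros((initial_num,) + top_diff.shape[1:],
                            dtype=top_diff.dtype)
        for n, in_n in enumerate(permut.astype(int)):
            bot_diff[in_n] += top_diff[n]
        return bot_diff

    x = np.arange(12.).reshape(4, 3)
    y = batch_reindex_forward(x, np.array([2., 0., 2.]))   # rows 2, 0, 2
    g = batch_reindex_backward(np.ones_like(y), np.array([2., 0., 2.]), 4)
    # row 2 of g is 2.0 everywhere: it fed two outputs
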
diff --git a/src/caffe/layers/batch_reindex_layer.cu b/src/caffe/layers/batch_reindex_layer.cu
new file mode 100644 (file)
index 0000000..0b5ccf0
--- /dev/null
@@ -0,0 +1,106 @@
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "caffe/common_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template<typename Dtype>
+__global__ void BRForward(const int count, const int inner_dim, const Dtype* in,
+                          const Dtype* permut, Dtype* out) {
+  CUDA_KERNEL_LOOP(index, count) {
+    int n = index / (inner_dim);
+    int in_n = static_cast<int>(permut[n]);
+    out[index] = in[in_n * (inner_dim) + index % (inner_dim)];
+  }
+}
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+                                           const vector<Blob<Dtype>*>& top) {
+  check_batch_reindex(bottom[0]->shape(0), bottom[1]->count(),
+                      bottom[1]->cpu_data());
+  if (top[0]->count() == 0) {
+    return;
+  }
+  int threads = top[0]->count();
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  BRForward<Dtype> <<<CAFFE_GET_BLOCKS(threads), CAFFE_CUDA_NUM_THREADS>>>(
+      top[0]->count(), bottom[0]->count() / bottom[0]->shape(0),
+      bottom[0]->gpu_data(), bottom[1]->gpu_data(), top[0]->mutable_gpu_data());
+  CUDA_POST_KERNEL_CHECK;
+}
+
+template<typename Dtype>
+__global__ void BRBackward(const int count, const int inner_dim,
+                           const Dtype* in, const Dtype* top_indexes,
+                           const Dtype* begins, const Dtype* counts,
+                           Dtype* out) {
+  CUDA_KERNEL_LOOP(index, count) {
+    int n = index / (inner_dim);
+    out[index] = 0;
+    int lower = static_cast<int>(begins[n]);
+    int upper = lower + static_cast<int>(counts[n]);
+    for (int i = lower; i < upper; ++i) {
+      int in_n = static_cast<int>(top_indexes[i]);
+      out[index] += in[in_n * (inner_dim) + index % (inner_dim)];
+    }
+  }
+}
+
+template<typename Dtype>
+void BatchReindexLayer<Dtype>::Backward_gpu(
+    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
+    const vector<Blob<Dtype>*>& bottom) {
+  CHECK(!propagate_down[1]) << "Cannot backprop to index.";
+  if (!propagate_down[0]) {
+    return;
+  }
+
+  vector<std::pair<int, int> > mapping;
+  const Dtype* perm = bottom[1]->cpu_data();
+  for (int i = 0; i < bottom[1]->count(); ++i) {
+    mapping.push_back(pair<int, int>(static_cast<int>(perm[i]), i));
+  }
+  std::sort(mapping.begin(), mapping.end(), pair_sort_first());
+
+  // Each element of the bottom diff is potentially the sum of many top diffs.
+  // However, we'd like each CUDA thread to handle exactly one output.  Hence,
+  // we first pre-compute a list of lists of indices that need to be summed for
+  // each output. `top_indexes` holds the data of this list of lists.  The
+  // k'th element of `begins` points to the location in `top_indexes` where the
+  // list for the k'th example begins, and the k'th element of `counts` is the
+  // length of that list.
+  vector<int> shape;
+  shape.push_back(bottom[1]->count());
+  Blob<Dtype> top_indexes(shape);
+  shape[0] = bottom[0]->shape(0);
+  Blob<Dtype> counts(shape);
+  Blob<Dtype> begins(shape);
+  Dtype* t_i_data = top_indexes.mutable_cpu_data();
+  Dtype* c_data = counts.mutable_cpu_data();
+  Dtype* b_data = begins.mutable_cpu_data();
+  caffe_set(begins.count(), Dtype(-1), b_data);
+  caffe_set(counts.count(), Dtype(0), c_data);
+  for (int i = 0; i < mapping.size(); ++i) {
+    t_i_data[i] = mapping[i].second;
+    if (b_data[mapping[i].first] == -1) {
+      b_data[mapping[i].first] = i;
+    }
+    c_data[mapping[i].first] += 1;
+  }
+
+  int threads = bottom[0]->count();
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  BRBackward<Dtype> <<<CAFFE_GET_BLOCKS(threads), CAFFE_CUDA_NUM_THREADS>>>(
+      bottom[0]->count(), bottom[0]->count() / bottom[0]->shape(0),
+      top[0]->gpu_diff(), top_indexes.gpu_data(), begins.gpu_data(),
+      counts.gpu_data(), bottom[0]->mutable_gpu_diff());
+  CUDA_POST_KERNEL_CHECK;
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(BatchReindexLayer);
+
+}  // namespace caffe
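
The precomputation described in the Backward_gpu comment is easiest to check
against a small example. A plain-Python restatement (illustrative helper, not
part of Caffe):

    def build_backward_lists(permut, initial_num):
        mapping = sorted((int(p), i) for i, p in enumerate(permut))
        top_indexes = [i for _, i in mapping]
        begins = [-1] * initial_num
        counts = [0] * initial_num
        for pos, (in_n, _) in enumerate(mapping):
            if begins[in_n] == -1:
                begins[in_n] = pos
            counts[in_n] += 1
        return top_indexes, begins, counts

    # permut = [2, 0, 2] over 4 inputs: input 2's list holds two top indices
    print(build_backward_lists([2, 0, 2], 4))
    # -> ([1, 0, 2], [0, -1, 1, -1], [1, 0, 2, 0])
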
index 9ba0ea9..1e422a5 100644 (file)
@@ -1,8 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index d963d06..3e328ef 100644 (file)
@@ -1,8 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index 86b500d..14cbfb1 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 617701e..e1e9449 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 25e1678..7400208 100644 (file)
@@ -1,9 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/loss_layers.hpp"
-#include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 9312393..ee27840 100644 (file)
@@ -1,10 +1,8 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index fb50bb0..efd69d4 100644 (file)
@@ -1,9 +1,5 @@
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index b429d2b..a534b35 100644 (file)
@@ -1,9 +1,5 @@
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 3514fe2..8b61249 100644 (file)
@@ -1,10 +1,7 @@
 #ifdef USE_CUDNN
+#include <algorithm>
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
@@ -24,13 +21,38 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
   // Initialize CUDA streams and cuDNN.
   stream_         = new cudaStream_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
   handle_         = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
+
+  // Initialize algorithm arrays
+  fwd_algo_       = new cudnnConvolutionFwdAlgo_t[bottom.size()];
+  bwd_filter_algo_= new cudnnConvolutionBwdFilterAlgo_t[bottom.size()];
+  bwd_data_algo_  = new cudnnConvolutionBwdDataAlgo_t[bottom.size()];
+
+  // initialize size arrays
+  workspace_fwd_sizes_ = new size_t[bottom.size()];
+  workspace_bwd_filter_sizes_ = new size_t[bottom.size()];
+  workspace_bwd_data_sizes_ = new size_t[bottom.size()];
+
+  // workspace data
   workspaceSizeInBytes = 0;
-  workspace = NULL;
+  workspaceData = NULL;
+  workspace = new void*[this->group_ * CUDNN_STREAMS_PER_GROUP];
+
+  for (size_t i = 0; i < bottom.size(); ++i) {
+    // initialize all to default algorithms
+    fwd_algo_[i] = (cudnnConvolutionFwdAlgo_t)0;
+    bwd_filter_algo_[i] = (cudnnConvolutionBwdFilterAlgo_t)0;
+    bwd_data_algo_[i] = (cudnnConvolutionBwdDataAlgo_t)0;
+    // default algorithms don't require workspace
+    workspace_fwd_sizes_[i] = 0;
+    workspace_bwd_data_sizes_[i] = 0;
+    workspace_bwd_filter_sizes_[i] = 0;
+  }
 
   for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
     CUDA_CHECK(cudaStreamCreate(&stream_[g]));
     CUDNN_CHECK(cudnnCreate(&handle_[g]));
     CUDNN_CHECK(cudnnSetStream(handle_[g], stream_[g]));
+    workspace[g] = NULL;
   }
 
   // Set the indexing parameters.
@@ -86,6 +108,10 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
   const int stride_h = stride_data[0];
   const int stride_w = stride_data[1];
 
+  // Specify workspace limit for kernels directly until we have a
+  // planning strategy and a rewrite of Caffe's GPU memory management
+  size_t workspace_limit_bytes = 8*1024*1024;
+
   for (int i = 0; i < bottom.size(); i++) {
     cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
         this->num_,
@@ -98,7 +124,104 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
         this->num_output_ * this->out_spatial_dim_,
         this->out_spatial_dim_, width_out, 1);
     cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
-        filter_desc_, pad_h, pad_w, stride_h, stride_w);
+        filter_desc_, pad_h, pad_w,
+        stride_h, stride_w);
+
+    // choose forward and backward algorithms + workspace(s)
+    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
+      bottom_descs_[i],
+      filter_desc_,
+      conv_descs_[i],
+      top_descs_[i],
+      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
+      workspace_limit_bytes,
+      &fwd_algo_[i]));
+
+    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0],
+      bottom_descs_[i],
+      filter_desc_,
+      conv_descs_[i],
+      top_descs_[i],
+      fwd_algo_[i],
+      &(workspace_fwd_sizes_[i])));
+
+    // choose backward algorithm for filter
+    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle_[0],
+          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+          CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
+          workspace_limit_bytes, &bwd_filter_algo_[i]) );
+
+    // get workspace for backwards filter algorithm
+    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0],
+          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+          bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i]));
+
+    // choose backward algo for data
+    CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle_[0],
+          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+          CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
+          workspace_limit_bytes, &bwd_data_algo_[i]));
+
+    // get workspace size
+    CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0],
+          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+          bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
+  }
+
+  // reduce over all workspace sizes to get a maximum to allocate / reallocate
+  size_t total_workspace_fwd = 0;
+  size_t total_workspace_bwd_data = 0;
+  size_t total_workspace_bwd_filter = 0;
+
+  for (size_t i = 0; i < bottom.size(); i++) {
+    total_workspace_fwd        = std::max(total_workspace_fwd,
+                                     workspace_fwd_sizes_[i]);
+    total_workspace_bwd_data   = std::max(total_workspace_bwd_data,
+                                     workspace_bwd_data_sizes_[i]);
+    total_workspace_bwd_filter = std::max(total_workspace_bwd_filter,
+                                     workspace_bwd_filter_sizes_[i]);
+  }
+  // get max over all operations
+  size_t max_workspace = std::max(total_workspace_fwd,
+                             total_workspace_bwd_data);
+  max_workspace = std::max(max_workspace, total_workspace_bwd_filter);
+  // ensure all groups have enough workspace
+  size_t total_max_workspace = max_workspace *
+                               (this->group_ * CUDNN_STREAMS_PER_GROUP);
+
+  // this is the total amount of storage needed over all groups + streams
+  if (total_max_workspace > workspaceSizeInBytes) {
+    LOG(INFO) << "Reallocating workspace storage: " << total_max_workspace;
+    workspaceSizeInBytes = total_max_workspace;
+
+    // free the existing workspace and allocate a new (larger) one
+    cudaFree(this->workspaceData);
+
+    cudaError_t err = cudaMalloc(&(this->workspaceData), workspaceSizeInBytes);
+    if (err != cudaSuccess) {
+      // force zero memory path
+      for (int i = 0; i < bottom.size(); i++) {
+        workspace_fwd_sizes_[i] = 0;
+        workspace_bwd_filter_sizes_[i] = 0;
+        workspace_bwd_data_sizes_[i] = 0;
+        fwd_algo_[i] = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
+        bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
+        bwd_data_algo_[i] = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
+      }
+
+      // NULL out all workspace pointers
+      for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) {
+        workspace[g] = NULL;
+      }
+      // NULL out underlying data
+      workspaceData = NULL;
+      workspaceSizeInBytes = 0;
+    } else {
+      // if we succeed in the allocation, set pointer aliases for workspaces
+      for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) {
+        workspace[g] = reinterpret_cast<char *>(workspaceData) + g*max_workspace;
+      }
+    }
   }
 
   // Tensor descriptor for bias.
@@ -128,8 +251,15 @@ CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
     cudnnDestroy(handle_[g]);
   }
 
+  cudaFree(workspaceData);
   delete [] stream_;
   delete [] handle_;
+  delete [] fwd_algo_;
+  delete [] bwd_filter_algo_;
+  delete [] bwd_data_algo_;
+  delete [] workspace_fwd_sizes_;
+  delete [] workspace_bwd_data_sizes_;
+  delete [] workspace_bwd_filter_sizes_;
 }
 
 INSTANTIATE_CLASS(CuDNNConvolutionLayer);
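
The sizing logic in Reshape collapses every per-blob, per-operation workspace
request into one allocation: take the maximum request over all bottoms and
all three operations, then replicate it once per group and stream so the
concurrent handles never share a buffer. A sketch of the arithmetic with
made-up sizes:

    def total_workspace(fwd, bwd_data, bwd_filter, group, streams_per_group):
        max_ws = max(max(fwd), max(bwd_data), max(bwd_filter))
        return max_ws * group * streams_per_group

    # per-bottom sizes in bytes (illustrative numbers only)
    print(total_workspace([1 << 20, 3 << 20], [2 << 20, 2 << 20],
                          [1 << 20, 1 << 20], group=2, streams_per_group=3))
    # -> 18874368, i.e. 3 MiB replicated across 6 group/stream slots
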
index 6911520..63b6ab9 100644 (file)
@@ -1,10 +1,6 @@
 #ifdef USE_CUDNN
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
@@ -14,11 +10,6 @@ __global__ void sync_conv_groups() { }
 template <typename Dtype>
 void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
-  const int kernel_h = kernel_shape_data[0];
-  const int kernel_w = kernel_shape_data[1];
-  const size_t workspace_limit_bytes =
-      kernel_h * kernel_w * this->channels_ * sizeof(int) + 1;
   const Dtype* weight = this->blobs_[0]->gpu_data();
   for (int i = 0; i < bottom.size(); ++i) {
     const Dtype* bottom_data = bottom[i]->gpu_data();
@@ -26,52 +17,13 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
 
     // Forward through cuDNN in parallel over groups.
     for (int g = 0; g < this->group_; g++) {
-      cudnnConvolutionFwdAlgo_t algo;
-
-      // pick the convolution algorithm
-      // TODO(shelhamer) this should be done during reshape
-      // TODO(shelhamer) the choice of automatic or manual algorithm picking
-      // should be exposed in proto
-      CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[g],
-        bottom_descs_[i],
-        filter_desc_,
-        conv_descs_[i],
-        top_descs_[i],
-        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
-        workspace_limit_bytes,  // memoryLimitInBytes,
-        &algo));
-
-      // get minimum size of the workspace needed for the desired algorithm
-      size_t workspaceSizeInBytes_temp = 0;
-
-      CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[g],
-        bottom_descs_[i],
-        filter_desc_,
-        conv_descs_[i],
-        top_descs_[i],
-        algo,
-        &workspaceSizeInBytes_temp));
-
-      if (workspaceSizeInBytes_temp > workspaceSizeInBytes) {
-        workspaceSizeInBytes = workspaceSizeInBytes_temp;
-        // free the existing workspace and allocate a new (larger) one
-        cudaFree(this->workspace);
-        cudaError_t err = cudaMalloc(&(this->workspace), workspaceSizeInBytes);
-        if (err != cudaSuccess) {
-          // force zero memory path
-          algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
-          workspace = NULL;
-          workspaceSizeInBytes = 0;
-        }
-      }
-
       // Filters.
       CUDNN_CHECK(cudnnConvolutionForward(handle_[g],
             cudnn::dataType<Dtype>::one,
             bottom_descs_[i], bottom_data + bottom_offset_ * g,
             filter_desc_, weight + this->weight_offset_ * g,
             conv_descs_[i],
-            algo, workspace, workspaceSizeInBytes,
+            fwd_algo_[i], workspace[g], workspace_fwd_sizes_[i],
             cudnn::dataType<Dtype>::zero,
             top_descs_[i], top_data + top_offset_ * g));
 
@@ -101,10 +53,12 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   if (this->param_propagate_down_[0]) {
     weight = this->blobs_[0]->gpu_data();
     weight_diff = this->blobs_[0]->mutable_gpu_diff();
+    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
   }
   Dtype* bias_diff = NULL;
   if (this->bias_term_ && this->param_propagate_down_[1]) {
     bias_diff = this->blobs_[1]->mutable_gpu_diff();
+    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
   }
   for (int i = 0; i < top.size(); ++i) {
     const Dtype* top_diff = top[i]->gpu_diff();
@@ -122,11 +76,14 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       // Gradient w.r.t. weights.
       if (this->param_propagate_down_[0]) {
         const Dtype* bottom_data = bottom[i]->gpu_data();
-        CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g],
+        CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3(
+              handle_[1*this->group_ + g],
               cudnn::dataType<Dtype>::one,
               bottom_descs_[i], bottom_data + bottom_offset_ * g,
               top_descs_[i],    top_diff + top_offset_ * g,
               conv_descs_[i],
+              bwd_filter_algo_[i], workspace[1*this->group_ + g],
+              workspace_bwd_filter_sizes_[i],
               cudnn::dataType<Dtype>::one,
               filter_desc_, weight_diff + this->weight_offset_ * g));
       }
@@ -137,11 +94,14 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
           weight = this->blobs_[0]->gpu_data();
         }
         Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
-        CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g],
+        CUDNN_CHECK(cudnnConvolutionBackwardData_v3(
+              handle_[2*this->group_ + g],
               cudnn::dataType<Dtype>::one,
               filter_desc_, weight + this->weight_offset_ * g,
               top_descs_[i], top_diff + top_offset_ * g,
               conv_descs_[i],
+              bwd_data_algo_[i], workspace[2*this->group_ + g],
+              workspace_bwd_data_sizes_[i],
               cudnn::dataType<Dtype>::zero,
               bottom_descs_[i], bottom_diff + bottom_offset_ * g));
       }
diff --git a/src/caffe/layers/cudnn_lcn_layer.cpp b/src/caffe/layers/cudnn_lcn_layer.cpp
new file mode 100644 (file)
index 0000000..4c70078
--- /dev/null
@@ -0,0 +1,73 @@
+#ifdef USE_CUDNN
+#include <vector>
+
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNLCNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  LRNLayer<Dtype>::LayerSetUp(bottom, top);
+
+  CUDNN_CHECK(cudnnCreate(&handle_));
+  CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_));
+  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
+  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+
+  // create a LRN handle
+  handles_setup_ = true;
+
+  size_ = this->layer_param().lrn_param().local_size();
+  pre_pad_ = (size_ - 1) / 2;
+  alpha_ = this->layer_param().lrn_param().alpha();
+  beta_ = this->layer_param().lrn_param().beta();
+  k_ = this->layer_param().lrn_param().k();
+}
+
+template <typename Dtype>
+void CuDNNLCNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  LRNLayer<Dtype>::Reshape(bottom, top);
+  cudnn::setTensor4dDesc<Dtype>(&bottom_desc_, bottom[0]->num(),
+      this->channels_, this->height_, this->width_);
+  cudnn::setTensor4dDesc<Dtype>(&top_desc_, bottom[0]->num(),
+      this->channels_, this->height_, this->width_);
+  CUDNN_CHECK(cudnnSetLRNDescriptor(norm_desc_, size_, alpha_, beta_, k_));
+
+  // allocate / reallocate tempData buffers
+  size_t totalSizeInBytes = sizeof(Dtype)*bottom[0]->num()* \
+                            this->channels_*this->height_*this->width_;
+
+  if (totalSizeInBytes > tempDataSize) {
+    tempDataSize = totalSizeInBytes;
+
+    cudaFree(tempData1);
+    cudaFree(tempData2);
+
+    // allocate new buffers
+    CUDA_CHECK(cudaMalloc(&tempData1, totalSizeInBytes));
+    CUDA_CHECK(cudaMalloc(&tempData2, totalSizeInBytes));
+  }
+}
+
+template <typename Dtype>
+CuDNNLCNLayer<Dtype>::~CuDNNLCNLayer() {
+  // Check that handles have been setup before destroying.
+  if (!handles_setup_) { return; }
+
+  cudnnDestroyTensorDescriptor(bottom_desc_);
+  cudnnDestroyTensorDescriptor(top_desc_);
+
+  // destroy LRN handle
+  cudnnDestroy(handle_);
+
+  // free temp buffers
+  cudaFree(tempData1);
+  cudaFree(tempData2);
+}
+
+INSTANTIATE_CLASS(CuDNNLCNLayer);
+
+}   // namespace caffe
+#endif
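
Reshape above reallocates the two temp buffers only when the required byte
count grows past what is already held, so repeated calls with the same or
smaller shapes cost nothing. A small Python sketch of that grow-only policy
(bytearray standing in for cudaMalloc):

    class TempBuffers(object):
        def __init__(self):
            self.size = 0
            self.buf1 = None
            self.buf2 = None

        def ensure(self, nbytes):
            if nbytes > self.size:       # grow only; never shrink
                self.size = nbytes
                self.buf1 = bytearray(nbytes)
                self.buf2 = bytearray(nbytes)
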
diff --git a/src/caffe/layers/cudnn_lcn_layer.cu b/src/caffe/layers/cudnn_lcn_layer.cu
new file mode 100644 (file)
index 0000000..e79c745
--- /dev/null
@@ -0,0 +1,46 @@
+#ifdef USE_CUDNN
+#include <vector>
+
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNLCNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = top[0]->mutable_gpu_data();
+
+  CUDNN_CHECK(cudnnDivisiveNormalizationForward(
+        handle_, norm_desc_, CUDNN_DIVNORM_PRECOMPUTED_MEANS,
+        cudnn::dataType<Dtype>::one,
+        bottom_desc_, bottom_data,
+        NULL,  // srcMeansData
+        this->tempData1, this->tempData2,
+        cudnn::dataType<Dtype>::zero,
+        top_desc_, top_data) );
+}
+
+template <typename Dtype>
+void CuDNNLCNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  const Dtype* top_data = top[0]->gpu_data();
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+
+  CUDNN_CHECK(cudnnDivisiveNormalizationBackward(
+        handle_, norm_desc_, CUDNN_DIVNORM_PRECOMPUTED_MEANS,
+        cudnn::dataType<Dtype>::one,
+        bottom_desc_, bottom_data,
+        NULL, top_diff,  // NULL - srcMeansData
+        this->tempData1, this->tempData2,
+        cudnn::dataType<Dtype>::zero,
+        bottom_desc_, bottom_diff,
+        NULL) );
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(CuDNNLCNLayer);
+
+}  // namespace caffe
+#endif
diff --git a/src/caffe/layers/cudnn_lrn_layer.cpp b/src/caffe/layers/cudnn_lrn_layer.cpp
new file mode 100644 (file)
index 0000000..a03db3b
--- /dev/null
@@ -0,0 +1,53 @@
+#ifdef USE_CUDNN
+#include <vector>
+
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  LRNLayer<Dtype>::LayerSetUp(bottom, top);
+
+  CUDNN_CHECK(cudnnCreate(&handle_));
+  CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_));
+  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
+  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+
+  // create a LRN handle
+  handles_setup_ = true;
+
+  size_ = this->layer_param().lrn_param().local_size();
+  alpha_ = this->layer_param().lrn_param().alpha();
+  beta_ = this->layer_param().lrn_param().beta();
+  k_ = this->layer_param().lrn_param().k();
+}
+
+template <typename Dtype>
+void CuDNNLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  LRNLayer<Dtype>::Reshape(bottom, top);
+  cudnn::setTensor4dDesc<Dtype>(&bottom_desc_, bottom[0]->num(),
+      this->channels_, this->height_, this->width_);
+  cudnn::setTensor4dDesc<Dtype>(&top_desc_, bottom[0]->num(),
+      this->channels_, this->height_, this->width_);
+  CUDNN_CHECK(cudnnSetLRNDescriptor(norm_desc_, size_, alpha_, beta_, k_));
+}
+
+template <typename Dtype>
+CuDNNLRNLayer<Dtype>::~CuDNNLRNLayer() {
+  // Check that handles have been setup before destroying.
+  if (!handles_setup_) { return; }
+
+  cudnnDestroyTensorDescriptor(bottom_desc_);
+  cudnnDestroyTensorDescriptor(top_desc_);
+
+  // destroy LRN handle
+  cudnnDestroy(handle_);
+}
+
+INSTANTIATE_CLASS(CuDNNLRNLayer);
+
+}   // namespace caffe
+#endif
diff --git a/src/caffe/layers/cudnn_lrn_layer.cu b/src/caffe/layers/cudnn_lrn_layer.cu
new file mode 100644 (file)
index 0000000..327e44b
--- /dev/null
@@ -0,0 +1,44 @@
+#ifdef USE_CUDNN
+#include <vector>
+
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNLRNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = top[0]->mutable_gpu_data();
+
+  CUDNN_CHECK(cudnnLRNCrossChannelForward(
+        handle_, norm_desc_, CUDNN_LRN_CROSS_CHANNEL_DIM1,
+        cudnn::dataType<Dtype>::one,
+        bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        top_desc_, top_data) );
+}
+
+template <typename Dtype>
+void CuDNNLRNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  const Dtype* top_data = top[0]->gpu_data();
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+
+  CUDNN_CHECK(cudnnLRNCrossChannelBackward(
+        handle_, norm_desc_, CUDNN_LRN_CROSS_CHANNEL_DIM1,
+        cudnn::dataType<Dtype>::one,
+        top_desc_, top_data,
+        top_desc_, top_diff,
+        bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        bottom_desc_, bottom_diff) );
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(CuDNNLRNLayer);
+
+}  // namespace caffe
+
+#endif
index c92c4e4..5f995d4 100644 (file)
@@ -1,10 +1,6 @@
 #ifdef USE_CUDNN
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index a952b85..9aa39ed 100644 (file)
@@ -1,10 +1,6 @@
 #ifdef USE_CUDNN
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 759d839..e6b6d5a 100644 (file)
@@ -1,8 +1,6 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 21d1485..2a53a49 100644 (file)
@@ -1,8 +1,6 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 3263787..4b489fa 100644 (file)
@@ -1,8 +1,6 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 7a06cf7..9de5c74 100644 (file)
@@ -1,8 +1,6 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 77a3225..f5cd045 100644 (file)
@@ -1,12 +1,8 @@
 #ifdef USE_CUDNN
-#include <algorithm>
-#include <cfloat>
 #include <vector>
 
 #include "thrust/device_vector.h"
 
-#include "caffe/layer.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index a9e2fce..c270202 100644 (file)
@@ -1,12 +1,8 @@
 #ifdef USE_CUDNN
-#include <algorithm>
-#include <cfloat>
 #include <vector>
 
 #include "thrust/device_vector.h"
 
-#include "caffe/layer.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 376faad..4629681 100644 (file)
@@ -1,9 +1,7 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index d287f6f..84f784b 100644 (file)
@@ -1,9 +1,7 @@
 #ifdef USE_CUDNN
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index 71f8cb0..49ac858 100644 (file)
@@ -3,15 +3,11 @@
 #endif  // USE_OPENCV
 #include <stdint.h>
 
-#include <string>
 #include <vector>
 
-#include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/util/benchmark.hpp"
-#include "caffe/util/io.hpp"
 
 namespace caffe {
 
index 91aabb3..5038b63 100644 (file)
@@ -1,9 +1,5 @@
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index 5dbdcc3..0e8e2ed 100644 (file)
@@ -1,9 +1,5 @@
 #include <vector>
 
-#include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/im2col.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index ec1256f..eb7a8a9 100644 (file)
@@ -2,11 +2,8 @@
 
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/syncedmem.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index f9ea04f..028fc02 100644 (file)
@@ -1,12 +1,7 @@
-#include <algorithm>
-#include <limits>
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/syncedmem.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 6b0d617..ab0478c 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
+#include "caffe/data_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index a807007..7924fbe 100644 (file)
@@ -1,9 +1,8 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 2247870..0140420 100644 (file)
@@ -1,9 +1,8 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index be6b2cd..52704a0 100644 (file)
@@ -1,10 +1,7 @@
 #include <vector>
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
 #include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 62a4db8..cd4b40f 100644 (file)
@@ -1,10 +1,7 @@
 #include <vector>
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
 #include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/gpu_util.cuh"
 #include "caffe/util/math_functions.hpp"
 
index 80efa31..7338953 100644 (file)
@@ -1,9 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 5b1de3a..1aa79bd 100644 (file)
@@ -1,9 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index c7e7c60..f85692d 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 2d75d8d..9e24bbe 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index be1db32..e8b62a5 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index cf929ee..746e91c 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index f7e5c9c..d831fb5 100644 (file)
@@ -1,8 +1,6 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/common_layers.hpp"
 
 namespace caffe {
 
index 8ced510..c765fa0 100644 (file)
@@ -15,7 +15,6 @@ TODO:
 #include "stdint.h"
 
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/hdf5.hpp"
 
 namespace caffe {
index 5e3e4ce..6ac499c 100644 (file)
@@ -4,15 +4,12 @@ TODO:
 */
 
 #include <stdint.h>
-#include <string>
 #include <vector>
 
 #include "hdf5.h"
 #include "hdf5_hl.h"
 
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
 
 namespace caffe {
 
index 56788c2..dbde65d 100644 (file)
@@ -3,11 +3,8 @@
 #include "hdf5.h"
 #include "hdf5_hl.h"
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
+#include "caffe/data_layers.hpp"
 #include "caffe/util/hdf5.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index eb6d0e4..ca8f261 100644 (file)
@@ -3,10 +3,7 @@
 #include "hdf5.h"
 #include "hdf5_hl.h"
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/data_layers.hpp"
 
 namespace caffe {
 
index a2fb2a1..a88c877 100644 (file)
@@ -1,12 +1,8 @@
 #include <algorithm>
-#include <cfloat>
-#include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 595c9db..f3b0f71 100644 (file)
@@ -1,7 +1,5 @@
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/im2col.hpp"
 #include "caffe/vision_layers.hpp"
 
index cd50762..4633628 100644 (file)
@@ -1,7 +1,5 @@
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/im2col.hpp"
 #include "caffe/vision_layers.hpp"
 
index 3d2190f..9a7df5a 100644 (file)
@@ -8,7 +8,6 @@
 #include <vector>
 
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
index a1e0b40..88bd8aa 100644 (file)
@@ -1,12 +1,9 @@
 #include <algorithm>
-#include <cfloat>
 #include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/io.hpp"
-#include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 83c3235..274744e 100644 (file)
@@ -1,11 +1,8 @@
 #include <vector>
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index c0ebd2c..e91e94f 100644 (file)
@@ -1,11 +1,8 @@
 #include <vector>
 
-#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 55a227f..a1876b9 100644 (file)
@@ -1,7 +1,5 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
 
index 847c86c..055b713 100644 (file)
@@ -1,7 +1,5 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
 
index 3496a5c..c10466d 100644 (file)
@@ -1,12 +1,6 @@
-#include <algorithm>
-#include <cfloat>
-#include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
-#include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 namespace caffe {
 
index 36c1ace..cc56181 100644 (file)
@@ -1,6 +1,5 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
@@ -254,6 +253,5 @@ STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward);
 #endif
 
 INSTANTIATE_CLASS(LRNLayer);
-REGISTER_LAYER_CLASS(LRN);
 
 }  // namespace caffe
index 001b3c3..4523d41 100644 (file)
@@ -1,6 +1,5 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
index 2370aa0..13a3d9f 100644 (file)
@@ -5,8 +5,6 @@
 #include <vector>
 
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
 
 namespace caffe {
 
index 4267a59..5974592 100644 (file)
@@ -1,12 +1,9 @@
 #include <algorithm>
-#include <cfloat>
 #include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/util/io.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 61c2141..0e73014 100644 (file)
@@ -1,8 +1,6 @@
-#include <algorithm>
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 5cbb112..b7e3b3c 100644 (file)
@@ -1,8 +1,6 @@
-#include <algorithm>
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index ba67b43..1dcb2c0 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index c8d4149..3a7de42 100644 (file)
@@ -2,9 +2,6 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/syncedmem.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
index ca4b13f..5e94ce2 100644 (file)
@@ -2,7 +2,6 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
index 4fe34c4..6304fad 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 90d9440..680faad 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/neuron_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 8183175..b5a294e 100644 (file)
@@ -2,8 +2,7 @@
 #include <vector>
 
 #include "caffe/filler.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index e1f2004..992cd88 100644 (file)
@@ -1,8 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
@@ -31,10 +30,15 @@ __global__ void PReLUBackward(const int n, const int channels, const int dim,
 
 // CUDA kernel for element-wise parameter backward
 template <typename Dtype>
-__global__ void PReLUParamBackward(const int n, const Dtype* in_diff,
+__global__ void PReLUParamBackward(const int n,
+    const int rows, const int rowPitch, const Dtype* in_diff,
     const Dtype* in_data, Dtype* out_diff) {
   CUDA_KERNEL_LOOP(index, n) {
     out_diff[index] = in_diff[index] * in_data[index] * (in_data[index] <= 0);
+    for (int k = 1; k < rows; k++) {
+      out_diff[index] += in_diff[index + k*rowPitch]
+          * in_data[index + k*rowPitch] * (in_data[index + k*rowPitch] <= 0);
+    }
   }
 }
 
@@ -82,29 +86,24 @@ void PReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   if (this->param_propagate_down_[0]) {
     Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff();
     int cdim = channels * dim;
-    Dtype dsum = 0.;
-    for (int n = 0; n < bottom[0]->num(); ++n) {
-      // compute element-wise diff
-      // NOLINT_NEXT_LINE(whitespace/operators)
-      PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(cdim),
-          CAFFE_CUDA_NUM_THREADS>>>(
-          cdim, top_diff + top[0]->offset(n),
-          bottom_data + bottom[0]->offset(n),
-          backward_buff_.mutable_gpu_diff());
-      CUDA_POST_KERNEL_CHECK;
-      if (channel_shared_) {
-        Dtype d;
-        caffe_gpu_dot<Dtype>(channels * dim, backward_buff_.gpu_diff(),
-            multiplier_.gpu_data(), &d);
-        dsum += d;
-      } else {
-        caffe_gpu_gemv<Dtype>(CblasNoTrans, channels, dim, 1.,
-            backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1.,
-            slope_diff);
-      }
-    }
+
+    // compute element-wise diff
+    // NOLINT_NEXT_LINE(whitespace/operators)
+    PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(cdim),
+      CAFFE_CUDA_NUM_THREADS>>>(
+      cdim, bottom[0]->num(), top[0]->offset(1), top_diff,
+      bottom_data,
+      backward_buff_.mutable_gpu_diff());
+    CUDA_POST_KERNEL_CHECK;
     if (channel_shared_) {
+      Dtype dsum;
+      caffe_gpu_dot<Dtype>(channels * dim, backward_buff_.gpu_diff(),
+          multiplier_.gpu_data(), &dsum);
       caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(dsum), slope_diff);
+    } else {
+      caffe_gpu_gemv<Dtype>(CblasNoTrans, channels, dim, 1.,
+          backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1.,
+          slope_diff);
     }
   }
   // Propagate to bottom
index 8ae6329..6b7925e 100644 (file)
@@ -1,10 +1,7 @@
-#include <algorithm>
-#include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 2dbd3bc..a9a8c8d 100644 (file)
@@ -1,9 +1,7 @@
-#include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index cc00319..93d0902 100644 (file)
@@ -1,8 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index b8924c8..c18ab61 100644 (file)
@@ -1,8 +1,7 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index ffe970f..8659049 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 
 namespace caffe {
 
index cc236fe..9858863 100644 (file)
@@ -1,10 +1,7 @@
-#include <algorithm>
-#include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 547fa80..48dbec4 100644 (file)
@@ -1,10 +1,7 @@
-#include <algorithm>
-#include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 48c3849..d4a3f87 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index e1af065..5730636 100644 (file)
@@ -1,9 +1,7 @@
-#include <algorithm>
 #include <cmath>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index 7e70ab4..3974f5d 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 34faef2..c49ecb2 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 0a059ae..f368a24 100644 (file)
@@ -1,9 +1,8 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index e8dc6cd..d555f7d 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 04712c9..8ae7d49 100644 (file)
@@ -1,9 +1,8 @@
 #include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 1f9c3a4..a620fcc 100644 (file)
@@ -4,9 +4,8 @@
 
 #include "thrust/device_vector.h"
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index ba312f6..dee50ac 100644 (file)
@@ -2,10 +2,8 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/layer_factory.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 7e0f3da..42e91fa 100644 (file)
@@ -2,9 +2,8 @@
 #include <cfloat>
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index 272cb59..5333e57 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index a4f5df2..73d04c9 100644 (file)
@@ -1,8 +1,7 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/vision_layers.hpp"
 
 namespace caffe {
 
index d762291..2ef4ac7 100644 (file)
@@ -1,11 +1,6 @@
 #include <algorithm>
-#include <cfloat>
 #include <vector>
 
-#include "caffe/common.hpp"
-#include "caffe/layer.hpp"
-#include "caffe/syncedmem.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/vision_layers.hpp"
 
 namespace caffe {
index ee5ed77..9d1cac7 100644 (file)
@@ -1,11 +1,9 @@
 // TanH neuron activation function layer.
 // Adapted from ReLU layer code written by Yangqing Jia
 
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index ccd6e63..d87bcce 100644 (file)
@@ -1,11 +1,9 @@
 // TanH neuron activation function layer.
 // Adapted from ReLU layer code written by Yangqing Jia
 
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index 2365e7b..d651473 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 
 namespace caffe {
index bfa7f15..1cd62d9 100644 (file)
@@ -1,8 +1,6 @@
-#include <algorithm>
 #include <vector>
 
-#include "caffe/layer.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 namespace caffe {
 
index f55008c..581546c 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index 7fd3bc4..fdf9609 100644 (file)
@@ -1,7 +1,6 @@
 #include <vector>
 
 #include "caffe/common_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
 
 namespace caffe {
index f8db61c..3f937bc 100644 (file)
@@ -12,9 +12,7 @@
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 
-#include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
-#include "caffe/layer.hpp"
 #include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
index ebb8b5d..1ad93e6 100644 (file)
@@ -46,10 +46,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   // the current NetState.
   NetParameter filtered_param;
   FilterNet(in_param, &filtered_param);
-  if (Caffe::root_solver()) {
-    LOG(INFO) << "Initializing net from parameters: " << std::endl
-              << filtered_param.DebugString();
-  }
+  LOG_IF(INFO, Caffe::root_solver())
+      << "Initializing net from parameters: " << std::endl
+      << filtered_param.DebugString();
   // Create a copy of filtered_param with splits added where necessary.
   NetParameter param;
   InsertSplits(filtered_param, &param);
@@ -73,8 +72,6 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     const int layer_id = -1;  // inputs have fake layer ID -1
     AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx);
   }
-  DLOG_IF(INFO, Caffe::root_solver())
-      << "Memory required for data: " << memory_used_ * sizeof(Dtype);
   // For each layer, set up its input and output
   bottom_vecs_.resize(param.layer_size());
   top_vecs_.resize(param.layer_size());
@@ -106,9 +103,8 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
       layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
     }
     layer_names_.push_back(layer_param.name());
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "Creating Layer " << layer_param.name();
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "Creating Layer " << layer_param.name();
     bool need_backward = false;
 
     // Figure out this layer's input and output
@@ -151,29 +147,23 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     } else {
       layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
     }
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "Setting up " << layer_names_[layer_id];
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "Setting up " << layer_names_[layer_id];
     for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
       if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
         blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
       }
       blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
-      if (Caffe::root_solver()) {
-        LOG(INFO) << "Top shape: "
-                  << top_vecs_[layer_id][top_id]->shape_string();
-      }
+      LOG_IF(INFO, Caffe::root_solver())
+          << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
       if (layer->loss(top_id)) {
-        if (Caffe::root_solver()) {
-          LOG(INFO) << "    with loss weight " << layer->loss(top_id);
-        }
+        LOG_IF(INFO, Caffe::root_solver())
+            << "    with loss weight " << layer->loss(top_id);
       }
       memory_used_ += top_vecs_[layer_id][top_id]->count();
     }
-    if (Caffe::root_solver()) {
-      DLOG(INFO) << "Memory required for data: "
-                 << memory_used_ * sizeof(Dtype);
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "Memory required for data: " << memory_used_ * sizeof(Dtype);
     const int param_size = layer_param.param_size();
     const int num_param_blobs = layers_[layer_id]->blobs().size();
     CHECK_LE(param_size, num_param_blobs)
@@ -231,14 +221,12 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
       }
     }
     if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }
-    if (layer_need_backward_[layer_id]) {
-      if (Caffe::root_solver()) {
+    if (Caffe::root_solver()) {
+      if (layer_need_backward_[layer_id]) {
         LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
-      }
-    } else {
-      if (Caffe::root_solver()) {
+      } else {
         LOG(INFO) << layer_names_[layer_id]
-                  << " does not need backward computation.";
+            << " does not need backward computation.";
       }
     }
     for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
@@ -279,9 +267,8 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   // In the end, all remaining blobs are considered output blobs.
   for (set<string>::iterator it = available_blobs.begin();
       it != available_blobs.end(); ++it) {
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "This network produces output " << *it;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "This network produces output " << *it;
     net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
     net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
   }
@@ -293,10 +280,7 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   }
   ShareWeights();
   debug_info_ = param.debug_info();
-  if (Caffe::root_solver()) {
-    LOG(INFO) << "Network initialization done.";
-    LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
-  }
+  LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
 }
 
 template <typename Dtype>
@@ -335,33 +319,30 @@ bool Net<Dtype>::StateMeetsRule(const NetState& state,
   // Check whether the rule is broken due to phase.
   if (rule.has_phase()) {
       if (rule.phase() != state.phase()) {
-        if (Caffe::root_solver()) {
-          LOG(INFO) << "The NetState phase (" << state.phase()
-                    << ") differed from the phase (" << rule.phase()
-                    << ") specified by a rule in layer " << layer_name;
-        }
+        LOG_IF(INFO, Caffe::root_solver())
+            << "The NetState phase (" << state.phase()
+            << ") differed from the phase (" << rule.phase()
+            << ") specified by a rule in layer " << layer_name;
         return false;
       }
   }
   // Check whether the rule is broken due to min level.
   if (rule.has_min_level()) {
     if (state.level() < rule.min_level()) {
-      if (Caffe::root_solver()) {
-        LOG(INFO) << "The NetState level (" << state.level()
-                  << ") is below the min_level (" << rule.min_level()
-                  << ") specified by a rule in layer " << layer_name;
-      }
+      LOG_IF(INFO, Caffe::root_solver())
+          << "The NetState level (" << state.level()
+          << ") is below the min_level (" << rule.min_level()
+          << ") specified by a rule in layer " << layer_name;
       return false;
     }
   }
   // Check whether the rule is broken due to max level.
   if (rule.has_max_level()) {
     if (state.level() > rule.max_level()) {
-      if (Caffe::root_solver()) {
-        LOG(INFO) << "The NetState level (" << state.level()
-                  << ") is above the max_level (" << rule.max_level()
-                  << ") specified by a rule in layer " << layer_name;
-      }
+      LOG_IF(INFO, Caffe::root_solver())
+          << "The NetState level (" << state.level()
+          << ") is above the max_level (" << rule.max_level()
+          << ") specified by a rule in layer " << layer_name;
       return false;
     }
   }
@@ -374,10 +355,9 @@ bool Net<Dtype>::StateMeetsRule(const NetState& state,
       if (rule.stage(i) == state.stage(j)) { has_stage = true; }
     }
     if (!has_stage) {
-      if (Caffe::root_solver()) {
-        LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i)
-                  << "' specified by a rule in layer " << layer_name;
-      }
+      LOG_IF(INFO, Caffe::root_solver())
+          << "The NetState did not contain stage '" << rule.stage(i)
+          << "' specified by a rule in layer " << layer_name;
       return false;
     }
   }
@@ -390,10 +370,9 @@ bool Net<Dtype>::StateMeetsRule(const NetState& state,
       if (rule.not_stage(i) == state.stage(j)) { has_stage = true; }
     }
     if (has_stage) {
-      if (Caffe::root_solver()) {
-        LOG(INFO) << "The NetState contained a not_stage '" << rule.not_stage(i)
-                  << "' specified by a rule in layer " << layer_name;
-      }
+      LOG_IF(INFO, Caffe::root_solver())
+          << "The NetState contained a not_stage '" << rule.not_stage(i)
+          << "' specified by a rule in layer " << layer_name;
       return false;
     }
   }
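
StateMeetsRule is the filter behind layer include/exclude rules: a rule matches only if the phase, level bounds, and stage/not_stage sets all agree with the current NetState. A small sketch of a phase mismatch, using a hypothetical layer name ("fc8"):

    #include "caffe/net.hpp"
    #include "caffe/proto/caffe.pb.h"

    bool PhaseExample() {
      caffe::NetState state;
      state.set_phase(caffe::TEST);
      caffe::NetStateRule rule;
      rule.set_phase(caffe::TRAIN);
      // Logs the mismatch (root solver only) and returns false.
      return caffe::Net<float>::StateMeetsRule(state, rule, "fc8");
    }
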
@@ -415,9 +394,8 @@ void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
   if (blob_name_to_idx && layer_param && layer_param->bottom_size() > top_id &&
       blob_name == layer_param->bottom(top_id)) {
     // In-place computation
-    if (Caffe::root_solver()) {
-      LOG(INFO) << layer_param->name() << " -> " << blob_name << " (in-place)";
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << layer_param->name() << " -> " << blob_name << " (in-place)";
     top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get());
     top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]);
   } else if (blob_name_to_idx &&
@@ -473,9 +451,8 @@ int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
                << layer_param.name() << "', bottom index " << bottom_id << ")";
   }
   const int blob_id = (*blob_name_to_idx)[blob_name];
-  if (Caffe::root_solver()) {
-    LOG(INFO) << layer_names_[layer_id] << " <- " << blob_name;
-  }
+  LOG_IF(INFO, Caffe::root_solver())
+      << layer_names_[layer_id] << " <- " << blob_name;
   bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
   bottom_id_vecs_[layer_id].push_back(blob_id);
   available_blobs->erase(blob_name);
@@ -672,10 +649,9 @@ void Net<Dtype>::InputDebugInfo(const int input_id) {
   const Blob<Dtype>& blob = *net_input_blobs_[input_id];
   const string& blob_name = blob_names_[net_input_blob_indices_[input_id]];
   const Dtype data_abs_val_mean = blob.asum_data() / blob.count();
-  if (Caffe::root_solver()) {
-    LOG(INFO) << "    [Forward] "
-              << "Input " << blob_name << " data: " << data_abs_val_mean;
-  }
+  LOG_IF(INFO, Caffe::root_solver())
+      << "    [Forward] "
+      << "Input " << blob_name << " data: " << data_abs_val_mean;
 }
 
 template <typename Dtype>
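
These debug lines report the mean absolute value of a blob: asum_data() (and asum_diff()) return the L1 norm, and dividing by count() gives the per-element mean. A scalar sketch using the CPU math helper:

    #include "caffe/util/math_functions.hpp"

    // ||x||_1 / n, the statistic printed as "data:" above.
    const float x[4] = {1.f, -2.f, 3.f, -4.f};
    const float mean_abs = caffe::caffe_cpu_asum(4, x) / 4;  // 2.5
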
@@ -684,12 +660,11 @@ void Net<Dtype>::ForwardDebugInfo(const int layer_id) {
     const Blob<Dtype>& blob = *top_vecs_[layer_id][top_id];
     const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
     const Dtype data_abs_val_mean = blob.asum_data() / blob.count();
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Forward] "
-                << "Layer " << layer_names_[layer_id]
-                << ", top blob " << blob_name
-                << " data: " << data_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Forward] "
+        << "Layer " << layer_names_[layer_id]
+        << ", top blob " << blob_name
+        << " data: " << data_abs_val_mean;
   }
   for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
        ++param_id) {
@@ -697,12 +672,11 @@ void Net<Dtype>::ForwardDebugInfo(const int layer_id) {
     const int net_param_id = param_id_vecs_[layer_id][param_id];
     const string& blob_name = param_display_names_[net_param_id];
     const Dtype data_abs_val_mean = blob.asum_data() / blob.count();
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Forward] "
-                << "Layer " << layer_names_[layer_id]
-                << ", param blob " << blob_name
-                << " data: " << data_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Forward] "
+        << "Layer " << layer_names_[layer_id]
+        << ", param blob " << blob_name
+        << " data: " << data_abs_val_mean;
   }
 }
 
@@ -714,24 +688,22 @@ void Net<Dtype>::BackwardDebugInfo(const int layer_id) {
     const Blob<Dtype>& blob = *bottom_vec[bottom_id];
     const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
     const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Backward] "
-                << "Layer " << layer_names_[layer_id]
-                << ", bottom blob " << blob_name
-                << " diff: " << diff_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Backward] "
+        << "Layer " << layer_names_[layer_id]
+        << ", bottom blob " << blob_name
+        << " diff: " << diff_abs_val_mean;
   }
   for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
        ++param_id) {
     if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; }
     const Blob<Dtype>& blob = *layers_[layer_id]->blobs()[param_id];
     const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Backward] "
-                << "Layer " << layer_names_[layer_id]
-                << ", param blob " << param_id
-                << " diff: " << diff_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Backward] "
+        << "Layer " << layer_names_[layer_id]
+        << ", param blob " << param_id
+        << " diff: " << diff_abs_val_mean;
   }
 }
 
@@ -744,22 +716,20 @@ void Net<Dtype>::UpdateDebugInfo(const int param_id) {
   const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count();
   if (param_owner < 0) {
     const Dtype data_abs_val_mean = blob.asum_data() / blob.count();
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Update] Layer " << layer_name
-                << ", param " << param_display_name
-                << " data: " << data_abs_val_mean
-                << "; diff: " << diff_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Update] Layer " << layer_name
+        << ", param " << param_display_name
+        << " data: " << data_abs_val_mean
+        << "; diff: " << diff_abs_val_mean;
   } else {
     const string& owner_layer_name =
         layer_names_[param_layer_indices_[param_owner].first];
-    if (Caffe::root_solver()) {
-      LOG(INFO) << "    [Update] Layer " << layer_name
-                << ", param blob " << param_display_name
-                << " (owned by layer " << owner_layer_name << ", " << "param "
-                << param_display_names_[param_owners_[param_id]] << ")"
-                << " diff: " << diff_abs_val_mean;
-    }
+    LOG_IF(INFO, Caffe::root_solver())
+        << "    [Update] Layer " << layer_name
+        << ", param blob " << param_display_name
+        << " (owned by layer " << owner_layer_name << ", " << "param "
+        << param_display_names_[param_owners_[param_id]] << ")"
+        << " diff: " << diff_abs_val_mean;
   }
 }
 
index a6d154e..9abc92b 100644
@@ -7,7 +7,6 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 
-#include <cstdlib>
 #include <sstream>
 #include <string>
 #include <vector>
index a8747c1..76c869c 100644
@@ -98,7 +98,7 @@ message NetParameter {
 // NOTE
 // Update the next available ID when you add a new SolverParameter field.
 //
-// SolverParameter next available ID: 40 (last added: momentum2)
+// SolverParameter next available ID: 41 (last added: type)
 message SolverParameter {
   //////////////////////////////////////////////////////////////////////////////
   // Specifying the train and test networks
@@ -209,16 +209,9 @@ message SolverParameter {
   // (and by default) initialize using a seed derived from the system clock.
   optional int64 random_seed = 20 [default = -1];
 
-  // Solver type
-  enum SolverType {
-    SGD = 0;
-    NESTEROV = 1;
-    ADAGRAD = 2;
-    RMSPROP = 3;
-    ADADELTA = 4;
-    ADAM = 5;
-  }
-  optional SolverType solver_type = 30 [default = SGD];
+  // type of the solver
+  optional string type = 40 [default = "SGD"];
+
   // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
   optional float delta = 31 [default = 1e-8];
   // parameters for the Adam solver
@@ -234,6 +227,18 @@ message SolverParameter {
 
   // If false, don't save a snapshot after training finishes.
   optional bool snapshot_after_train = 28 [default = true];
+
+  // DEPRECATED: old solver enum types, use string instead
+  enum SolverType {
+    SGD = 0;
+    NESTEROV = 1;
+    ADAGRAD = 2;
+    RMSPROP = 3;
+    ADADELTA = 4;
+    ADAM = 5;
+  }
+  // DEPRECATED: use type instead of solver_type
+  optional SolverType solver_type = 30 [default = SGD];
 }
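
Solver selection now goes through the string field, with the old enum kept only for backward compatibility. A hedged sketch of the programmatic path, assuming the SolverRegistry API introduced in the new solver_factory.hpp:

    #include "caffe/solver_factory.hpp"

    caffe::Solver<float>* MakeSolver() {
      caffe::SolverParameter param;      // train net etc. omitted
      param.set_type("Adam");            // replaces: solver_type: ADAM
      // The registry is populated by the REGISTER_SOLVER_CLASS macros
      // in the per-solver files added below.
      return caffe::SolverRegistry<float>::CreateSolver(param);
    }
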
 
 // A message that stores the solver snapshots
@@ -726,6 +731,12 @@ message LRNParameter {
   }
   optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
   optional float k = 5 [default = 1.];
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 6 [default = DEFAULT];
 }
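
The new engine field mirrors the convention of the other cuDNN-backed layers. A sketch of requesting the cuDNN implementation through the generated protobuf API (DEFAULT presumably leaves the choice to the build, as with the other Engine enums):

    #include "caffe/proto/caffe.pb.h"

    void UseCudnnLrn(caffe::LayerParameter* layer_param) {
      layer_param->set_type("LRN");
      layer_param->mutable_lrn_param()->set_engine(
          caffe::LRNParameter_Engine_CUDNN);
    }
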
 
 message MemoryDataParameter {
index 12c13dd..d3bc736 100644
@@ -1,18 +1,11 @@
 #include <cstdio>
 
-#include <algorithm>
 #include <string>
 #include <vector>
 
-#include "hdf5.h"
-#include "hdf5_hl.h"
-
-#include "caffe/net.hpp"
-#include "caffe/proto/caffe.pb.h"
 #include "caffe/solver.hpp"
 #include "caffe/util/hdf5.hpp"
 #include "caffe/util/io.hpp"
-#include "caffe/util/math_functions.hpp"
 #include "caffe/util/upgrade_proto.hpp"
 
 namespace caffe {
@@ -43,7 +36,7 @@ Solver<Dtype>::Solver(const string& param_file, const Solver* root_solver)
     : net_(), callbacks_(), root_solver_(root_solver),
       requested_early_exit_(false) {
   SolverParameter param;
-  ReadProtoFromTextFileOrDie(param_file, &param);
+  ReadSolverParamsFromTextFileOrDie(param_file, &param);
   Init(param);
 }
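
The constructor now parses the solver prototxt through the upgrade path rather than the raw proto reader, so deprecated fields can be rewritten before Init sees them; upgrading the old solver_type enum to the new string type is the presumed motivation. A sketch of the same entry point used standalone:

    #include <string>

    #include "caffe/util/upgrade_proto.hpp"

    caffe::SolverParameter LoadSolver(const std::string& path) {
      caffe::SolverParameter param;
      caffe::ReadSolverParamsFromTextFileOrDie(path, &param);
      return param;
    }
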
 
@@ -492,810 +485,6 @@ void Solver<Dtype>::Restore(const char* state_file) {
   }
 }
 
-// Return the current learning rate. The currently implemented learning rate
-// policies are as follows:
-//    - fixed: always return base_lr.
-//    - step: return base_lr * gamma ^ (floor(iter / step))
-//    - exp: return base_lr * gamma ^ iter
-//    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
-//    - multistep: similar to step but allows non-uniform steps defined by
-//      stepvalue
-//    - poly: the effective learning rate follows a polynomial decay, to be
-//      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
-//    - sigmoid: the effective learning rate follows a sigmoid decay
-//      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
-//
-// where base_lr, max_iter, gamma, step, stepvalue and power are defined
-// in the solver parameter protocol buffer, and iter is the current iteration.
-template <typename Dtype>
-Dtype SGDSolver<Dtype>::GetLearningRate() {
-  Dtype rate;
-  const string& lr_policy = this->param_.lr_policy();
-  if (lr_policy == "fixed") {
-    rate = this->param_.base_lr();
-  } else if (lr_policy == "step") {
-    this->current_step_ = this->iter_ / this->param_.stepsize();
-    rate = this->param_.base_lr() *
-        pow(this->param_.gamma(), this->current_step_);
-  } else if (lr_policy == "exp") {
-    rate = this->param_.base_lr() * pow(this->param_.gamma(), this->iter_);
-  } else if (lr_policy == "inv") {
-    rate = this->param_.base_lr() *
-        pow(Dtype(1) + this->param_.gamma() * this->iter_,
-            - this->param_.power());
-  } else if (lr_policy == "multistep") {
-    if (this->current_step_ < this->param_.stepvalue_size() &&
-          this->iter_ >= this->param_.stepvalue(this->current_step_)) {
-      this->current_step_++;
-      LOG(INFO) << "MultiStep Status: Iteration " <<
-      this->iter_ << ", step = " << this->current_step_;
-    }
-    rate = this->param_.base_lr() *
-        pow(this->param_.gamma(), this->current_step_);
-  } else if (lr_policy == "poly") {
-    rate = this->param_.base_lr() * pow(Dtype(1.) -
-        (Dtype(this->iter_) / Dtype(this->param_.max_iter())),
-        this->param_.power());
-  } else if (lr_policy == "sigmoid") {
-    rate = this->param_.base_lr() * (Dtype(1.) /
-        (Dtype(1.) + exp(-this->param_.gamma() * (Dtype(this->iter_) -
-          Dtype(this->param_.stepsize())))));
-  } else {
-    LOG(FATAL) << "Unknown learning rate policy: " << lr_policy;
-  }
-  return rate;
-}
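
For concreteness, a scalar sketch of the "step" policy with made-up hyperparameters (base_lr = 0.01, gamma = 0.1, stepsize = 10000):

    #include <cmath>

    const float base_lr = 0.01f, gamma = 0.1f;
    const int stepsize = 10000, iter = 25000;
    const int current_step = iter / stepsize;                    // 2
    const float rate = base_lr * std::pow(gamma, current_step);  // 1e-4
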
-
-template <typename Dtype>
-void SGDSolver<Dtype>::PreSolve() {
-  // Initialize the history
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  history_.clear();
-  update_.clear();
-  temp_.clear();
-  for (int i = 0; i < net_params.size(); ++i) {
-    const vector<int>& shape = net_params[i]->shape();
-    history_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
-    update_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
-    temp_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::ClipGradients() {
-  const Dtype clip_gradients = this->param_.clip_gradients();
-  if (clip_gradients < 0) { return; }
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  Dtype sumsq_diff = 0;
-  for (int i = 0; i < net_params.size(); ++i) {
-    sumsq_diff += net_params[i]->sumsq_diff();
-  }
-  const Dtype l2norm_diff = std::sqrt(sumsq_diff);
-  if (l2norm_diff > clip_gradients) {
-    Dtype scale_factor = clip_gradients / l2norm_diff;
-    LOG(INFO) << "Gradient clipping: scaling down gradients (L2 norm "
-        << l2norm_diff << " > " << clip_gradients << ") "
-        << "by scale factor " << scale_factor;
-    for (int i = 0; i < net_params.size(); ++i) {
-      net_params[i]->scale_diff(scale_factor);
-    }
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::ApplyUpdate() {
-  CHECK(Caffe::root_solver());
-  Dtype rate = GetLearningRate();
-  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
-    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
-  }
-  ClipGradients();
-  for (int param_id = 0; param_id < this->net_->learnable_params().size();
-       ++param_id) {
-    Normalize(param_id);
-    Regularize(param_id);
-    ComputeUpdateValue(param_id, rate);
-  }
-  this->net_->Update();
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::Normalize(int param_id) {
-  if (this->param_.iter_size() == 1) { return; }
-  // Scale gradient to counterbalance accumulation.
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const Dtype accum_normalization = Dtype(1.) / this->param_.iter_size();
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    caffe_scal(net_params[param_id]->count(), accum_normalization,
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    caffe_gpu_scal(net_params[param_id]->count(), accum_normalization,
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::Regularize(int param_id) {
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_weight_decay =
-      this->net_->params_weight_decay();
-  Dtype weight_decay = this->param_.weight_decay();
-  string regularization_type = this->param_.regularization_type();
-  Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    if (local_decay) {
-      if (regularization_type == "L2") {
-        // add weight decay
-        caffe_axpy(net_params[param_id]->count(),
-            local_decay,
-            net_params[param_id]->cpu_data(),
-            net_params[param_id]->mutable_cpu_diff());
-      } else if (regularization_type == "L1") {
-        caffe_cpu_sign(net_params[param_id]->count(),
-            net_params[param_id]->cpu_data(),
-            temp_[param_id]->mutable_cpu_data());
-        caffe_axpy(net_params[param_id]->count(),
-            local_decay,
-            temp_[param_id]->cpu_data(),
-            net_params[param_id]->mutable_cpu_diff());
-      } else {
-        LOG(FATAL) << "Unknown regularization type: " << regularization_type;
-      }
-    }
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    if (local_decay) {
-      if (regularization_type == "L2") {
-        // add weight decay
-        caffe_gpu_axpy(net_params[param_id]->count(),
-            local_decay,
-            net_params[param_id]->gpu_data(),
-            net_params[param_id]->mutable_gpu_diff());
-      } else if (regularization_type == "L1") {
-        caffe_gpu_sign(net_params[param_id]->count(),
-            net_params[param_id]->gpu_data(),
-            temp_[param_id]->mutable_gpu_data());
-        caffe_gpu_axpy(net_params[param_id]->count(),
-            local_decay,
-            temp_[param_id]->gpu_data(),
-            net_params[param_id]->mutable_gpu_diff());
-      } else {
-        LOG(FATAL) << "Unknown regularization type: " << regularization_type;
-      }
-    }
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-  Dtype momentum = this->param_.momentum();
-  Dtype local_rate = rate * net_params_lr[param_id];
-  // Compute the update to history, then copy it to the parameter diff.
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
-              net_params[param_id]->cpu_diff(), momentum,
-              history_[param_id]->mutable_cpu_data());
-    caffe_copy(net_params[param_id]->count(),
-        history_[param_id]->cpu_data(),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
-              net_params[param_id]->gpu_diff(), momentum,
-              history_[param_id]->mutable_gpu_data());
-    caffe_copy(net_params[param_id]->count(),
-        history_[param_id]->gpu_data(),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
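
Per element, the axpby-plus-copy pair above is plain momentum SGD (caffe_cpu_axpby computes Y = alpha*X + beta*Y). A scalar sketch with made-up values:

    const float momentum = 0.9f, local_rate = 0.01f, grad = 0.5f;
    const float v_prev = 0.f;                                // history so far
    const float v = local_rate * grad + momentum * v_prev;   // 0.005
    const float diff = v;                                    // applied by Net::Update
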
-
-template <typename Dtype>
-void SGDSolver<Dtype>::SnapshotSolverState(const string& model_filename) {
-  switch (this->param_.snapshot_format()) {
-    case caffe::SolverParameter_SnapshotFormat_BINARYPROTO:
-      SnapshotSolverStateToBinaryProto(model_filename);
-      break;
-    case caffe::SolverParameter_SnapshotFormat_HDF5:
-      SnapshotSolverStateToHDF5(model_filename);
-      break;
-    default:
-      LOG(FATAL) << "Unsupported snapshot format.";
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::SnapshotSolverStateToBinaryProto(
-    const string& model_filename) {
-  SolverState state;
-  state.set_iter(this->iter_);
-  state.set_learned_net(model_filename);
-  state.set_current_step(this->current_step_);
-  state.clear_history();
-  for (int i = 0; i < history_.size(); ++i) {
-    // Add history
-    BlobProto* history_blob = state.add_history();
-    history_[i]->ToProto(history_blob);
-  }
-  string snapshot_filename = Solver<Dtype>::SnapshotFilename(".solverstate");
-  LOG(INFO)
-    << "Snapshotting solver state to binary proto file " << snapshot_filename;
-  WriteProtoToBinaryFile(state, snapshot_filename.c_str());
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::SnapshotSolverStateToHDF5(
-    const string& model_filename) {
-  string snapshot_filename =
-      Solver<Dtype>::SnapshotFilename(".solverstate.h5");
-  LOG(INFO) << "Snapshotting solver state to HDF5 file " << snapshot_filename;
-  hid_t file_hid = H5Fcreate(snapshot_filename.c_str(), H5F_ACC_TRUNC,
-      H5P_DEFAULT, H5P_DEFAULT);
-  CHECK_GE(file_hid, 0)
-      << "Couldn't open " << snapshot_filename << " to save solver state.";
-  hdf5_save_int(file_hid, "iter", this->iter_);
-  hdf5_save_string(file_hid, "learned_net", model_filename);
-  hdf5_save_int(file_hid, "current_step", this->current_step_);
-  hid_t history_hid = H5Gcreate2(file_hid, "history", H5P_DEFAULT, H5P_DEFAULT,
-      H5P_DEFAULT);
-  CHECK_GE(history_hid, 0)
-      << "Error saving solver state to " << snapshot_filename << ".";
-  for (int i = 0; i < history_.size(); ++i) {
-    ostringstream oss;
-    oss << i;
-    hdf5_save_nd_dataset<Dtype>(history_hid, oss.str(), *history_[i]);
-  }
-  H5Gclose(history_hid);
-  H5Fclose(file_hid);
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::RestoreSolverStateFromBinaryProto(
-    const string& state_file) {
-  SolverState state;
-  ReadProtoFromBinaryFile(state_file, &state);
-  this->iter_ = state.iter();
-  if (state.has_learned_net()) {
-    NetParameter net_param;
-    ReadNetParamsFromBinaryFileOrDie(state.learned_net().c_str(), &net_param);
-    this->net_->CopyTrainedLayersFrom(net_param);
-  }
-  this->current_step_ = state.current_step();
-  CHECK_EQ(state.history_size(), history_.size())
-      << "Incorrect length of history blobs.";
-  LOG(INFO) << "SGDSolver: restoring history";
-  for (int i = 0; i < history_.size(); ++i) {
-    history_[i]->FromProto(state.history(i));
-  }
-}
-
-template <typename Dtype>
-void SGDSolver<Dtype>::RestoreSolverStateFromHDF5(const string& state_file) {
-  hid_t file_hid = H5Fopen(state_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
-  CHECK_GE(file_hid, 0) << "Couldn't open solver state file " << state_file;
-  this->iter_ = hdf5_load_int(file_hid, "iter");
-  if (H5LTfind_dataset(file_hid, "learned_net")) {
-    string learned_net = hdf5_load_string(file_hid, "learned_net");
-    this->net_->CopyTrainedLayersFrom(learned_net);
-  }
-  this->current_step_ = hdf5_load_int(file_hid, "current_step");
-  hid_t history_hid = H5Gopen2(file_hid, "history", H5P_DEFAULT);
-  CHECK_GE(history_hid, 0) << "Error reading history from " << state_file;
-  int state_history_size = hdf5_get_num_links(history_hid);
-  CHECK_EQ(state_history_size, history_.size())
-      << "Incorrect length of history blobs.";
-  for (int i = 0; i < history_.size(); ++i) {
-    ostringstream oss;
-    oss << i;
-    hdf5_load_nd_dataset<Dtype>(history_hid, oss.str().c_str(), 0,
-                                kMaxBlobAxes, history_[i].get());
-  }
-  H5Gclose(history_hid);
-  H5Fclose(file_hid);
-}
-
-template <typename Dtype>
-void NesterovSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  CHECK(Caffe::root_solver());
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-  Dtype momentum = this->param_.momentum();
-  Dtype local_rate = rate * net_params_lr[param_id];
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    // save history momentum for stepping back
-    caffe_copy(net_params[param_id]->count(),
-        this->history_[param_id]->cpu_data(),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // update history
-    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
-              net_params[param_id]->cpu_diff(), momentum,
-              this->history_[param_id]->mutable_cpu_data());
-
-    // compute update: step back then over step
-    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum,
-        this->history_[param_id]->cpu_data(), -momentum,
-        this->update_[param_id]->mutable_cpu_data());
-
-    // copy
-    caffe_copy(net_params[param_id]->count(),
-        this->update_[param_id]->cpu_data(),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    // save history momentum for stepping back
-    caffe_copy(net_params[param_id]->count(),
-        this->history_[param_id]->gpu_data(),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // update history
-    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
-              net_params[param_id]->gpu_diff(), momentum,
-              this->history_[param_id]->mutable_gpu_data());
-
-    // compute update: step back then over step
-    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum,
-        this->history_[param_id]->gpu_data(), -momentum,
-        this->update_[param_id]->mutable_gpu_data());
-
-    // copy
-    caffe_copy(net_params[param_id]->count(),
-        this->update_[param_id]->gpu_data(),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
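
The "step back then over step" comment is the usual Nesterov momentum rearrangement; per element, the three calls above compute:

    const float momentum = 0.9f, local_rate = 0.01f, grad = 0.5f;
    const float v_prev = 0.f;                                   // saved copy
    const float v = local_rate * grad + momentum * v_prev;      // new history
    const float diff = (1 + momentum) * v - momentum * v_prev;  // over-step
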
-
-template <typename Dtype>
-void AdaGradSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  CHECK(Caffe::root_solver());
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-  Dtype delta = this->param_.delta();
-  Dtype local_rate = rate * net_params_lr[param_id];
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    // compute square of gradient in update
-    caffe_powx(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // update history
-    caffe_add(net_params[param_id]->count(),
-        this->update_[param_id]->cpu_data(),
-        this->history_[param_id]->cpu_data(),
-        this->history_[param_id]->mutable_cpu_data());
-
-    // prepare update
-    caffe_powx(net_params[param_id]->count(),
-              this->history_[param_id]->cpu_data(), Dtype(0.5),
-              this->update_[param_id]->mutable_cpu_data());
-
-    caffe_add_scalar(net_params[param_id]->count(),
-              delta, this->update_[param_id]->mutable_cpu_data());
-
-    caffe_div(net_params[param_id]->count(),
-              net_params[param_id]->cpu_diff(),
-              this->update_[param_id]->cpu_data(),
-              this->update_[param_id]->mutable_cpu_data());
-
-    // scale and copy
-    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
-        this->update_[param_id]->cpu_data(), Dtype(0),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    // compute square of gradient in update
-    caffe_gpu_powx(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // update history
-    caffe_gpu_add(net_params[param_id]->count(),
-        this->update_[param_id]->gpu_data(),
-        this->history_[param_id]->gpu_data(),
-        this->history_[param_id]->mutable_gpu_data());
-
-    // prepare update
-    caffe_gpu_powx(net_params[param_id]->count(),
-              this->history_[param_id]->gpu_data(), Dtype(0.5),
-              this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_add_scalar(net_params[param_id]->count(),
-              delta, this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_div(net_params[param_id]->count(),
-              net_params[param_id]->gpu_diff(),
-              this->update_[param_id]->gpu_data(),
-              this->update_[param_id]->mutable_gpu_data());
-
-    // scale and copy
-    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
-        this->update_[param_id]->gpu_data(), Dtype(0),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
-
-template <typename Dtype>
-void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-
-  // get the learning rate
-  Dtype delta = this->param_.delta();
-  Dtype rms_decay = this->param_.rms_decay();
-  Dtype local_rate = rate * net_params_lr[param_id];
-
-  switch (Caffe::mode()) {
-  case Caffe::CPU:
-    // compute square of gradient in update
-    caffe_powx(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // update history
-    caffe_cpu_axpby(net_params[param_id]->count(),
-        Dtype(1 - rms_decay), this->update_[param_id]->cpu_data(),
-        rms_decay, this->history_[param_id]->mutable_cpu_data());
-
-    // prepare update
-    caffe_powx(net_params[param_id]->count(),
-        this->history_[param_id]->cpu_data(), Dtype(0.5),
-        this->update_[param_id]->mutable_cpu_data());
-
-    caffe_add_scalar(net_params[param_id]->count(),
-        delta, this->update_[param_id]->mutable_cpu_data());
-
-    caffe_div(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // scale and copy
-    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
-        this->update_[param_id]->cpu_data(), Dtype(0),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  case Caffe::GPU:
-#ifndef CPU_ONLY
-    // compute square of gradient in update
-    caffe_gpu_powx(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // update history
-    caffe_gpu_axpby(net_params[param_id]->count(),
-        Dtype(1 - rms_decay), this->update_[param_id]->gpu_data(),
-        rms_decay, this->history_[param_id]->mutable_gpu_data());
-
-    // prepare update
-    caffe_gpu_powx(net_params[param_id]->count(),
-        this->history_[param_id]->gpu_data(), Dtype(0.5),
-        this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_add_scalar(net_params[param_id]->count(),
-        delta, this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_div(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(),
-        this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
-        this->update_[param_id]->gpu_data(), Dtype(0),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
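
Per element, the sequence above is the standard RMSProp rule (made-up values):

    #include <cmath>

    const float rms_decay = 0.99f, delta = 1e-8f;
    const float local_rate = 0.01f, grad = 0.5f;
    const float hist_prev = 0.f;
    const float hist =
        rms_decay * hist_prev + (1 - rms_decay) * grad * grad;
    const float diff = local_rate * grad / (std::sqrt(hist) + delta);
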
-
-template <typename Dtype>
-void AdaDeltaSolver<Dtype>::AdaDeltaPreSolve() {
-  // Add the extra history entries for AdaDelta after those from
-  // SGDSolver::PreSolve
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  for (int i = 0; i < net_params.size(); ++i) {
-    const vector<int>& shape = net_params[i]->shape();
-    this->history_.push_back(
-        shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
-  }
-}
-
-template <typename Dtype>
-void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-  Dtype delta = this->param_.delta();
-  Dtype momentum = this->param_.momentum();
-  Dtype local_rate = rate * net_params_lr[param_id];
-  size_t update_history_offset = net_params.size();
-  switch (Caffe::mode()) {
-  case Caffe::CPU: {
-    // compute square of gradient in update
-    caffe_powx(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // update history of gradients
-    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-        this->update_[param_id]->cpu_data(), momentum,
-        this->history_[param_id]->mutable_cpu_data());
-
-    // add delta to history to guard against dividing by zero later
-    caffe_set(net_params[param_id]->count(), delta,
-        this->temp_[param_id]->mutable_cpu_data());
-
-    caffe_add(net_params[param_id]->count(),
-        this->temp_[param_id]->cpu_data(),
-        this->history_[update_history_offset + param_id]->cpu_data(),
-        this->update_[param_id]->mutable_cpu_data());
-
-    caffe_add(net_params[param_id]->count(),
-        this->temp_[param_id]->cpu_data(),
-        this->history_[param_id]->cpu_data(),
-        this->temp_[param_id]->mutable_cpu_data());
-
-    // divide history of updates by history of gradients
-    caffe_div(net_params[param_id]->count(),
-        this->update_[param_id]->cpu_data(),
-        this->temp_[param_id]->cpu_data(),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // jointly compute the RMS of both for update and gradient history
-    caffe_powx(net_params[param_id]->count(),
-        this->update_[param_id]->cpu_data(), Dtype(0.5),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // compute the update
-    caffe_mul(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(),
-        this->update_[param_id]->cpu_data(),
-        net_params[param_id]->mutable_cpu_diff());
-
-    // compute square of update
-    caffe_powx(net_params[param_id]->count(),
-        net_params[param_id]->cpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_cpu_data());
-
-    // update history of updates
-    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-        this->update_[param_id]->cpu_data(), momentum,
-        this->history_[update_history_offset + param_id]->mutable_cpu_data());
-
-    // apply learning rate
-    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
-        net_params[param_id]->cpu_diff(),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    // compute square of gradient in update
-    caffe_gpu_powx(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // update history of gradients
-    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-        this->update_[param_id]->gpu_data(), momentum,
-        this->history_[param_id]->mutable_gpu_data());
-
-    // add delta to history to guard against dividing by zero later
-    caffe_gpu_set(net_params[param_id]->count(), delta,
-        this->temp_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_add(net_params[param_id]->count(),
-        this->temp_[param_id]->gpu_data(),
-        this->history_[update_history_offset + param_id]->gpu_data(),
-        this->update_[param_id]->mutable_gpu_data());
-
-    caffe_gpu_add(net_params[param_id]->count(),
-        this->temp_[param_id]->gpu_data(),
-        this->history_[param_id]->gpu_data(),
-        this->temp_[param_id]->mutable_gpu_data());
-
-    // divide history of updates by history of gradients
-    caffe_gpu_div(net_params[param_id]->count(),
-        this->update_[param_id]->gpu_data(),
-        this->temp_[param_id]->gpu_data(),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // jointly compute the RMS of both for update and gradient history
-    caffe_gpu_powx(net_params[param_id]->count(),
-        this->update_[param_id]->gpu_data(), Dtype(0.5),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // compute the update and copy to net_diff
-    caffe_gpu_mul(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(),
-        this->update_[param_id]->gpu_data(),
-        net_params[param_id]->mutable_gpu_diff());
-
-    // compute square of update
-    caffe_gpu_powx(net_params[param_id]->count(),
-        net_params[param_id]->gpu_diff(), Dtype(2),
-        this->update_[param_id]->mutable_gpu_data());
-
-    // update history of updates
-    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-        this->update_[param_id]->gpu_data(), momentum,
-        this->history_[update_history_offset + param_id]->mutable_gpu_data());
-
-    // apply learning rate
-    caffe_gpu_scale(net_params[param_id]->count(), local_rate,
-        net_params[param_id]->gpu_diff(),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
-
-template <typename Dtype>
-void AdamSolver<Dtype>::AdamPreSolve() {
-  // Add the extra history entries for Adam after those from
-  // SGDSolver::PreSolve
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  for (int i = 0; i < net_params.size(); ++i) {
-    const vector<int>& shape = net_params[i]->shape();
-    this->history_.push_back(
-            shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
-  }
-}
-
-template <typename Dtype>
-void AdamSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
-  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
-  const vector<float>& net_params_lr = this->net_->params_lr();
-  Dtype local_rate = rate * net_params_lr[param_id];
-  const Dtype beta1 = this->param_.momentum();
-  const Dtype beta2 = this->param_.momentum2();
-
-  // we create aliases for convenience
-  size_t update_history_offset = net_params.size();
-  Blob<Dtype>* val_m = this->history_[param_id].get();
-  Blob<Dtype>* val_v = this->history_[param_id + update_history_offset].get();
-  Blob<Dtype>* val_t = this->temp_[param_id].get();
-
-  const int t = this->iter_ + 1;
-  const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) /
-      (Dtype(1.) - pow(beta1, t));
-  const int N = net_params[param_id]->count();
-  const Dtype eps_hat = this->param_.delta();
-
-  switch (Caffe::mode()) {
-    case Caffe::CPU: {
-    // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t
-    caffe_cpu_axpby(N, Dtype(1)-beta1,
-        net_params[param_id]->cpu_diff(), beta1,
-        val_m->mutable_cpu_data());
-
-    // update v <- \beta_2 v_{t-1} + (1-\beta_2)g_t^2
-    caffe_mul(N,
-        net_params[param_id]->cpu_diff(),
-        net_params[param_id]->cpu_diff(),
-        val_t->mutable_cpu_data());
-    caffe_cpu_axpby(N, Dtype(1)-beta2,
-        val_t->cpu_data(), beta2,
-        val_v->mutable_cpu_data());
-
-    // set update
-    caffe_powx(N,
-        val_v->cpu_data(), Dtype(0.5),
-        val_t->mutable_cpu_data());
-    caffe_add_scalar(N, eps_hat, val_t->mutable_cpu_data());
-    caffe_div(N,
-        val_m->cpu_data(),
-        val_t->cpu_data(),
-        val_t->mutable_cpu_data());
-
-    caffe_cpu_scale(N, local_rate*correction,
-        val_t->cpu_data(),
-        net_params[param_id]->mutable_cpu_diff());
-    break;
-  }
-  case Caffe::GPU: {
-#ifndef CPU_ONLY
-    // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t
-    caffe_gpu_axpby(N, Dtype(1)-beta1,
-        net_params[param_id]->gpu_diff(), beta1,
-        val_m->mutable_gpu_data());
-
-    // update v <- \beta_2 v_{t-1} + (1-\beta_2)g_t^2
-    caffe_gpu_mul(N,
-        net_params[param_id]->gpu_diff(),
-        net_params[param_id]->gpu_diff(),
-        val_t->mutable_gpu_data());
-    caffe_gpu_axpby(N, Dtype(1)-beta2,
-        val_t->gpu_data(), beta2,
-        val_v->mutable_gpu_data());
-
-    // set update
-    caffe_gpu_powx(N,
-        val_v->gpu_data(), Dtype(0.5),
-        val_t->mutable_gpu_data());
-    caffe_gpu_add_scalar(N, eps_hat,
-        val_t->mutable_gpu_data());
-    caffe_gpu_div(N,
-        val_m->gpu_data(),
-        val_t->gpu_data(),
-        val_t->mutable_gpu_data());
-
-    caffe_gpu_scale(N, local_rate*correction,
-        val_t->gpu_data(),
-        net_params[param_id]->mutable_gpu_diff());
-#else
-    NO_GPU;
-#endif
-    break;
-  }
-  default:
-    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
-  }
-}
-
 INSTANTIATE_CLASS(Solver);
-INSTANTIATE_CLASS(SGDSolver);
-INSTANTIATE_CLASS(NesterovSolver);
-INSTANTIATE_CLASS(AdaGradSolver);
-INSTANTIATE_CLASS(RMSPropSolver);
-INSTANTIATE_CLASS(AdaDeltaSolver);
-INSTANTIATE_CLASS(AdamSolver);
 
 }  // namespace caffe
diff --git a/src/caffe/solvers/adadelta_solver.cpp b/src/caffe/solvers/adadelta_solver.cpp
new file mode 100644
index 0000000..a37899e
--- /dev/null
@@ -0,0 +1,156 @@
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void AdaDeltaSolver<Dtype>::AdaDeltaPreSolve() {
+  // Add the extra history entries for AdaDelta after those from
+  // SGDSolver::PreSolve
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  for (int i = 0; i < net_params.size(); ++i) {
+    const vector<int>& shape = net_params[i]->shape();
+    this->history_.push_back(
+        shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
+  }
+}
+
+template <typename Dtype>
+void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+  Dtype delta = this->param_.delta();
+  Dtype momentum = this->param_.momentum();
+  Dtype local_rate = rate * net_params_lr[param_id];
+  size_t update_history_offset = net_params.size();
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    // compute square of gradient in update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history of gradients
+    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->cpu_data(), momentum,
+        this->history_[param_id]->mutable_cpu_data());
+
+    // add delta to history to guard against dividing by zero later
+    caffe_set(net_params[param_id]->count(), delta,
+        this->temp_[param_id]->mutable_cpu_data());
+
+    caffe_add(net_params[param_id]->count(),
+        this->temp_[param_id]->cpu_data(),
+        this->history_[update_history_offset + param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    caffe_add(net_params[param_id]->count(),
+        this->temp_[param_id]->cpu_data(),
+        this->history_[param_id]->cpu_data(),
+        this->temp_[param_id]->mutable_cpu_data());
+
+    // divide history of updates by history of gradients
+    caffe_div(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(),
+        this->temp_[param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // jointly compute the RMS of both for update and gradient history
+    caffe_powx(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // compute the update
+    caffe_mul(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(),
+        this->update_[param_id]->cpu_data(),
+        net_params[param_id]->mutable_cpu_diff());
+
+    // compute square of update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history of updates
+    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->cpu_data(), momentum,
+        this->history_[update_history_offset + param_id]->mutable_cpu_data());
+
+    // apply learning rate
+    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
+        net_params[param_id]->cpu_diff(),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    // compute square of gradient in update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history of gradients
+    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->gpu_data(), momentum,
+        this->history_[param_id]->mutable_gpu_data());
+
+    // add delta to history to guard against dividing by zero later
+    caffe_gpu_set(net_params[param_id]->count(), delta,
+        this->temp_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add(net_params[param_id]->count(),
+        this->temp_[param_id]->gpu_data(),
+        this->history_[update_history_offset + param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add(net_params[param_id]->count(),
+        this->temp_[param_id]->gpu_data(),
+        this->history_[param_id]->gpu_data(),
+        this->temp_[param_id]->mutable_gpu_data());
+
+    // divide history of updates by history of gradients
+    caffe_gpu_div(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(),
+        this->temp_[param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // jointly compute the RMS of both for update and gradient history
+    caffe_gpu_powx(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // compute the update and copy to net_diff
+    caffe_gpu_mul(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(),
+        this->update_[param_id]->gpu_data(),
+        net_params[param_id]->mutable_gpu_diff());
+
+    // compute square of update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history of updates
+    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->gpu_data(), momentum,
+        this->history_[update_history_offset + param_id]->mutable_gpu_data());
+
+    // apply learning rate
+    caffe_gpu_scale(net_params[param_id]->count(), local_rate,
+        net_params[param_id]->gpu_diff(),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+INSTANTIATE_CLASS(AdaDeltaSolver);
+REGISTER_SOLVER_CLASS(AdaDelta);
+
+}  // namespace caffe
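
Per element, the long sequence of powx/add/div calls reduces to the familiar AdaDelta rule, with momentum acting as the decay rho and delta guarding both divisions (made-up values; note the gradient history is updated before the step is formed, the update history after):

    #include <cmath>

    const float rho = 0.9f, delta = 1e-8f, local_rate = 1.f;
    const float grad = 0.5f, hist_g_prev = 0.f, hist_u_prev = 0.f;
    const float hist_g = rho * hist_g_prev + (1 - rho) * grad * grad;
    const float step =
        grad * std::sqrt((hist_u_prev + delta) / (hist_g + delta));
    const float hist_u = rho * hist_u_prev + (1 - rho) * step * step;
    const float diff = local_rate * step;
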
diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp
new file mode 100644
index 0000000..5e40632
--- /dev/null
@@ -0,0 +1,89 @@
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void AdaGradSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  CHECK(Caffe::root_solver());
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+  Dtype delta = this->param_.delta();
+  Dtype local_rate = rate * net_params_lr[param_id];
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    // compute square of gradient in update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history
+    caffe_add(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(),
+        this->history_[param_id]->cpu_data(),
+        this->history_[param_id]->mutable_cpu_data());
+
+    // prepare update
+    caffe_powx(net_params[param_id]->count(),
+              this->history_[param_id]->cpu_data(), Dtype(0.5),
+              this->update_[param_id]->mutable_cpu_data());
+
+    caffe_add_scalar(net_params[param_id]->count(),
+              delta, this->update_[param_id]->mutable_cpu_data());
+
+    caffe_div(net_params[param_id]->count(),
+              net_params[param_id]->cpu_diff(),
+              this->update_[param_id]->cpu_data(),
+              this->update_[param_id]->mutable_cpu_data());
+
+    // scale and copy
+    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
+        this->update_[param_id]->cpu_data(), Dtype(0),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    // compute square of gradient in update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history
+    caffe_gpu_add(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(),
+        this->history_[param_id]->gpu_data(),
+        this->history_[param_id]->mutable_gpu_data());
+
+    // prepare update
+    caffe_gpu_powx(net_params[param_id]->count(),
+              this->history_[param_id]->gpu_data(), Dtype(0.5),
+              this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add_scalar(net_params[param_id]->count(),
+              delta, this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_div(net_params[param_id]->count(),
+              net_params[param_id]->gpu_diff(),
+              this->update_[param_id]->gpu_data(),
+              this->update_[param_id]->mutable_gpu_data());
+
+    // scale and copy
+    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
+        this->update_[param_id]->gpu_data(), Dtype(0),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+INSTANTIATE_CLASS(AdaGradSolver);
+REGISTER_SOLVER_CLASS(AdaGrad);
+
+}  // namespace caffe
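
Per element, AdaGrad accumulates squared gradients into history and divides each step by the running root sum (made-up values):

    #include <cmath>

    const float delta = 1e-8f, local_rate = 0.01f, grad = 0.5f;
    const float hist_prev = 0.f;
    const float hist = hist_prev + grad * grad;
    const float diff = local_rate * grad / (std::sqrt(hist) + delta);
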
diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp
new file mode 100644
index 0000000..cb0fbfe
--- /dev/null
@@ -0,0 +1,113 @@
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void AdamSolver<Dtype>::AdamPreSolve() {
+  // Add the extra history entries for Adam after those from
+  // SGDSolver::PreSolve
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  for (int i = 0; i < net_params.size(); ++i) {
+    const vector<int>& shape = net_params[i]->shape();
+    this->history_.push_back(
+        shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
+  }
+}
+
+template <typename Dtype>
+void AdamSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+  Dtype local_rate = rate * net_params_lr[param_id];
+  const Dtype beta1 = this->param_.momentum();
+  const Dtype beta2 = this->param_.momentum2();
+
+  // we create aliases for convenience
+  size_t update_history_offset = net_params.size();
+  Blob<Dtype>* val_m = this->history_[param_id].get();
+  Blob<Dtype>* val_v = this->history_[param_id + update_history_offset].get();
+  Blob<Dtype>* val_t = this->temp_[param_id].get();
+
+  const int t = this->iter_ + 1;
+  const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) /
+      (Dtype(1.) - pow(beta1, t));
+  const int N = net_params[param_id]->count();
+  const Dtype eps_hat = this->param_.delta();
+
+  switch (Caffe::mode()) {
+    case Caffe::CPU: {
+    // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t
+    caffe_cpu_axpby(N, Dtype(1)-beta1,
+        net_params[param_id]->cpu_diff(), beta1,
+        val_m->mutable_cpu_data());
+
+    // update v <- \beta_2 v_{t-1} + (1-\beta_2)g_t^2
+    caffe_mul(N,
+        net_params[param_id]->cpu_diff(),
+        net_params[param_id]->cpu_diff(),
+        val_t->mutable_cpu_data());
+    caffe_cpu_axpby(N, Dtype(1)-beta2,
+        val_t->cpu_data(), beta2,
+        val_v->mutable_cpu_data());
+
+    // set update
+    caffe_powx(N,
+        val_v->cpu_data(), Dtype(0.5),
+        val_t->mutable_cpu_data());
+    caffe_add_scalar(N, eps_hat, val_t->mutable_cpu_data());
+    caffe_div(N,
+        val_m->cpu_data(),
+        val_t->cpu_data(),
+        val_t->mutable_cpu_data());
+
+    caffe_cpu_scale(N, local_rate*correction,
+        val_t->cpu_data(),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t
+    caffe_gpu_axpby(N, Dtype(1)-beta1,
+        net_params[param_id]->gpu_diff(), beta1,
+        val_m->mutable_gpu_data());
+
+    // update v <- \beta_2 v_{t-1} + (1-\beta_2)g_t^2
+    caffe_gpu_mul(N,
+        net_params[param_id]->gpu_diff(),
+        net_params[param_id]->gpu_diff(),
+        val_t->mutable_gpu_data());
+    caffe_gpu_axpby(N, Dtype(1)-beta2,
+        val_t->gpu_data(), beta2,
+        val_v->mutable_gpu_data());
+
+    // set update
+    caffe_gpu_powx(N,
+        val_v->gpu_data(), Dtype(0.5),
+        val_t->mutable_gpu_data());
+    caffe_gpu_add_scalar(N, eps_hat,
+        val_t->mutable_gpu_data());
+    caffe_gpu_div(N,
+        val_m->gpu_data(),
+        val_t->gpu_data(),
+        val_t->mutable_gpu_data());
+
+    caffe_gpu_scale(N, local_rate*correction,
+        val_t->gpu_data(),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+INSTANTIATE_CLASS(AdamSolver);
+REGISTER_SOLVER_CLASS(Adam);
+
+}  // namespace caffe
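
Per element, the m/v updates and the correction factor above compose into the standard Adam rule; a minimal sketch with illustrative names (beta1, beta2, eps mirror momentum, momentum2, and delta from the solver parameters):

    #include <cmath>

    // One scalar Adam step, matching ComputeUpdateValue above.
    double adam_step(double g, double* m, double* v, int t, double lr,
                     double beta1, double beta2, double eps) {
      *m = beta1 * (*m) + (1 - beta1) * g;      // first-moment estimate
      *v = beta2 * (*v) + (1 - beta2) * g * g;  // second-moment estimate
      const double correction = std::sqrt(1 - std::pow(beta2, t)) /
                                (1 - std::pow(beta1, t));
      return lr * correction * (*m) / (std::sqrt(*v) + eps);
    }
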
diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp
new file mode 100644 (file)
index 0000000..34bf01e
--- /dev/null
@@ -0,0 +1,71 @@
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void NesterovSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  CHECK(Caffe::root_solver());
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+  Dtype momentum = this->param_.momentum();
+  Dtype local_rate = rate * net_params_lr[param_id];
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    // save history momentum for stepping back
+    caffe_copy(net_params[param_id]->count(),
+        this->history_[param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history
+    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
+              net_params[param_id]->cpu_diff(), momentum,
+              this->history_[param_id]->mutable_cpu_data());
+
+    // compute update: step back then over-step
+    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum,
+        this->history_[param_id]->cpu_data(), -momentum,
+        this->update_[param_id]->mutable_cpu_data());
+
+    // copy
+    caffe_copy(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    // save history momentum for stepping back
+    caffe_copy(net_params[param_id]->count(),
+        this->history_[param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history
+    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
+              net_params[param_id]->gpu_diff(), momentum,
+              this->history_[param_id]->mutable_gpu_data());
+
+    // compute update: step back then over-step
+    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum,
+        this->history_[param_id]->gpu_data(), -momentum,
+        this->update_[param_id]->mutable_gpu_data());
+
+    // copy
+    caffe_copy(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+INSTANTIATE_CLASS(NesterovSolver);
+REGISTER_SOLVER_CLASS(Nesterov);
+
+}  // namespace caffe
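
The two axpby calls implement the "step back then over-step" formulation of Nesterov momentum; per element, a minimal sketch with illustrative names:

    // One scalar Nesterov momentum step, matching the CPU/GPU branches above.
    double nesterov_step(double g, double* h, double lr, double momentum) {
      const double h_prev = *h;          // history saved before the update
      *h = momentum * h_prev + lr * g;   // history update
      return (1 + momentum) * (*h) - momentum * h_prev;  // step back, over-step
    }
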
diff --git a/src/caffe/solvers/rmsprop_solver.cpp b/src/caffe/solvers/rmsprop_solver.cpp
new file mode 100644 (file)
index 0000000..c624767
--- /dev/null
@@ -0,0 +1,85 @@
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+
+  // get the local learning rate and RMSProp hyperparameters
+  Dtype delta = this->param_.delta();
+  Dtype rms_decay = this->param_.rms_decay();
+  Dtype local_rate = rate * net_params_lr[param_id];
+
+  switch (Caffe::mode()) {
+  case Caffe::CPU:
+    // compute square of gradient in update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history
+    caffe_cpu_axpby(net_params[param_id]->count(),
+        Dtype(1-rms_decay), this->update_[param_id]->cpu_data(),
+        rms_decay, this->history_[param_id]->mutable_cpu_data());
+
+    // prepare update
+    caffe_powx(net_params[param_id]->count(),
+        this->history_[param_id]->cpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_cpu_data());
+
+    caffe_add_scalar(net_params[param_id]->count(),
+        delta, this->update_[param_id]->mutable_cpu_data());
+
+    caffe_div(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // scale and copy
+    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
+        this->update_[param_id]->cpu_data(), Dtype(0),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  case Caffe::GPU:
+#ifndef CPU_ONLY
+    // compute square of gradient in update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history
+    caffe_gpu_axpby(net_params[param_id]->count(),
+        Dtype(1-rms_decay), this->update_[param_id]->gpu_data(),
+        rms_decay, this->history_[param_id]->mutable_gpu_data());
+
+    // prepare update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        this->history_[param_id]->gpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add_scalar(net_params[param_id]->count(),
+        delta, this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_div(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
+        this->update_[param_id]->gpu_data(), Dtype(0),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+INSTANTIATE_CLASS(RMSPropSolver);
+REGISTER_SOLVER_CLASS(RMSProp);
+
+}  // namespace caffe
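
Per element, the sequence above is the usual RMSProp rule: a decayed running mean of squared gradients normalizes the step. A minimal sketch with illustrative names:

    #include <cmath>

    // One scalar RMSProp step, matching the CPU/GPU branches above.
    double rmsprop_step(double g, double* h, double lr, double rms_decay,
                        double delta) {
      *h = rms_decay * (*h) + (1 - rms_decay) * g * g;
      return lr * g / (std::sqrt(*h) + delta);
    }
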
diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp
new file mode 100644 (file)
index 0000000..32bf19b
--- /dev/null
@@ -0,0 +1,348 @@
+#include <string>
+#include <vector>
+
+#include "caffe/sgd_solvers.hpp"
+#include "caffe/util/hdf5.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/upgrade_proto.hpp"
+
+namespace caffe {
+
+// Return the current learning rate. The currently implemented learning rate
+// policies are as follows:
+//    - fixed: always return base_lr.
+//    - step: return base_lr * gamma ^ floor(iter / stepsize)
+//    - exp: return base_lr * gamma ^ iter
+//    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
+//    - multistep: similar to step but allows non-uniform steps defined by
+//      stepvalue
+//    - poly: the effective learning rate follows a polynomial decay, to be
+//      zero by max_iter: return base_lr * (1 - iter/max_iter) ^ power
+//    - sigmoid: the effective learning rate follows a sigmoid decay:
+//      return base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))
+//
+// where base_lr, max_iter, gamma, stepsize, stepvalue and power are defined
+// in the solver parameter protocol buffer, and iter is the current iteration.
+template <typename Dtype>
+Dtype SGDSolver<Dtype>::GetLearningRate() {
+  Dtype rate;
+  const string& lr_policy = this->param_.lr_policy();
+  if (lr_policy == "fixed") {
+    rate = this->param_.base_lr();
+  } else if (lr_policy == "step") {
+    this->current_step_ = this->iter_ / this->param_.stepsize();
+    rate = this->param_.base_lr() *
+        pow(this->param_.gamma(), this->current_step_);
+  } else if (lr_policy == "exp") {
+    rate = this->param_.base_lr() * pow(this->param_.gamma(), this->iter_);
+  } else if (lr_policy == "inv") {
+    rate = this->param_.base_lr() *
+        pow(Dtype(1) + this->param_.gamma() * this->iter_,
+            - this->param_.power());
+  } else if (lr_policy == "multistep") {
+    if (this->current_step_ < this->param_.stepvalue_size() &&
+          this->iter_ >= this->param_.stepvalue(this->current_step_)) {
+      this->current_step_++;
+      LOG(INFO) << "MultiStep Status: Iteration " <<
+      this->iter_ << ", step = " << this->current_step_;
+    }
+    rate = this->param_.base_lr() *
+        pow(this->param_.gamma(), this->current_step_);
+  } else if (lr_policy == "poly") {
+    rate = this->param_.base_lr() * pow(Dtype(1.) -
+        (Dtype(this->iter_) / Dtype(this->param_.max_iter())),
+        this->param_.power());
+  } else if (lr_policy == "sigmoid") {
+    rate = this->param_.base_lr() * (Dtype(1.) /
+        (Dtype(1.) + exp(-this->param_.gamma() * (Dtype(this->iter_) -
+          Dtype(this->param_.stepsize())))));
+  } else {
+    LOG(FATAL) << "Unknown learning rate policy: " << lr_policy;
+  }
+  return rate;
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::PreSolve() {
+  // Initialize the history
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  history_.clear();
+  update_.clear();
+  temp_.clear();
+  for (int i = 0; i < net_params.size(); ++i) {
+    const vector<int>& shape = net_params[i]->shape();
+    history_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
+    update_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
+    temp_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::ClipGradients() {
+  const Dtype clip_gradients = this->param_.clip_gradients();
+  if (clip_gradients < 0) { return; }
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  Dtype sumsq_diff = 0;
+  for (int i = 0; i < net_params.size(); ++i) {
+    sumsq_diff += net_params[i]->sumsq_diff();
+  }
+  const Dtype l2norm_diff = std::sqrt(sumsq_diff);
+  if (l2norm_diff > clip_gradients) {
+    Dtype scale_factor = clip_gradients / l2norm_diff;
+    LOG(INFO) << "Gradient clipping: scaling down gradients (L2 norm "
+        << l2norm_diff << " > " << clip_gradients << ") "
+        << "by scale factor " << scale_factor;
+    for (int i = 0; i < net_params.size(); ++i) {
+      net_params[i]->scale_diff(scale_factor);
+    }
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::ApplyUpdate() {
+  CHECK(Caffe::root_solver());
+  Dtype rate = GetLearningRate();
+  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
+    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
+  }
+  ClipGradients();
+  for (int param_id = 0; param_id < this->net_->learnable_params().size();
+       ++param_id) {
+    Normalize(param_id);
+    Regularize(param_id);
+    ComputeUpdateValue(param_id, rate);
+  }
+  this->net_->Update();
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::Normalize(int param_id) {
+  if (this->param_.iter_size() == 1) { return; }
+  // Scale gradient to counterbalance accumulation.
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const Dtype accum_normalization = Dtype(1.) / this->param_.iter_size();
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    caffe_scal(net_params[param_id]->count(), accum_normalization,
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    caffe_gpu_scal(net_params[param_id]->count(), accum_normalization,
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::Regularize(int param_id) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_weight_decay =
+      this->net_->params_weight_decay();
+  Dtype weight_decay = this->param_.weight_decay();
+  string regularization_type = this->param_.regularization_type();
+  Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    if (local_decay) {
+      if (regularization_type == "L2") {
+        // add weight decay
+        caffe_axpy(net_params[param_id]->count(),
+            local_decay,
+            net_params[param_id]->cpu_data(),
+            net_params[param_id]->mutable_cpu_diff());
+      } else if (regularization_type == "L1") {
+        caffe_cpu_sign(net_params[param_id]->count(),
+            net_params[param_id]->cpu_data(),
+            temp_[param_id]->mutable_cpu_data());
+        caffe_axpy(net_params[param_id]->count(),
+            local_decay,
+            temp_[param_id]->cpu_data(),
+            net_params[param_id]->mutable_cpu_diff());
+      } else {
+        LOG(FATAL) << "Unknown regularization type: " << regularization_type;
+      }
+    }
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    if (local_decay) {
+      if (regularization_type == "L2") {
+        // add weight decay
+        caffe_gpu_axpy(net_params[param_id]->count(),
+            local_decay,
+            net_params[param_id]->gpu_data(),
+            net_params[param_id]->mutable_gpu_diff());
+      } else if (regularization_type == "L1") {
+        caffe_gpu_sign(net_params[param_id]->count(),
+            net_params[param_id]->gpu_data(),
+            temp_[param_id]->mutable_gpu_data());
+        caffe_gpu_axpy(net_params[param_id]->count(),
+            local_decay,
+            temp_[param_id]->gpu_data(),
+            net_params[param_id]->mutable_gpu_diff());
+      } else {
+        LOG(FATAL) << "Unknown regularization type: " << regularization_type;
+      }
+    }
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
+  Dtype momentum = this->param_.momentum();
+  Dtype local_rate = rate * net_params_lr[param_id];
+  // Compute the update to history, then copy it to the parameter diff.
+  switch (Caffe::mode()) {
+  case Caffe::CPU: {
+    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
+              net_params[param_id]->cpu_diff(), momentum,
+              history_[param_id]->mutable_cpu_data());
+    caffe_copy(net_params[param_id]->count(),
+        history_[param_id]->cpu_data(),
+        net_params[param_id]->mutable_cpu_diff());
+    break;
+  }
+  case Caffe::GPU: {
+#ifndef CPU_ONLY
+    caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
+              net_params[param_id]->gpu_diff(), momentum,
+              history_[param_id]->mutable_gpu_data());
+    caffe_copy(net_params[param_id]->count(),
+        history_[param_id]->gpu_data(),
+        net_params[param_id]->mutable_gpu_diff());
+#else
+    NO_GPU;
+#endif
+    break;
+  }
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::SnapshotSolverState(const string& model_filename) {
+  switch (this->param_.snapshot_format()) {
+    case caffe::SolverParameter_SnapshotFormat_BINARYPROTO:
+      SnapshotSolverStateToBinaryProto(model_filename);
+      break;
+    case caffe::SolverParameter_SnapshotFormat_HDF5:
+      SnapshotSolverStateToHDF5(model_filename);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported snapshot format.";
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::SnapshotSolverStateToBinaryProto(
+    const string& model_filename) {
+  SolverState state;
+  state.set_iter(this->iter_);
+  state.set_learned_net(model_filename);
+  state.set_current_step(this->current_step_);
+  state.clear_history();
+  for (int i = 0; i < history_.size(); ++i) {
+    // Add history
+    BlobProto* history_blob = state.add_history();
+    history_[i]->ToProto(history_blob);
+  }
+  string snapshot_filename = Solver<Dtype>::SnapshotFilename(".solverstate");
+  LOG(INFO)
+    << "Snapshotting solver state to binary proto file " << snapshot_filename;
+  WriteProtoToBinaryFile(state, snapshot_filename.c_str());
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::SnapshotSolverStateToHDF5(
+    const string& model_filename) {
+  string snapshot_filename =
+      Solver<Dtype>::SnapshotFilename(".solverstate.h5");
+  LOG(INFO) << "Snapshotting solver state to HDF5 file " << snapshot_filename;
+  hid_t file_hid = H5Fcreate(snapshot_filename.c_str(), H5F_ACC_TRUNC,
+      H5P_DEFAULT, H5P_DEFAULT);
+  CHECK_GE(file_hid, 0)
+      << "Couldn't open " << snapshot_filename << " to save solver state.";
+  hdf5_save_int(file_hid, "iter", this->iter_);
+  hdf5_save_string(file_hid, "learned_net", model_filename);
+  hdf5_save_int(file_hid, "current_step", this->current_step_);
+  hid_t history_hid = H5Gcreate2(file_hid, "history", H5P_DEFAULT, H5P_DEFAULT,
+      H5P_DEFAULT);
+  CHECK_GE(history_hid, 0)
+      << "Error saving solver state to " << snapshot_filename << ".";
+  for (int i = 0; i < history_.size(); ++i) {
+    ostringstream oss;
+    oss << i;
+    hdf5_save_nd_dataset<Dtype>(history_hid, oss.str(), *history_[i]);
+  }
+  H5Gclose(history_hid);
+  H5Fclose(file_hid);
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::RestoreSolverStateFromBinaryProto(
+    const string& state_file) {
+  SolverState state;
+  ReadProtoFromBinaryFile(state_file, &state);
+  this->iter_ = state.iter();
+  if (state.has_learned_net()) {
+    NetParameter net_param;
+    ReadNetParamsFromBinaryFileOrDie(state.learned_net().c_str(), &net_param);
+    this->net_->CopyTrainedLayersFrom(net_param);
+  }
+  this->current_step_ = state.current_step();
+  CHECK_EQ(state.history_size(), history_.size())
+      << "Incorrect length of history blobs.";
+  LOG(INFO) << "SGDSolver: restoring history";
+  for (int i = 0; i < history_.size(); ++i) {
+    history_[i]->FromProto(state.history(i));
+  }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::RestoreSolverStateFromHDF5(const string& state_file) {
+  hid_t file_hid = H5Fopen(state_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+  CHECK_GE(file_hid, 0) << "Couldn't open solver state file " << state_file;
+  this->iter_ = hdf5_load_int(file_hid, "iter");
+  if (H5LTfind_dataset(file_hid, "learned_net")) {
+    string learned_net = hdf5_load_string(file_hid, "learned_net");
+    this->net_->CopyTrainedLayersFrom(learned_net);
+  }
+  this->current_step_ = hdf5_load_int(file_hid, "current_step");
+  hid_t history_hid = H5Gopen2(file_hid, "history", H5P_DEFAULT);
+  CHECK_GE(history_hid, 0) << "Error reading history from " << state_file;
+  int state_history_size = hdf5_get_num_links(history_hid);
+  CHECK_EQ(state_history_size, history_.size())
+      << "Incorrect length of history blobs.";
+  for (int i = 0; i < history_.size(); ++i) {
+    ostringstream oss;
+    oss << i;
+    hdf5_load_nd_dataset<Dtype>(history_hid, oss.str().c_str(), 0,
+                                kMaxBlobAxes, history_[i].get());
+  }
+  H5Gclose(history_hid);
+  H5Fclose(file_hid);
+}
+
+INSTANTIATE_CLASS(SGDSolver);
+REGISTER_SOLVER_CLASS(SGD);
+
+}  // namespace caffe
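
As a concrete reading of GetLearningRate, the "step" policy with the illustrative values base_lr = 0.01, gamma = 0.1, stepsize = 1000 gives 0.01 for iterations 0-999, 0.001 for 1000-1999, and so on; a minimal sketch:

    #include <cmath>

    // rate = base_lr * gamma ^ floor(iter / stepsize), as in the "step"
    // branch of GetLearningRate above (integer division does the floor).
    double step_rate(int iter, double base_lr, double gamma, int stepsize) {
      return base_lr * std::pow(gamma, iter / stepsize);
    }
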
index 632bf1f..ec4665e 100644 (file)
@@ -1,5 +1,3 @@
-#include <cstring>
-
 #include "caffe/common.hpp"
 #include "caffe/syncedmem.hpp"
 #include "caffe/util/math_functions.hpp"
index ef0e57a..5960a66 100644 (file)
@@ -1,6 +1,4 @@
 #include <cfloat>
-#include <cmath>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -8,8 +6,8 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
+#include "caffe/loss_layers.hpp"
 #include "caffe/util/rng.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
index bbf1909..f3f2094 100644 (file)
@@ -5,8 +5,8 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
diff --git a/src/caffe/test/test_batch_reindex_layer.cpp b/src/caffe/test/test_batch_reindex_layer.cpp
new file mode 100644 (file)
index 0000000..17e47f0
--- /dev/null
@@ -0,0 +1,118 @@
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
+#include "caffe/filler.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template<typename TypeParam>
+class BatchReindexLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  BatchReindexLayerTest()
+      : blob_bottom_(new Blob<Dtype>()),
+        blob_bottom_permute_(new Blob<Dtype>()),
+        blob_top_(new Blob<Dtype>()) {
+  }
+  virtual void SetUp() {
+    Caffe::set_random_seed(1701);
+    vector<int> sz;
+    sz.push_back(5);
+    sz.push_back(4);
+    sz.push_back(3);
+    sz.push_back(2);
+    blob_bottom_->Reshape(sz);
+    vector<int> permsz;
+    permsz.push_back(6);
+    blob_bottom_permute_->Reshape(permsz);
+
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    int perm[] = { 4, 0, 4, 0, 1, 2 };
+    for (int i = 0; i < blob_bottom_permute_->count(); ++i) {
+      blob_bottom_permute_->mutable_cpu_data()[i] = perm[i];
+    }
+
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_permute_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~BatchReindexLayerTest() {
+    delete blob_bottom_permute_;
+    delete blob_bottom_;
+    delete blob_top_;
+  }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_bottom_permute_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+
+  void TestForward() {
+    LayerParameter layer_param;
+
+    vector<int> sz;
+    sz.push_back(5);
+    sz.push_back(4);
+    sz.push_back(3);
+    sz.push_back(2);
+    blob_bottom_->Reshape(sz);
+    for (int i = 0; i < blob_bottom_->count(); ++i) {
+      blob_bottom_->mutable_cpu_data()[i] = i;
+    }
+
+    vector<int> permsz;
+    permsz.push_back(6);
+    blob_bottom_permute_->Reshape(permsz);
+    int perm[] = { 4, 0, 4, 0, 1, 2 };
+    for (int i = 0; i < blob_bottom_permute_->count(); ++i) {
+      blob_bottom_permute_->mutable_cpu_data()[i] = perm[i];
+    }
+    BatchReindexLayer<Dtype> layer(layer_param);
+    layer.SetUp(blob_bottom_vec_, blob_top_vec_);
+    EXPECT_EQ(blob_top_->num(), blob_bottom_permute_->num());
+    EXPECT_EQ(blob_top_->channels(), blob_bottom_->channels());
+    EXPECT_EQ(blob_top_->height(), blob_bottom_->height());
+    EXPECT_EQ(blob_top_->width(), blob_bottom_->width());
+
+    layer.Forward(blob_bottom_vec_, blob_top_vec_);
+    int channels = blob_top_->channels();
+    int height = blob_top_->height();
+    int width = blob_top_->width();
+    for (int i = 0; i < blob_top_->count(); ++i) {
+      int n = i / (channels * width * height);
+      int inner_idx = (i % (channels * width * height));
+      EXPECT_EQ(
+          blob_top_->cpu_data()[i],
+          blob_bottom_->cpu_data()[perm[n] * channels * width * height
+              + inner_idx]);
+    }
+  }
+};
+
+TYPED_TEST_CASE(BatchReindexLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(BatchReindexLayerTest, TestForward) {
+  this->TestForward();
+}
+
+TYPED_TEST(BatchReindexLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  BatchReindexLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-4, 1e-2);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+}
+
+}  // namespace caffe
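
The expectation in TestForward encodes the layer's contract: sample n of the top blob is a copy of sample perm[n] of the bottom blob. A minimal reference gather, with hypothetical names, for illustration only:

    #include <cstddef>
    #include <vector>

    // top[n] <- bottom[perm[n]], copied sample by sample (sample_size is the
    // number of elements per sample, i.e. channels * height * width).
    std::vector<float> batch_reindex(const std::vector<float>& bottom,
                                     const std::vector<int>& perm,
                                     int sample_size) {
      std::vector<float> top(perm.size() * sample_size);
      for (std::size_t n = 0; n < perm.size(); ++n)
        for (int k = 0; k < sample_size; ++k)
          top[n * sample_size + k] = bottom[perm[n] * sample_size + k];
      return top;
    }
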
index 7da6423..a9d7d51 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index b3a61b0..58ae5c6 100644 (file)
@@ -1,5 +1,3 @@
-#include <cstring>
-
 #include "gtest/gtest.h"
 
 #include "caffe/common.hpp"
index ccd97eb..8ba51f4 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 1e9447c..592997e 100644 (file)
@@ -1,7 +1,5 @@
 #include <algorithm>
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -9,7 +7,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 9df979a..b474735 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 770e7b2..b473dbb 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 8031f6e..3b56c5c 100644 (file)
@@ -5,8 +5,8 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 7a4fb98..0f4caf1 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 1949742..9dc14de 100644 (file)
@@ -1,6 +1,4 @@
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -8,7 +6,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 728b8dc..26e9b21 100644 (file)
@@ -1,5 +1,3 @@
-#include <cstring>
-
 #include "gtest/gtest.h"
 
 #include "caffe/filler.hpp"
index c641b6e..a2d0c29 100644 (file)
@@ -1,13 +1,11 @@
-#include <cstring>
-#include <limits>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 7b6757c..5d1caac 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 7ad7467..84c6747 100644 (file)
@@ -10,7 +10,7 @@
 #include "caffe/common.hpp"
 #include "caffe/parallel.hpp"
 #include "caffe/proto/caffe.pb.h"
-#include "caffe/solver.hpp"
+#include "caffe/sgd_solvers.hpp"
 #include "caffe/util/io.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
@@ -47,7 +47,6 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
   // Test data: check out generate_sample_data.py in the same directory.
   string* input_file_;
 
-  virtual SolverParameter_SolverType solver_type() = 0;
   virtual void InitSolver(const SolverParameter& param) = 0;
 
   virtual void InitSolverFromProtoString(const string& proto) {
@@ -290,8 +289,8 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
           ((i == D) ? bias.cpu_data()[0] : weights.cpu_data()[i]);
       // Finally, compute update.
       const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
-      if (solver_type() != SolverParameter_SolverType_ADADELTA
-          && solver_type() != SolverParameter_SolverType_ADAM) {
+      if (solver_->type() != string("AdaDelta")
+          && solver_->type() != string("Adam")) {
         ASSERT_EQ(2, history.size());  // 1 blob for weights, 1 for bias
       } else {
         ASSERT_EQ(4, history.size());  // additional blobs for update history
@@ -300,26 +299,19 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
       const Dtype history_value = (i == D) ?
             history[1]->cpu_data()[0] : history[0]->cpu_data()[i];
       const Dtype temp = momentum * history_value;
-      switch (solver_type()) {
-      case SolverParameter_SolverType_SGD:
+      if (solver_->type() == string("SGD")) {
         update_value += temp;
-        break;
-      case SolverParameter_SolverType_NESTEROV:
+      } else if (solver_->type() == string("Nesterov")) {
         update_value += temp;
         // step back then over-step
         update_value = (1 + momentum) * update_value - temp;
-        break;
-      case SolverParameter_SolverType_ADAGRAD:
+      } else if (solver_->type() == string("AdaGrad")) {
         update_value /= std::sqrt(history_value + grad * grad) + delta_;
-        break;
-      case SolverParameter_SolverType_RMSPROP: {
+      } else if (solver_->type() == string("RMSProp")) {
         const Dtype rms_decay = 0.95;
         update_value /= std::sqrt(rms_decay*history_value
             + grad * grad * (1 - rms_decay)) + delta_;
-        }
-        break;
-      case SolverParameter_SolverType_ADADELTA:
-      {
+      } else if (solver_->type() == string("AdaDelta")) {
         const Dtype update_history_value = (i == D) ?
             history[1 + num_param_blobs]->cpu_data()[0] :
             history[0 + num_param_blobs]->cpu_data()[i];
@@ -330,9 +322,7 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
         // not actually needed, just here for illustrative purposes
         // const Dtype weighted_update_average =
         //   momentum * update_history_value + (1 - momentum) * (update_value);
-        break;
-      }
-      case SolverParameter_SolverType_ADAM: {
+      } else if (solver_->type() == string("Adam")) {
         const Dtype momentum2 = 0.999;
         const Dtype m = history_value;
         const Dtype v = (i == D) ?
@@ -344,10 +334,8 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
             std::sqrt(Dtype(1) - pow(momentum2, num_iters)) /
             (Dtype(1.) - pow(momentum, num_iters));
         update_value = alpha_t * val_m / (std::sqrt(val_v) + delta_);
-        break;
-      }
-      default:
-        LOG(FATAL) << "Unknown solver type: " << solver_type();
+      } else {
+        LOG(FATAL) << "Unknown solver type: " << solver_->type();
       }
       if (i == D) {
         updated_bias.mutable_cpu_diff()[0] = update_value;
@@ -392,7 +380,7 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
     EXPECT_NEAR(expected_updated_bias, solver_updated_bias, error_margin);
 
     // Check the solver's history -- should contain the previous update value.
-    if (solver_type() == SolverParameter_SolverType_SGD) {
+    if (solver_->type() == string("SGD")) {
       const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
       ASSERT_EQ(2, history.size());
       for (int i = 0; i < D; ++i) {
@@ -581,10 +569,6 @@ class SGDSolverTest : public GradientBasedSolverTest<TypeParam> {
   virtual void InitSolver(const SolverParameter& param) {
     this->solver_.reset(new SGDSolver<Dtype>(param));
   }
-
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_SGD;
-  }
 };
 
 TYPED_TEST_CASE(SGDSolverTest, TestDtypesAndDevices);
@@ -721,9 +705,6 @@ class AdaGradSolverTest : public GradientBasedSolverTest<TypeParam> {
   virtual void InitSolver(const SolverParameter& param) {
     this->solver_.reset(new AdaGradSolver<Dtype>(param));
   }
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_ADAGRAD;
-  }
 };
 
 TYPED_TEST_CASE(AdaGradSolverTest, TestDtypesAndDevices);
@@ -824,9 +805,6 @@ class NesterovSolverTest : public GradientBasedSolverTest<TypeParam> {
   virtual void InitSolver(const SolverParameter& param) {
     this->solver_.reset(new NesterovSolver<Dtype>(param));
   }
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_NESTEROV;
-  }
 };
 
 TYPED_TEST_CASE(NesterovSolverTest, TestDtypesAndDevices);
@@ -960,10 +938,6 @@ class AdaDeltaSolverTest : public GradientBasedSolverTest<TypeParam> {
   virtual void InitSolver(const SolverParameter& param) {
     this->solver_.reset(new AdaDeltaSolver<Dtype>(param));
   }
-
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_ADADELTA;
-  }
 };
 
 TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices);
@@ -1098,9 +1072,6 @@ class AdamSolverTest : public GradientBasedSolverTest<TypeParam> {
     new_param.set_momentum2(momentum2);
     this->solver_.reset(new AdamSolver<Dtype>(new_param));
   }
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_ADAM;
-  }
 };
 
 TYPED_TEST_CASE(AdamSolverTest, TestDtypesAndDevices);
@@ -1201,9 +1172,6 @@ class RMSPropSolverTest : public GradientBasedSolverTest<TypeParam> {
     new_param.set_rms_decay(rms_decay);
     this->solver_.reset(new RMSPropSolver<Dtype>(new_param));
   }
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_RMSPROP;
-  }
 };
 
 TYPED_TEST_CASE(RMSPropSolverTest, TestDtypesAndDevices);
index b56277b..adc27df 100644 (file)
@@ -5,10 +5,10 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/data_layers.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/util/hdf5.hpp"
 #include "caffe/util/io.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
index c9b027f..7169e7b 100644 (file)
@@ -5,9 +5,8 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
-#include "caffe/filler.hpp"
+#include "caffe/data_layers.hpp"
 #include "caffe/proto/caffe.pb.h"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
index b6a9902..dfdd01d 100644 (file)
@@ -1,6 +1,4 @@
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -8,7 +6,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index f0b75fc..bafcacf 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 293aa26..ec055b2 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 481fcef..7769024 100644 (file)
@@ -7,10 +7,10 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/data_layers.hpp"
 #include "caffe/filler.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/util/io.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
index 7ec2f80..b2a6754 100644 (file)
@@ -1,6 +1,3 @@
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index fbf0c85..1ad2c97 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index c4e2f8e..bd1c4fe 100644 (file)
@@ -1,5 +1,4 @@
 #include <algorithm>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -246,5 +245,201 @@ TYPED_TEST(LRNLayerTest, TestGradientWithinChannel) {
       this->blob_top_vec_);
 }
 
+#ifdef USE_CUDNN
+template <typename Dtype>
+class CuDNNLRNLayerTest : public GPUDeviceTest<Dtype> {
+ protected:
+  CuDNNLRNLayerTest()
+      : epsilon_(Dtype(1e-5)),
+        blob_bottom_(new Blob<Dtype>()),
+        blob_top_(new Blob<Dtype>()) {}
+  virtual void SetUp() {
+    Caffe::set_random_seed(1701);
+    blob_bottom_->Reshape(2, 7, 3, 3);
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~CuDNNLRNLayerTest() { delete blob_bottom_; delete blob_top_; }
+  void ReferenceLRNForward(const Blob<Dtype>& blob_bottom,
+      const LayerParameter& layer_param, Blob<Dtype>* blob_top);
+
+  Dtype epsilon_;
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+template <typename TypeParam>
+void CuDNNLRNLayerTest<TypeParam>::ReferenceLRNForward(
+    const Blob<TypeParam>& blob_bottom, const LayerParameter& layer_param,
+    Blob<TypeParam>* blob_top) {
+  typedef TypeParam Dtype;
+  blob_top->Reshape(blob_bottom.num(), blob_bottom.channels(),
+      blob_bottom.height(), blob_bottom.width());
+  Dtype* top_data = blob_top->mutable_cpu_data();
+  LRNParameter lrn_param = layer_param.lrn_param();
+  Dtype alpha = lrn_param.alpha();
+  Dtype beta = lrn_param.beta();
+  int size = lrn_param.local_size();
+  switch (lrn_param.norm_region()) {
+  case LRNParameter_NormRegion_ACROSS_CHANNELS:
+    for (int n = 0; n < blob_bottom.num(); ++n) {
+      for (int c = 0; c < blob_bottom.channels(); ++c) {
+        for (int h = 0; h < blob_bottom.height(); ++h) {
+          for (int w = 0; w < blob_bottom.width(); ++w) {
+            int c_start = c - (size - 1) / 2;
+            int c_end = min(c_start + size, blob_bottom.channels());
+            c_start = max(c_start, 0);
+            Dtype scale = 1.;
+            for (int i = c_start; i < c_end; ++i) {
+              Dtype value = blob_bottom.data_at(n, i, h, w);
+              scale += value * value * alpha / size;
+            }
+            *(top_data + blob_top->offset(n, c, h, w)) =
+              blob_bottom.data_at(n, c, h, w) / pow(scale, beta);
+          }
+        }
+      }
+    }
+    break;
+  case LRNParameter_NormRegion_WITHIN_CHANNEL:
+    for (int n = 0; n < blob_bottom.num(); ++n) {
+      for (int c = 0; c < blob_bottom.channels(); ++c) {
+        for (int h = 0; h < blob_bottom.height(); ++h) {
+          int h_start = h - (size - 1) / 2;
+          int h_end = min(h_start + size, blob_bottom.height());
+          h_start = max(h_start, 0);
+          for (int w = 0; w < blob_bottom.width(); ++w) {
+            Dtype scale = 1.;
+            int w_start = w - (size - 1) / 2;
+            int w_end = min(w_start + size, blob_bottom.width());
+            w_start = max(w_start, 0);
+            for (int nh = h_start; nh < h_end; ++nh) {
+              for (int nw = w_start; nw < w_end; ++nw) {
+                Dtype value = blob_bottom.data_at(n, c, nh, nw);
+                scale += value * value * alpha / (size * size);
+              }
+            }
+            *(top_data + blob_top->offset(n, c, h, w)) =
+              blob_bottom.data_at(n, c, h, w) / pow(scale, beta);
+          }
+        }
+      }
+    }
+    break;
+  default:
+    LOG(FATAL) << "Unknown normalization region.";
+  }
+}
+
+TYPED_TEST_CASE(CuDNNLRNLayerTest, TestDtypes);
+
+TYPED_TEST(CuDNNLRNLayerTest, TestForwardAcrossChannelsCuDNN) {
+  // typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  CuDNNLRNLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Blob<TypeParam> top_reference;
+  this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+      &top_reference);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i],
+                this->epsilon_);
+  }
+}
+
+TYPED_TEST(CuDNNLRNLayerTest, TestForwardAcrossChannelsLargeRegionCuDNN) {
+  typedef TypeParam Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  CuDNNLRNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Blob<Dtype> top_reference;
+  this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+      &top_reference);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i],
+                this->epsilon_);
+  }
+}
+
+TYPED_TEST(CuDNNLRNLayerTest, TestGradientAcrossChannelsCuDNN) {
+  typedef TypeParam Dtype;
+  LayerParameter layer_param;
+  CuDNNLRNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    this->blob_top_->mutable_cpu_diff()[i] = 1.;
+  }
+  vector<bool> propagate_down(this->blob_bottom_vec_.size(), true);
+  layer.Backward(this->blob_top_vec_, propagate_down,
+                 this->blob_bottom_vec_);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(CuDNNLRNLayerTest, TestForwardWithinChannel) {
+  typedef TypeParam Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_norm_region(
+      LRNParameter_NormRegion_WITHIN_CHANNEL);
+  layer_param.mutable_lrn_param()->set_local_size(3);
+  CuDNNLCNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Blob<Dtype> top_reference;
+  this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+      &top_reference);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i],
+                this->epsilon_);
+  }
+}
+
+TYPED_TEST(CuDNNLRNLayerTest, TestGradientWithinChannel) {
+  typedef TypeParam Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_norm_region(
+      LRNParameter_NormRegion_WITHIN_CHANNEL);
+  layer_param.mutable_lrn_param()->set_local_size(3);
+  CuDNNLCNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    this->blob_top_->mutable_cpu_diff()[i] = 1.;
+  }
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(CuDNNLRNLayerTest, TestGradientAcrossChannelsLargeRegionCuDNN) {
+  typedef TypeParam Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  CuDNNLRNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    this->blob_top_->mutable_cpu_diff()[i] = 1.;
+  }
+  vector<bool> propagate_down(this->blob_bottom_vec_.size(), true);
+  layer.Backward(this->blob_top_vec_, propagate_down,
+                 this->blob_bottom_vec_);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+#endif
 
 }  // namespace caffe
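
ReferenceLRNForward above checks the CuDNN layers against the textbook LRN formula: each activation is divided by (1 + alpha/size * sum of squares over the local window) ^ beta. The across-channels case, per element, as a minimal sketch (x holds the channel values at one spatial position):

    #include <algorithm>
    #include <cmath>

    // Across-channels LRN for one (n, h, w) position, matching the
    // reference loop above.
    double lrn_across_channels(const double* x, int channels, int c, int size,
                               double alpha, double beta) {
      const int c_start = std::max(c - (size - 1) / 2, 0);
      const int c_end = std::min(c - (size - 1) / 2 + size, channels);
      double scale = 1.0;
      for (int i = c_start; i < c_end; ++i)
        scale += x[i] * x[i] * alpha / size;
      return x[c] / std::pow(scale, beta);
    }
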
index a095b54..fbee3f9 100644 (file)
@@ -1,8 +1,6 @@
 #include <stdint.h>  // for uint32_t & uint64_t
 #include <time.h>
-#include <climits>
 #include <cmath>  // for std::fabs
-#include <cstdlib>  // for rand_r
 
 #include "gtest/gtest.h"
 
index b2db984..0404aa2 100644 (file)
@@ -1,6 +1,3 @@
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -8,7 +5,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index be23d86..e9a7d54 100644 (file)
@@ -1,5 +1,3 @@
-#include <cmath>
-#include <cstring>
 #include <vector>
 
 #include "caffe/blob.hpp"
index c6e4d27..b333fde 100644 (file)
@@ -1,5 +1,4 @@
 #include <algorithm>
-#include <cstring>
 #include <vector>
 
 #include "google/protobuf/text_format.h"
@@ -7,8 +6,9 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 69f2d5c..9e986e6 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 76c9e85..1041ddd 100644 (file)
@@ -6,7 +6,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index f568a18..a8d4372 100644 (file)
@@ -1,12 +1,11 @@
-#include <algorithm>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 9d08ec6..e0f4ba4 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index e5737e4..b4f831c 100644 (file)
@@ -1,6 +1,4 @@
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -8,7 +6,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 2d2d0fd..45fbcff 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 996da4b..4b01f5c 100644 (file)
@@ -1,13 +1,12 @@
 #include <cmath>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 1498d5c..0ae4cd6 100644 (file)
@@ -1,6 +1,4 @@
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "boost/scoped_ptr.hpp"
@@ -9,7 +7,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/loss_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index ceabc9c..b181642 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "caffe/common.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include "caffe/sgd_solvers.hpp"
 #include "caffe/solver.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
diff --git a/src/caffe/test/test_solver_factory.cpp b/src/caffe/test/test_solver_factory.cpp
new file mode 100644 (file)
index 0000000..eef5290
--- /dev/null
@@ -0,0 +1,50 @@
+#include <map>
+#include <string>
+
+#include "boost/scoped_ptr.hpp"
+#include "google/protobuf/text_format.h"
+#include "gtest/gtest.h"
+
+#include "caffe/common.hpp"
+#include "caffe/solver.hpp"
+#include "caffe/solver_factory.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class SolverFactoryTest : public MultiDeviceTest<TypeParam> {
+ protected:
+  SolverParameter simple_solver_param() {
+    const string solver_proto =
+        "train_net_param { "
+        "  layer { "
+        "    name: 'data' type: 'DummyData' top: 'data' "
+        "    dummy_data_param { shape { dim: 1 } } "
+        "  } "
+        "} ";
+    SolverParameter solver_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        solver_proto, &solver_param));
+    return solver_param;
+  }
+};
+
+TYPED_TEST_CASE(SolverFactoryTest, TestDtypesAndDevices);
+
+TYPED_TEST(SolverFactoryTest, TestCreateSolver) {
+  typedef typename TypeParam::Dtype Dtype;
+  typename SolverRegistry<Dtype>::CreatorRegistry& registry =
+      SolverRegistry<Dtype>::Registry();
+  shared_ptr<Solver<Dtype> > solver;
+  SolverParameter solver_param = this->simple_solver_param();
+  for (typename SolverRegistry<Dtype>::CreatorRegistry::iterator iter =
+       registry.begin(); iter != registry.end(); ++iter) {
+    solver_param.set_type(iter->first);
+    solver.reset(SolverRegistry<Dtype>::CreateSolver(solver_param));
+    EXPECT_EQ(iter->first, solver->type());
+  }
+}
+
+}  // namespace caffe
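
Under the hood, REGISTER_SOLVER_CLASS populates a string-keyed map of creator functions and CreateSolver looks the type string up in it; a self-contained sketch of that pattern (illustrative only, not Caffe's actual implementation):

    #include <cassert>
    #include <map>
    #include <string>

    struct Solver { virtual ~Solver() {} virtual std::string type() const = 0; };
    struct SGD : Solver { std::string type() const { return "SGD"; } };

    typedef Solver* (*Creator)();
    std::map<std::string, Creator>& registry() {
      static std::map<std::string, Creator> r;  // one registry per process
      return r;
    }
    template <typename T> Solver* create() { return new T; }

    int main() {
      registry()["SGD"] = &create<SGD>;  // what REGISTER_SOLVER_CLASS does
      Solver* s = registry()["SGD"]();   // what CreateSolver does
      assert(s->type() == "SGD");
      delete s;
      return 0;
    }
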
index be5204b..e27e355 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <string>
 #include <vector>
 
@@ -7,10 +6,10 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/util/insert_splits.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index b2585f1..1b48a84 100644 (file)
@@ -1,5 +1,3 @@
-#include <algorithm>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index f84464c..5a412bd 100644 (file)
@@ -1,5 +1,4 @@
 #include <algorithm>
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index b946233..16dfb58 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
index 5dc9283..f31579c 100644 (file)
@@ -5,8 +5,8 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
-#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
+#include "caffe/neuron_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index 05ce821..903a9bc 100644 (file)
@@ -5,7 +5,7 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
+#include "caffe/neuron_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 
index 540aac3..5c45960 100644 (file)
@@ -1,12 +1,11 @@
-#include <cstring>
 #include <vector>
 
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/common_layers.hpp"
 #include "caffe/filler.hpp"
-#include "caffe/vision_layers.hpp"
 
 #include "caffe/test/test_caffe_main.hpp"
 #include "caffe/test/test_gradient_check_util.hpp"
index ee05b15..9dcc2aa 100644 (file)
@@ -1,4 +1,3 @@
-#include <cstring>
 #include <string>
 #include <vector>
 
@@ -2892,7 +2891,6 @@ TEST_F(NetUpgradeTest, TestImageNet) {
   this->RunV1UpgradeTest(expected_v1_proto, expected_v2_proto);
 }  // NOLINT(readability/fn_size)
 
-#ifdef USE_OPENCV
 TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) {
   LayerParameter layer_param;
   shared_ptr<Layer<float> > layer;
@@ -2927,5 +2925,65 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) {
     EXPECT_EQ(v2_layer_type, layer->type());
   }
 }
-#endif  // USE_OPENCV
+
+class SolverTypeUpgradeTest : public ::testing::Test {
+ protected:
+  void RunSolverTypeUpgradeTest(
+      const string& input_param_string, const string& output_param_string) {
+    // Test upgrading old solver_type field (enum) to new type field (string)
+    SolverParameter input_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        input_param_string, &input_param));
+    SolverParameter expected_output_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        output_param_string, &expected_output_param));
+    SolverParameter actual_output_param = input_param;
+    UpgradeSolverType(&actual_output_param);
+    EXPECT_EQ(expected_output_param.DebugString(),
+        actual_output_param.DebugString());
+  }
+};
+
+TEST_F(SolverTypeUpgradeTest, TestSimple) {
+  const char* old_type_vec[6] = { "SGD", "ADAGRAD", "NESTEROV", "RMSPROP",
+      "ADADELTA", "ADAM" };
+  const char* new_type_vec[6] = { "SGD", "AdaGrad", "Nesterov", "RMSProp",
+      "AdaDelta", "Adam" };
+  for (int i = 0; i < 6; ++i) {
+    const string& input_proto =
+        "net: 'examples/mnist/lenet_train_test.prototxt' "
+        "test_iter: 100 "
+        "test_interval: 500 "
+        "base_lr: 0.01 "
+        "momentum: 0.0 "
+        "weight_decay: 0.0005 "
+        "lr_policy: 'inv' "
+        "gamma: 0.0001 "
+        "power: 0.75 "
+        "display: 100 "
+        "max_iter: 10000 "
+        "snapshot: 5000 "
+        "snapshot_prefix: 'examples/mnist/lenet_rmsprop' "
+        "solver_mode: GPU "
+        "solver_type: " + std::string(old_type_vec[i]) + " ";
+    const string& expected_output_proto =
+        "net: 'examples/mnist/lenet_train_test.prototxt' "
+        "test_iter: 100 "
+        "test_interval: 500 "
+        "base_lr: 0.01 "
+        "momentum: 0.0 "
+        "weight_decay: 0.0005 "
+        "lr_policy: 'inv' "
+        "gamma: 0.0001 "
+        "power: 0.75 "
+        "display: 100 "
+        "max_iter: 10000 "
+        "snapshot: 5000 "
+        "snapshot_prefix: 'examples/mnist/lenet_rmsprop' "
+        "solver_mode: GPU "
+        "type: '" + std::string(new_type_vec[i]) + "' ";
+    this->RunSolverTypeUpgradeTest(input_proto, expected_output_proto);
+  }
+}
+
 }  // NOLINT(readability/fn_size)  // namespace caffe
index 8770f30..9ee8818 100644 (file)
@@ -1,7 +1,5 @@
 #ifndef CPU_ONLY  // CPU-GPU test
 
-#include <cstring>
-
 #include "gtest/gtest.h"
 
 #include "caffe/blob.hpp"
index 78dd880..0bc82b5 100644 (file)
@@ -19,7 +19,22 @@ void LMDB::Open(const string& source, Mode mode) {
   if (mode == READ) {
     flags = MDB_RDONLY | MDB_NOTLS;
   }
-  MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664));
+  int rc = mdb_env_open(mdb_env_, source.c_str(), flags, 0664);
+#ifndef ALLOW_LMDB_NOLOCK
+  MDB_CHECK(rc);
+#else
+  if (rc == EACCES) {
+    LOG(WARNING) << "Permission denied. Trying with MDB_NOLOCK ...";
+    // Close and re-open environment handle
+    mdb_env_close(mdb_env_);
+    MDB_CHECK(mdb_env_create(&mdb_env_));
+    // Try again with MDB_NOLOCK
+    flags |= MDB_NOLOCK;
+    MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664));
+  } else {
+    MDB_CHECK(rc);
+  }
+#endif
   LOG(INFO) << "Opened lmdb " << source;
 }
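
The EACCES fallback above is compiled in only when the ALLOW_LMDB_NOLOCK macro
is defined. A minimal sketch of enabling it for a Makefile build (the config
flag name is assumed to match the macro; see Makefile.config.example):

    # In Makefile.config: retry read-only LMDB opens with MDB_NOLOCK
    ALLOW_LMDB_NOLOCK := 1

MDB_NOLOCK disables LMDB's own locking, so this is only safe when no writer can
touch the database concurrently.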
 
index b0a7be5..27e5b7c 100644 (file)
@@ -1,6 +1,3 @@
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
 #include <vector>
 
 #include "caffe/util/im2col.hpp"
@@ -14,22 +11,20 @@ void im2col_cpu(const Dtype* data_im, const int channels,
     const int pad_h, const int pad_w,
     const int stride_h, const int stride_w,
     Dtype* data_col) {
-  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-  int channels_col = channels * kernel_h * kernel_w;
-  for (int c = 0; c < channels_col; ++c) {
-    int w_offset = c % kernel_w;
-    int h_offset = (c / kernel_w) % kernel_h;
-    int c_im = c / kernel_h / kernel_w;
-    for (int h = 0; h < height_col; ++h) {
-      for (int w = 0; w < width_col; ++w) {
-        int h_pad = h * stride_h - pad_h + h_offset;
-        int w_pad = w * stride_w - pad_w + w_offset;
-        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-          data_col[(c * height_col + h) * width_col + w] =
-            data_im[(c_im * height + h_pad) * width + w_pad];
-        else
-          data_col[(c * height_col + h) * width_col + w] = 0;
+  const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+  const int channels_col = channels * kernel_h * kernel_w;
+  for (int c_col = 0; c_col < channels_col; ++c_col) {
+    int w_offset = c_col % kernel_w;
+    int h_offset = (c_col / kernel_w) % kernel_h;
+    int c_im = c_col / kernel_h / kernel_w;
+    for (int h_col = 0; h_col < height_col; ++h_col) {
+      for (int w_col = 0; w_col < width_col; ++w_col) {
+        int h_im = h_col * stride_h - pad_h + h_offset;
+        int w_im = w_col * stride_w - pad_w + w_offset;
+        data_col[(c_col * height_col + h_col) * width_col + w_col] =
+            (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
+            data_im[(c_im * height + h_im) * width + w_im] : 0;
       }
     }
   }
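
A quick worked check of the output-size formula above: with height = 5,
pad_h = 1, kernel_h = 3, and stride_h = 2,

    height_col = (5 + 2*1 - 3) / 2 + 1 = 3

so three kernel placements fit along that axis; width_col follows the same
computation.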
@@ -64,9 +59,9 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col,
   const int channels_col = col_shape[0];
   vector<int> d_offset(num_spatial_axes, 0);
   vector<int> d_iter(num_spatial_axes, 0);
-  for (int c = 0; c < channels_col; ++c) {
+  for (int c_col = 0; c_col < channels_col; ++c_col) {
     // Loop over spatial axes in reverse order to compute a per-axis offset.
-    int offset = c;
+    int offset = c_col;
     for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) {
       if (d_i < num_spatial_axes - 1) {
         offset /= kernel_shape[d_i + 1];
@@ -76,17 +71,17 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col,
     for (bool incremented = true; incremented; ) {
       // Loop over spatial axes in forward order to compute the indices in the
       // image and column, and whether the index lies in the padding.
-      int index_col = c;
-      int index_im = c / kernel_size;
+      int index_col = c_col;
+      int index_im = c_col / kernel_size;
       bool is_padding = false;
       for (int d_i = 0; d_i < num_spatial_axes; ++d_i) {
         const int d = d_iter[d_i];
-        const int d_pad = d * stride[d_i] - pad[d_i] + d_offset[d_i];
-        is_padding |= d_pad < 0 || d_pad >= im_shape[d_i + 1];
+        const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i];
+        is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1];
         index_col *= col_shape[d_i + 1];
         index_col += d;
         index_im *= im_shape[d_i + 1];
-        index_im += d_pad;
+        index_im += d_im;
       }
       if (im2col) {
         if (is_padding) {
@@ -139,25 +134,25 @@ template void im2col_nd_cpu<double>(const double* data_im,
 
 template <typename Dtype>
 void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w,
     const int stride_h, const int stride_w,
     Dtype* data_im) {
   caffe_set(height * width * channels, Dtype(0), data_im);
-  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
-  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
-  int channels_col = channels * patch_h * patch_w;
-  for (int c = 0; c < channels_col; ++c) {
-    int w_offset = c % patch_w;
-    int h_offset = (c / patch_w) % patch_h;
-    int c_im = c / patch_h / patch_w;
-    for (int h = 0; h < height_col; ++h) {
-      for (int w = 0; w < width_col; ++w) {
-        int h_pad = h * stride_h - pad_h + h_offset;
-        int w_pad = w * stride_w - pad_w + w_offset;
-        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-          data_im[(c_im * height + h_pad) * width + w_pad] +=
-              data_col[(c * height_col + h) * width_col + w];
+  const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+  const int channels_col = channels * kernel_h * kernel_w;
+  for (int c_col = 0; c_col < channels_col; ++c_col) {
+    int w_offset = c_col % kernel_w;
+    int h_offset = (c_col / kernel_w) % kernel_h;
+    int c_im = c_col / kernel_h / kernel_w;
+    for (int h_col = 0; h_col < height_col; ++h_col) {
+      for (int w_col = 0; w_col < width_col; ++w_col) {
+        int h_im = h_col * stride_h - pad_h + h_offset;
+        int w_im = w_col * stride_w - pad_w + w_offset;
+        if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
+          data_im[(c_im * height + h_im) * width + w_im] +=
+              data_col[(c_col * height_col + h_col) * width_col + w_col];
       }
     }
   }
@@ -165,11 +160,11 @@ void col2im_cpu(const Dtype* data_col, const int channels,
 
 // Explicit instantiation
 template void col2im_cpu<float>(const float* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, float* data_im);
 template void col2im_cpu<double>(const double* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, double* data_im);
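
col2im_cpu zero-fills data_im (via caffe_set) and then accumulates with +=
because overlapping kernel placements contribute to the same image pixel. A
minimal standalone sketch of that overlap count along one axis (all names here
are local to the example, not part of Caffe):

    #include <cstdio>

    // Count column entries that map onto image row h_im for one spatial axis.
    int contributions(int h_im, int height, int kernel_h, int pad_h,
                      int stride_h) {
      int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
      int count = 0;
      for (int h_col = 0; h_col < height_col; ++h_col) {
        for (int h_offset = 0; h_offset < kernel_h; ++h_offset) {
          if (h_col * stride_h - pad_h + h_offset == h_im) ++count;
        }
      }
      return count;
    }

    int main() {
      // height=5, kernel=3, pad=1, stride=1: interior row 2 is hit 3 times,
      // which is why col2im must sum rather than assign.
      printf("%d\n", contributions(2, 5, 3, 1, 1));  // prints 3
      return 0;
    }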
 
index 5a478ba..49354ab 100644 (file)
@@ -1,7 +1,4 @@
 #include <algorithm>
-#include <cmath>
-#include <cstdlib>
-#include <cstring>
 
 #include "caffe/common.hpp"
 #include "caffe/util/im2col.hpp"
@@ -16,22 +13,23 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
     const int height_col, const int width_col,
     Dtype* data_col) {
   CUDA_KERNEL_LOOP(index, n) {
-    int w_out = index % width_col;
-    int h_index = index / width_col;
-    int h_out = h_index % height_col;
-    int channel_in = h_index / height_col;
-    int channel_out = channel_in * kernel_h * kernel_w;
-    int h_in = h_out * stride_h - pad_h;
-    int w_in = w_out * stride_w - pad_w;
+    const int h_index = index / width_col;
+    const int h_col = h_index % height_col;
+    const int w_col = index % width_col;
+    const int c_im = h_index / height_col;
+    const int c_col = c_im * kernel_h * kernel_w;
+    const int h_offset = h_col * stride_h - pad_h;
+    const int w_offset = w_col * stride_w - pad_w;
     Dtype* data_col_ptr = data_col;
-    data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out;
+    data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
     const Dtype* data_im_ptr = data_im;
-    data_im_ptr += (channel_in * height + h_in) * width + w_in;
+    data_im_ptr += (c_im * height + h_offset) * width + w_offset;
     for (int i = 0; i < kernel_h; ++i) {
       for (int j = 0; j < kernel_w; ++j) {
-        int h = h_in + i;
-        int w = w_in + j;
-        *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
+        int h_im = h_offset + i;
+        int w_im = w_offset + j;
+        *data_col_ptr =
+            (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
             data_im_ptr[i * width + j] : 0;
         data_col_ptr += height_col * width_col;
       }
@@ -222,35 +220,39 @@ template void im2col_nd_gpu<double>(const double* data_im,
 template <typename Dtype>
 __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
     const int height, const int width, const int channels,
-    const int patch_h, const int patch_w,
+    const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w,
     const int stride_h, const int stride_w,
     const int height_col, const int width_col,
     Dtype* data_im) {
   CUDA_KERNEL_LOOP(index, n) {
     Dtype val = 0;
-    int w = index % width + pad_w;
-    int h = (index / width) % height + pad_h;
-    int c = index / (width * height);
+    const int w_im = index % width + pad_w;
+    const int h_im = (index / width) % height + pad_h;
+    const int c_im = index / (width * height);
     // compute the start and end of the output
-    int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
-    int w_col_end = min(w / stride_w + 1, width_col);
-    int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
-    int h_col_end = min(h / stride_h + 1, height_col);
+    const int w_col_start =
+        (w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1;
+    const int w_col_end =
+        min(w_im / stride_w + 1, width_col);
+    const int h_col_start =
+        (h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1;
+    const int h_col_end =
+        min(h_im / stride_h + 1, height_col);
     /*
     for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
       for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
         // the col location: [c * width * height + h_out, w_out]
-        int c_col = c * patch_h * patch_w + (h - h_col * stride_h) * ksize
-            + (w - w_col * stride_w);
+        int c_col = c_im * kernel_h * kernel_w
+            + (h_im - h_col * stride_h) * kernel_w + (w_im - w_col * stride_w);
         val += data_col[(c_col * height_col + h_col) * width_col + w_col];
       }
     }
     */
     // equivalent implementation
-    int offset =
-        (c * patch_h * patch_w + h * patch_w + w) * height_col * width_col;
-    int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col;
+    int offset = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im)
+        * height_col * width_col;
+    int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
     int coeff_w_col = (1 - stride_w * height_col * width_col);
     for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
       for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
@@ -263,18 +265,18 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
 
 template <typename Dtype>
 void col2im_gpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, Dtype* data_im) {
-  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
-  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
+  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
   int num_kernels = channels * height * width;
   // To avoid involving atomic operations, we will launch one kernel per
   // bottom dimension, and then in the kernel add up the top dimensions.
   // NOLINT_NEXT_LINE(whitespace/operators)
   col2im_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels),
                              CAFFE_CUDA_NUM_THREADS>>>(
-      num_kernels, data_col, height, width, channels, patch_h, patch_w,
+      num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
       pad_h, pad_w, stride_h, stride_w,
       height_col, width_col, data_im);
   CUDA_POST_KERNEL_CHECK;
@@ -282,11 +284,11 @@ void col2im_gpu(const Dtype* data_col, const int channels,
 
 // Explicit instantiation
 template void col2im_gpu<float>(const float* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, float* data_im);
 template void col2im_gpu<double>(const double* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
+    const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
     const int stride_w, double* data_im);
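
The "equivalent implementation" in col2im_gpu_kernel above follows by expanding
the commented-out index. Starting from

    index = (c_col * height_col + h_col) * width_col + w_col
    c_col = c_im * kernel_h * kernel_w
            + (h_im - h_col * stride_h) * kernel_w
            + (w_im - w_col * stride_w)

and collecting the h_col and w_col terms:

    index = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im)
                * height_col * width_col                            // offset
            + h_col * (1 - stride_h * kernel_w * height_col) * width_col
                                                                    // coeff_h_col
            + w_col * (1 - stride_w * height_col * width_col)       // coeff_w_col

which is exactly offset + h_col * coeff_h_col + w_col * coeff_w_col, letting the
inner loops advance by fixed strides instead of recomputing c_col.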
 
@@ -302,11 +304,11 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col,
   CUDA_KERNEL_LOOP(index, n) {
     // Initialize channel_in, computed in the loop below, with intermediate
     // computations used to compute the spatial indices.
-    int channel_im = index;
+    int c_im = index;
     // Calculate d_im (image dimensions).
     for (int i = num_axes - 1; i >= 0; --i) {
-      d_im[i] = channel_im % im_shape[i + 1] + pad[i];
-      channel_im /= im_shape[i + 1];
+      d_im[i] = c_im % im_shape[i + 1] + pad[i];
+      c_im /= im_shape[i + 1];
     }
     // Calculate col start/end indices.
     bool done = false;
@@ -338,7 +340,7 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col,
             (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod;
         kernel_shape_prod *= kernel_shape[i];
       }
-      final_offset += kernel_shape_prod * channel_im;
+      final_offset += kernel_shape_prod * c_im;
       for (int i = 0; i < num_axes; ++i) {
         final_offset *= col_shape[i + 1];
         final_offset += d_col_iter[i];
index 2631a07..e4d0c4b 100644 (file)
@@ -4,8 +4,6 @@
 #include <thrust/reduce.h>
 
 #include <cmath>
-#include <cstdlib>
-#include <cstring>
 
 #include "caffe/common.hpp"
 #include "caffe/util/math_functions.hpp"
index ac379e5..ff3f8ff 100644 (file)
@@ -16,6 +16,67 @@ bool NetNeedsUpgrade(const NetParameter& net_param) {
   return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param);
 }
 
+bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
+  bool success = true;
+  if (NetNeedsV0ToV1Upgrade(*param)) {
+    // NetParameter was specified using the old style (V0LayerParameter); try to
+    // upgrade it.
+    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
+              << "V0LayerParameter: " << param_file;
+    NetParameter original_param(*param);
+    if (!UpgradeV0Net(original_param, param)) {
+      success = false;
+      LOG(ERROR) << "Warning: had one or more problems upgrading "
+          << "V0NetParameter to NetParameter (see above); continuing anyway.";
+    } else {
+      LOG(INFO) << "Successfully upgraded file specified using deprecated "
+                << "V0LayerParameter";
+    }
+    LOG(WARNING) << "Note that future Caffe releases will not support "
+        << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for "
+        << "prototxt and ./build/tools/upgrade_net_proto_binary for model "
+        << "weights upgrade this and any other net protos to the new format.";
+  }
+  // NetParameter uses old style data transformation fields; try to upgrade it.
+  if (NetNeedsDataUpgrade(*param)) {
+    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
+              << "transformation parameters: " << param_file;
+    UpgradeNetDataTransformation(param);
+    LOG(INFO) << "Successfully upgraded file specified using deprecated "
+              << "data transformation parameters.";
+    LOG(WARNING) << "Note that future Caffe releases will only support "
+                 << "transform_param messages for transformation fields.";
+  }
+  if (NetNeedsV1ToV2Upgrade(*param)) {
+    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
+              << "V1LayerParameter: " << param_file;
+    NetParameter original_param(*param);
+    if (!UpgradeV1Net(original_param, param)) {
+      success = false;
+      LOG(ERROR) << "Warning: had one or more problems upgrading "
+                 << "V1LayerParameter (see above); continuing anyway.";
+    } else {
+      LOG(INFO) << "Successfully upgraded file specified using deprecated "
+                << "V1LayerParameter";
+    }
+  }
+  return success;
+}
+
+void ReadNetParamsFromTextFileOrDie(const string& param_file,
+                                    NetParameter* param) {
+  CHECK(ReadProtoFromTextFile(param_file, param))
+      << "Failed to parse NetParameter file: " << param_file;
+  UpgradeNetAsNeeded(param_file, param);
+}
+
+void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
+                                      NetParameter* param) {
+  CHECK(ReadProtoFromBinaryFile(param_file, param))
+      << "Failed to parse NetParameter file: " << param_file;
+  UpgradeNetAsNeeded(param_file, param);
+}
+
 bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) {
   for (int i = 0; i < net_param.layers_size(); ++i) {
     if (net_param.layers(i).has_layer()) {
@@ -583,53 +644,6 @@ void UpgradeNetDataTransformation(NetParameter* net_param) {
   }
 }
 
-bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
-  bool success = true;
-  if (NetNeedsV0ToV1Upgrade(*param)) {
-    // NetParameter was specified using the old style (V0LayerParameter); try to
-    // upgrade it.
-    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
-              << "V0LayerParameter: " << param_file;
-    NetParameter original_param(*param);
-    if (!UpgradeV0Net(original_param, param)) {
-      success = false;
-      LOG(ERROR) << "Warning: had one or more problems upgrading "
-          << "V0NetParameter to NetParameter (see above); continuing anyway.";
-    } else {
-      LOG(INFO) << "Successfully upgraded file specified using deprecated "
-                << "V0LayerParameter";
-    }
-    LOG(WARNING) << "Note that future Caffe releases will not support "
-        << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for "
-        << "prototxt and ./build/tools/upgrade_net_proto_binary for model "
-        << "weights upgrade this and any other net protos to the new format.";
-  }
-  // NetParameter uses old style data transformation fields; try to upgrade it.
-  if (NetNeedsDataUpgrade(*param)) {
-    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
-              << "transformation parameters: " << param_file;
-    UpgradeNetDataTransformation(param);
-    LOG(INFO) << "Successfully upgraded file specified using deprecated "
-              << "data transformation parameters.";
-    LOG(WARNING) << "Note that future Caffe releases will only support "
-                 << "transform_param messages for transformation fields.";
-  }
-  if (NetNeedsV1ToV2Upgrade(*param)) {
-    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
-              << "V1LayerParameter: " << param_file;
-    NetParameter original_param(*param);
-    if (!UpgradeV1Net(original_param, param)) {
-      success = false;
-      LOG(ERROR) << "Warning: had one or more problems upgrading "
-                 << "V1LayerParameter (see above); continuing anyway.";
-    } else {
-      LOG(INFO) << "Successfully upgraded file specified using deprecated "
-                << "V1LayerParameter";
-    }
-  }
-  return success;
-}
-
 bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) {
   bool is_fully_compatible = true;
   if (v1_net_param.layer_size() > 0) {
@@ -923,18 +937,78 @@ const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) {
   }
 }
 
-void ReadNetParamsFromTextFileOrDie(const string& param_file,
-                                    NetParameter* param) {
-  CHECK(ReadProtoFromTextFile(param_file, param))
-      << "Failed to parse NetParameter file: " << param_file;
-  UpgradeNetAsNeeded(param_file, param);
+// Return true iff the solver specifies its type via the old solver_type enum field.
+bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) {
+  return solver_param.has_solver_type();
 }
 
-void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
-                                      NetParameter* param) {
-  CHECK(ReadProtoFromBinaryFile(param_file, param))
-      << "Failed to parse NetParameter file: " << param_file;
-  UpgradeNetAsNeeded(param_file, param);
+bool UpgradeSolverType(SolverParameter* solver_param) {
+  CHECK(!solver_param->has_solver_type() || !solver_param->has_type())
+      << "Failed to upgrade solver: old solver_type field (enum) and new type "
+      << "field (string) cannot be both specified in solver proto text.";
+  if (solver_param->has_solver_type()) {
+    string type;
+    switch (solver_param->solver_type()) {
+    case SolverParameter_SolverType_SGD:
+      type = "SGD";
+      break;
+    case SolverParameter_SolverType_NESTEROV:
+      type = "Nesterov";
+      break;
+    case SolverParameter_SolverType_ADAGRAD:
+      type = "AdaGrad";
+      break;
+    case SolverParameter_SolverType_RMSPROP:
+      type = "RMSProp";
+      break;
+    case SolverParameter_SolverType_ADADELTA:
+      type = "AdaDelta";
+      break;
+    case SolverParameter_SolverType_ADAM:
+      type = "Adam";
+      break;
+    default:
+      LOG(FATAL) << "Unknown SolverParameter solver_type: "
+                 << solver_param->solver_type();
+    }
+    solver_param->set_type(type);
+    solver_param->clear_solver_type();
+  } else {
+    LOG(ERROR) << "Warning: solver type already up to date. ";
+    return false;
+  }
+  return true;
+}
+
+// Check for deprecations and upgrade the SolverParameter as needed.
+bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param) {
+  bool success = true;
+  // Try to upgrade the old solver_type enum field into the new string type field.
+  if (SolverNeedsTypeUpgrade(*param)) {
+    LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
+              << "'solver_type' field (enum)': " << param_file;
+    if (!UpgradeSolverType(param)) {
+      success = false;
+      LOG(ERROR) << "Warning: had one or more problems upgrading "
+                 << "SolverType (see above).";
+    } else {
+      LOG(INFO) << "Successfully upgraded file specified using deprecated "
+                << "'solver_type' field (enum) to 'type' field (string).";
+      LOG(WARNING) << "Note that future Caffe releases will only support "
+                   << "'type' field (string) for a solver's type.";
+    }
+  }
+  return success;
+}
+
+// Read parameters from a file into a SolverParameter proto message.
+void ReadSolverParamsFromTextFileOrDie(const string& param_file,
+                                       SolverParameter* param) {
+  CHECK(ReadProtoFromTextFile(param_file, param))
+      << "Failed to parse SolverParameter file: " << param_file;
+  UpgradeSolverAsNeeded(param_file, param);
 }
 
 }  // namespace caffe
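
A minimal in-memory sketch of the upgrade path added above (the types and
functions are the ones introduced in this diff; error handling omitted, and the
program must link against caffe):

    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/upgrade_proto.hpp"

    int main() {
      caffe::SolverParameter param;
      param.set_solver_type(caffe::SolverParameter_SolverType_ADAGRAD);
      caffe::UpgradeSolverType(&param);  // returns true on success
      // param.has_solver_type() is now false and param.type() == "AdaGrad".
      return 0;
    }

For solver files on disk, ReadSolverParamsFromTextFileOrDie() performs the
parse and the same upgrade in a single call.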
index e3f684b..305cfc3 100644 (file)
@@ -157,7 +157,7 @@ int train() {
       "but not both.";
 
   caffe::SolverParameter solver_param;
-  caffe::ReadProtoFromTextFileOrDie(FLAGS_solver, &solver_param);
+  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
 
   // If the gpus flag is not provided, allow the mode and device to be set
   // in the solver prototxt.
@@ -194,7 +194,7 @@ int train() {
         GetRequestedAction(FLAGS_sighup_effect));
 
   shared_ptr<caffe::Solver<float> >
-    solver(caffe::GetSolver<float>(solver_param));
+      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
 
   solver->SetActionFunction(signal_handler.GetActionFunction());
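
The change above swaps the hard-coded caffe::GetSolver<float>() factory for the
new solver registry. A minimal sketch of driving it outside tools/caffe.cpp
("solver.prototxt" is an assumed path; the includes mirror what tools/caffe.cpp
pulls in):

    #include <boost/shared_ptr.hpp>
    #include "caffe/caffe.hpp"
    #include "caffe/util/upgrade_proto.hpp"

    int main() {
      caffe::SolverParameter solver_param;
      caffe::ReadSolverParamsFromTextFileOrDie("solver.prototxt", &solver_param);
      // The registry dispatches on solver_param.type(), e.g. "SGD" or "Adam".
      boost::shared_ptr<caffe::Solver<float> > solver(
          caffe::SolverRegistry<float>::CreateSolver(solver_param));
      solver->Solve();
      return 0;
    }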
 
diff --git a/tools/upgrade_solver_proto_text.cpp b/tools/upgrade_solver_proto_text.cpp
new file mode 100644 (file)
index 0000000..7130232
--- /dev/null
+++ b/tools/upgrade_solver_proto_text.cpp
@@ -0,0 +1,50 @@
+// This is a script to upgrade old solver prototxts to the new format.
+// Usage:
+//    upgrade_solver_proto_text old_solver_proto_file_in solver_proto_file_out
+
+#include <cstring>
+#include <fstream>  // NOLINT(readability/streams)
+#include <iostream>  // NOLINT(readability/streams)
+#include <string>
+
+#include "caffe/caffe.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/upgrade_proto.hpp"
+
+using namespace caffe;  // NOLINT(build/namespaces)
+
+int main(int argc, char** argv) {
+  ::google::InitGoogleLogging(argv[0]);
+  if (argc != 3) {
+    LOG(ERROR) << "Usage: upgrade_solver_proto_text "
+        << "old_solver_proto_file_in solver_proto_file_out";
+    return 1;
+  }
+
+  SolverParameter solver_param;
+  string input_filename(argv[1]);
+  if (!ReadProtoFromTextFile(input_filename, &solver_param)) {
+    LOG(ERROR) << "Failed to parse input text file as SolverParameter: "
+               << input_filename;
+    return 2;
+  }
+  bool need_upgrade = SolverNeedsTypeUpgrade(solver_param);
+  bool success = true;
+  if (need_upgrade) {
+    success = UpgradeSolverAsNeeded(input_filename, &solver_param);
+    if (!success) {
+      LOG(ERROR) << "Encountered error(s) while upgrading prototxt; "
+                 << "see details above.";
+    }
+  } else {
+    LOG(ERROR) << "File already in latest proto format: " << input_filename;
+  }
+
+  // Save new format prototxt.
+  WriteProtoToTextFile(solver_param, argv[2]);
+
+  LOG(ERROR) << "Wrote upgraded SolverParameter text proto to " << argv[2];
+  return !success;
+}
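
Typical invocation of the new tool, assuming a Makefile build (file names are
illustrative):

    ./build/tools/upgrade_solver_proto_text old_solver.prototxt upgraded_solver.prototxt

Per the returns above, the exit code is 0 on success, 1 on bad usage or a
failed upgrade, and 2 when the input cannot be parsed as a SolverParameter.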