Swap the order of the LRN and max-pooling layers to match the Krizhevsky
architecture. Initialize biases to 0.1 where Krizhevsky used 1, since
training with biases initialized to 1 produced a flat loss curve.
--- /dev/null
+name: "AlexNet"
+input: "data"
+input_dim: 10
+input_dim: 3
+input_dim: 227
+input_dim: 227
+layers {
+ layer {
+ name: "conv1"
+ type: "conv"
+ num_output: 96
+ kernelsize: 11
+ stride: 4
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "data"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "relu1"
+ type: "relu"
+ }
+ bottom: "conv1"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "norm1"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv1"
+ top: "norm1"
+}
+layers {
+ layer {
+ name: "pool1"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm1"
+ top: "pool1"
+}
+layers {
+ layer {
+ name: "conv2"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 5
+ pad: 2
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool1"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "relu2"
+ type: "relu"
+ }
+ bottom: "conv2"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "norm2"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv2"
+ top: "norm2"
+}
+layers {
+ layer {
+ name: "pool2"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm2"
+ top: "pool2"
+}
+layers {
+ layer {
+ name: "conv3"
+ type: "conv"
+ num_output: 384
+ kernelsize: 3
+ pad: 1
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool2"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "relu3"
+ type: "relu"
+ }
+ bottom: "conv3"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "conv4"
+ type: "conv"
+ num_output: 384
+ group: 2
+ kernelsize: 3
+ pad: 1
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "conv3"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "relu4"
+ type: "relu"
+ }
+ bottom: "conv4"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "conv5"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 3
+ pad: 1
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "conv4"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "relu5"
+ type: "relu"
+ }
+ bottom: "conv5"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "pool5"
+ type: "pool"
+ kernelsize: 3
+ pool: MAX
+ stride: 2
+ }
+ bottom: "conv5"
+ top: "pool5"
+}
+layers {
+ layer {
+ name: "fc6"
+ type: "innerproduct"
+ num_output: 4096
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool5"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "relu6"
+ type: "relu"
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "drop6"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "fc7"
+ type: "innerproduct"
+ num_output: 4096
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "fc6"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "relu7"
+ type: "relu"
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "drop7"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "fc8"
+ type: "innerproduct"
+ num_output: 1000
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "fc7"
+ top: "fc8"
+}
+layers {
+ layer {
+ name: "prob"
+ type: "softmax"
+ }
+ bottom: "fc8"
+ top: "prob"
+}
--- /dev/null
+train_net: "alexnet_train.prototxt"
+test_net: "alexnet_val.prototxt"
+test_iter: 1000
+test_interval: 1000
+base_lr: 0.01
+lr_policy: "step"
+gamma: 0.1
+stepsize: 100000
+display: 20
+max_iter: 450000
+momentum: 0.9
+weight_decay: 0.0005
+snapshot: 10000
+snapshot_prefix: "caffe_alexnet_train"
--- /dev/null
+name: "AlexNet"
+layers {
+ layer {
+ name: "data"
+ type: "data"
+ source: "ilsvrc12_train_leveldb"
+ meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto"
+ batchsize: 256
+ cropsize: 227
+ mirror: true
+ }
+ top: "data"
+ top: "label"
+}
+layers {
+ layer {
+ name: "conv1"
+ type: "conv"
+ num_output: 96
+ kernelsize: 11
+ stride: 4
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "data"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "relu1"
+ type: "relu"
+ }
+ bottom: "conv1"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "norm1"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv1"
+ top: "norm1"
+}
+layers {
+ layer {
+ name: "pool1"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm1"
+ top: "pool1"
+}
+layers {
+ layer {
+ name: "conv2"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 5
+ pad: 2
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool1"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "relu2"
+ type: "relu"
+ }
+ bottom: "conv2"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "norm2"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv2"
+ top: "norm2"
+}
+layers {
+ layer {
+ name: "pool2"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm2"
+ top: "pool2"
+}
+layers {
+ layer {
+ name: "conv3"
+ type: "conv"
+ num_output: 384
+ kernelsize: 3
+ pad: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool2"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "relu3"
+ type: "relu"
+ }
+ bottom: "conv3"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "conv4"
+ type: "conv"
+ num_output: 384
+ group: 2
+ kernelsize: 3
+ pad: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "conv3"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "relu4"
+ type: "relu"
+ }
+ bottom: "conv4"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "conv5"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 3
+ pad: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "conv4"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "relu5"
+ type: "relu"
+ }
+ bottom: "conv5"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "pool5"
+ type: "pool"
+ kernelsize: 3
+ pool: MAX
+ stride: 2
+ }
+ bottom: "conv5"
+ top: "pool5"
+}
+layers {
+ layer {
+ name: "fc6"
+ type: "innerproduct"
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "pool5"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "relu6"
+ type: "relu"
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "drop6"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "fc7"
+ type: "innerproduct"
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "fc6"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "relu7"
+ type: "relu"
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "drop7"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "fc8"
+ type: "innerproduct"
+ num_output: 1000
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.
+ }
+ blobs_lr: 1.
+ blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
+ }
+ bottom: "fc7"
+ top: "fc8"
+}
+layers {
+ layer {
+ name: "loss"
+ type: "softmax_loss"
+ }
+ bottom: "fc8"
+ bottom: "label"
+}
--- /dev/null
+name: "AlexNet"
+layers {
+ layer {
+ name: "data"
+ type: "data"
+ source: "ilsvrc12_val_leveldb"
+ meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto"
+ batchsize: 50
+ cropsize: 227
+ mirror: false
+ }
+ top: "data"
+ top: "label"
+}
+layers {
+ layer {
+ name: "conv1"
+ type: "conv"
+ num_output: 96
+ kernelsize: 11
+ stride: 4
+ }
+ bottom: "data"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "relu1"
+ type: "relu"
+ }
+ bottom: "conv1"
+ top: "conv1"
+}
+layers {
+ layer {
+ name: "norm1"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv1"
+ top: "norm1"
+}
+layers {
+ layer {
+ name: "pool1"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm1"
+ top: "pool1"
+}
+layers {
+ layer {
+ name: "conv2"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 5
+ pad: 2
+ }
+ bottom: "pool1"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "relu2"
+ type: "relu"
+ }
+ bottom: "conv2"
+ top: "conv2"
+}
+layers {
+ layer {
+ name: "norm2"
+ type: "lrn"
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+ bottom: "conv2"
+ top: "norm2"
+}
+layers {
+ layer {
+ name: "pool2"
+ type: "pool"
+ pool: MAX
+ kernelsize: 3
+ stride: 2
+ }
+ bottom: "norm2"
+ top: "pool2"
+}
+layers {
+ layer {
+ name: "conv3"
+ type: "conv"
+ num_output: 384
+ kernelsize: 3
+ pad: 1
+ }
+ bottom: "pool2"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "relu3"
+ type: "relu"
+ }
+ bottom: "conv3"
+ top: "conv3"
+}
+layers {
+ layer {
+ name: "conv4"
+ type: "conv"
+ num_output: 384
+ group: 2
+ kernelsize: 3
+ pad: 1
+ }
+ bottom: "conv3"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "relu4"
+ type: "relu"
+ }
+ bottom: "conv4"
+ top: "conv4"
+}
+layers {
+ layer {
+ name: "conv5"
+ type: "conv"
+ num_output: 256
+ group: 2
+ kernelsize: 3
+ pad: 1
+ }
+ bottom: "conv4"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "relu5"
+ type: "relu"
+ }
+ bottom: "conv5"
+ top: "conv5"
+}
+layers {
+ layer {
+ name: "pool5"
+ type: "pool"
+ kernelsize: 3
+ pool: MAX
+ stride: 2
+ }
+ bottom: "conv5"
+ top: "pool5"
+}
+layers {
+ layer {
+ name: "fc6"
+ type: "innerproduct"
+ num_output: 4096
+ }
+ bottom: "pool5"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "relu6"
+ type: "relu"
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "drop6"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc6"
+ top: "fc6"
+}
+layers {
+ layer {
+ name: "fc7"
+ type: "innerproduct"
+ num_output: 4096
+ }
+ bottom: "fc6"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "relu7"
+ type: "relu"
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "drop7"
+ type: "dropout"
+ dropout_ratio: 0.5
+ }
+ bottom: "fc7"
+ top: "fc7"
+}
+layers {
+ layer {
+ name: "fc8"
+ type: "innerproduct"
+ num_output: 1000
+ }
+ bottom: "fc7"
+ top: "fc8"
+}
+layers {
+ layer {
+ name: "prob"
+ type: "softmax"
+ }
+ bottom: "fc8"
+ top: "prob"
+}
+layers {
+ layer {
+ name: "accuracy"
+ type: "accuracy"
+ }
+ bottom: "prob"
+ bottom: "label"
+ top: "accuracy"
+}
--- /dev/null
+#!/usr/bin/env sh
+
+TOOLS=../../build/tools
+
+GLOG_logtostderr=1 $TOOLS/train_net.bin alexnet_solver.prototxt
+
+echo "Done."