working on translator
authorYangqing Jia <jiayq84@gmail.com>
Tue, 8 Oct 2013 20:20:35 +0000 (13:20 -0700)
committerYangqing Jia <jiayq84@gmail.com>
Tue, 8 Oct 2013 20:20:35 +0000 (13:20 -0700)
12 files changed:
src/caffe/net.cpp
src/caffe/optimization/solver.cpp
src/caffe/pyutil/translator/__init__.py [new file with mode: 0644]
src/caffe/pyutil/translator/registerer.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_cmrnorm.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_conv.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_fc.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_neuron.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_pool.py [new file with mode: 0644]
src/caffe/pyutil/translator/translator_softmax.py [new file with mode: 0644]
src/caffe/util/io.cpp
src/programs/dump_network.cpp [new file with mode: 0644]

index 6b5e4af..6795ccc 100644 (file)
@@ -150,6 +150,10 @@ void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
         << "Incompatible number of blobs for layer " << source_layer_name;
     for (int j = 0; j < target_blobs.size(); ++j) {
+      CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
+      CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
+      CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
+      CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
       target_blobs[j]->FromProto(source_layer.blobs(j));
     }
   }
index 2b2656d..d841383 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/net.hpp"
+#include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/optimization/solver.hpp"
 
@@ -54,10 +55,7 @@ void Solver<Dtype>::Snapshot(bool is_final) {
   }
   string filename = ss.str();
   LOG(ERROR) << "Snapshotting to " << filename;
-  ofstream output_file;
-  output_file.open(filename.c_str());
-  CHECK(net_param.SerializeToOstream(&output_file));
-  output_file.close();
+  WriteProtoToBinaryFile(net_param, filename.c_str());
 }
 
 template <typename Dtype>
diff --git a/src/caffe/pyutil/translator/__init__.py b/src/caffe/pyutil/translator/__init__.py
new file mode 100644 (file)
index 0000000..0375f10
--- /dev/null
@@ -0,0 +1,18 @@
+"""This module implements a translator model that is able to convert a network
+trained using Alex Krizhevsky's cuda-convnet code to a caffe net. It is
+implemented for debugging reason, and also for easier translation with layers
+trained under cuda-convnet.
+"""
+
+# first of all, import the registerer
+# pylint: disable=W0401
+from caffe.pyutil.translator.registerer import *
+from caffe.pyutil.translator.conversions import *
+
+# In the lines below, we will import all the translators we implemented.
+import translator_cmrnorm
+import translator_conv
+import translator_fc
+import translator_neuron
+import translator_pool
+import translator_softmax
diff --git a/src/caffe/pyutil/translator/registerer.py b/src/caffe/pyutil/translator/registerer.py
new file mode 100644 (file)
index 0000000..3232196
--- /dev/null
@@ -0,0 +1,82 @@
+"""registerer is a simple module that allows one to register a custom
+translator for a specific cuda layer.
+"""
+
+from caffe.proto import caffe_pb2
+import logging
+
+# DATA_TYPENAME is the typename for the data layers at cuda convnet.
+DATA_TYPENAME = 'data'
+# likewise, cost typename
+COST_TYPENAME = 'cost'
+# _TRANSLATORS is a dictionary mapping layer type names to functions that do
+# the actual translation.
+_TRANSLATORS = {}
+
+
+def register_translator(name, translator):
+    """Registers a translator."""
+    _TRANSLATORS[name] = translator
+
+
+def translate_layer(cuda_layer):
+    """Translates a cuda layer to a decaf layer. The function will return
+    False if the input layer is a data layer, in which no decaf layer needs to
+    be inserted.
+
+    Input:
+        cuda_layer: a cuda layer as a dictionary, produced by the cuda convnet
+            code.
+    Output:
+        caffe_layers: a list of the corresponding caffe layer(s), or False.
+    """
+    layertype = cuda_layer['type']
+    if layertype == DATA_TYPENAME or layertype.startswith(COST_TYPENAME):
+        # if the layer type is data, it is simply a data layer.
+        logging.info('Ignoring layer %s (type %s)', cuda_layer['name'],
+                     cuda_layer['type'])
+        return False
+    elif layertype in _TRANSLATORS:
+        logging.info('Translating layer %s (type %s)', cuda_layer['name'],
+                     cuda_layer['type'])
+        return _TRANSLATORS[layertype](cuda_layer)
+    else:
+        raise TypeError('No registered translator for %s (type %s).' %
+            (cuda_layer['name'], cuda_layer['type']))
+
+
+def translate_cuda_network(cuda_layers):
+    """Translates a list of cuda layers to a decaf net.
+
+    Input:
+        cuda_layers: a list of layers from the cuda convnet training.
+    Output:
+        net_param: the net parameter corresponding to the cuda net.
+    """
+    caffe_net = caffe_pb2.NetParameter()
+    caffe_net.name = 'CaffeNet'
+    provided_blobs = set()
+    for cuda_layer in cuda_layers:
+        if cuda_layer['type'] == DATA_TYPENAME:
+            logging.error('Ignoring data layer %s' % cuda_layer['name'])
+            continue
+        elif cuda_layer['type'].startswith(COST_TYPENAME):
+            logging.error('Ignoring cost layer %s' % cuda_layer['name'])
+            continue
+        logging.error('Translating layer %s' % cuda_layer['name'])
+        layer_params = translate_layer(cuda_layer)
+        # Now, let's figure out the inputs of the layer
+        if len(cuda_layer['inputs']) != 1:
+            raise ValueError('Layer %s takes more than 1 input (not supported)'
+                % cuda_layer['name'])
+        needs = cuda_layers[cuda_layer['inputs'][0]]['name']
+        if needs not in provided_blobs:
+            caffe_net.input.extend([needs])
+        for layer_param in layer_params:
+            caffe_net.layers.add()
+            caffe_net.layers[-1].layer.CopyFrom(layer_param)
+            caffe_net.layers[-1].bottom.append(needs)
+            caffe_net.layers[-1].top.append(layer_param.name)
+            provided_blobs.add(layer_param.name)
+            needs = layer_param.name
+    return caffe_net
diff --git a/src/caffe/pyutil/translator/translator_cmrnorm.py b/src/caffe/pyutil/translator/translator_cmrnorm.py
new file mode 100644 (file)
index 0000000..9cd7a7a
--- /dev/null
@@ -0,0 +1,20 @@
+"""Translates the cmrnorm layer."""
+from caffe.pyutil.translator import registerer
+from caffe.proto import caffe_pb2
+
+
+def translator_cmrnorm(cuda_layer):
+    """Translates the cmrnorm layer.
+    Note: we hard-code the constant in the local response normalization
+    layer to be 1. This may be different from Krizhevsky's NIPS paper but
+    matches the actual cuda convnet code.
+    """
+    output = caffe_pb2.LayerParameter()
+    output.name = cuda_layer['name']
+    output.type = 'lrn'
+    output.local_size = cuda_layer['size']
+    output.alpha = cuda_layer['scale'] * cuda_layer['size']
+    output.beta = cuda_layer['pow']
+    return [output]
+
+registerer.register_translator('cmrnorm', translator_cmrnorm)
diff --git a/src/caffe/pyutil/translator/translator_conv.py b/src/caffe/pyutil/translator/translator_conv.py
new file mode 100644 (file)
index 0000000..a41d326
--- /dev/null
@@ -0,0 +1,43 @@
+"""Translates the convolution and group convolution layers."""
+from caffe.pyutil.translator import registerer
+from caffe.proto import caffe_pb2
+from caffe.pyutil import convert
+import numpy as np
+
+#pylint: disable=R0914
+def translator_conv(cuda_layer):
+    """Translates the convolution and group convolution layers."""
+    outputs = []
+    output_layer = caffe_pb2.LayerParameter()
+    if not cuda_layer['sharedBiases']:
+        raise ValueError('Unshared bias layers not supported yet.')
+    pad = -cuda_layer['padding'][0]
+    if pad != 0:
+        # add a padding layer first
+        pad_layer = caffe_pb2.LayerParameter()
+        pad_layer.name = cuda_layer['name'] + 'pad'
+        pad_layer.type = 'padding'
+        pad_layer.pad = pad
+        outputs.append(pad_layer)
+    output_layer.name = cuda_layer['name']
+    output_layer.type = 'conv'
+    output_layer.num_output = cuda_layer['filters']
+    output_layer.group = cuda_layer['groups'][0]
+    output_layer.kernelsize = cuda_layer['filterSize'][0]
+    output_layer.stride = cuda_layer['stride'][0]
+    # For cuda convnet, the weight is input_channels, ksize, ksize, num_kernels
+    weight = cuda_layer['weights'][0].reshape(
+        cuda_layer['channels'][0] / cuda_layer['groups'][0],
+        cuda_layer['filterSize'][0], cuda_layer['filterSize'][0],
+        cuda_layer['filters'])
+    # However, our weight is organized as num_kernels, input_channels, ksize, ksize
+    out_weight = weight.swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
+    # The bias is simple.
+    bias = cuda_layer['biases'].flatten()
+    output_layer.blobs.extend(
+        [convert.array_to_blobproto(out_weight),
+         convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+    outputs.append(output_layer)
+    return outputs
+
+registerer.register_translator('conv', translator_conv)
diff --git a/src/caffe/pyutil/translator/translator_fc.py b/src/caffe/pyutil/translator/translator_fc.py
new file mode 100644 (file)
index 0000000..ff57189
--- /dev/null
@@ -0,0 +1,25 @@
+"""translator_fc translates a fully connected layer to a decaf
+InnerProductLayer.
+"""
+from caffe.proto import caffe_pb2
+from caffe.pyutil.translator import registerer
+from caffe.pyutil import convert
+import numpy as np
+from operator import mul
+
+def translator_fc(cuda_layer):
+    """The translator for the fc layer."""
+    output_layer = caffe_pb2.LayerParameter()
+    output_layer.name = cuda_layer['name']
+    output_layer.type = 'innerproduct'
+    output_layer.num_output = cuda_layer['outputs']
+
+    weight = cuda_layer['weights'][0]
+    weight.resize(weight.size / cuda_layer['outputs'], cuda_layer['outputs'])
+    bias = cuda_layer['biases'][0].flatten()
+    output_layer.blobs.extend(
+        [convert.array_to_blobproto(weight.T.reshape((1,1) + weight.T.shape)),
+         convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+    return [output_layer]
+
+registerer.register_translator('fc', translator_fc)
diff --git a/src/caffe/pyutil/translator/translator_neuron.py b/src/caffe/pyutil/translator/translator_neuron.py
new file mode 100644 (file)
index 0000000..f0bd648
--- /dev/null
@@ -0,0 +1,25 @@
+"""Translates the neuron layers."""
+from caffe.pyutil.translator import registerer
+from caffe.proto import caffe_pb2
+import logging
+
+
+def translator_neuron(cuda_layer):
+    """Translates the neuron layers.
+    Note: not all neuron layers are supported. We only implemented those that
+    are needed for imagenet.
+    """
+    output_layer = caffe_pb2.LayerParameter()
+    output_layer.name = cuda_layer['name']
+    neurontype = cuda_layer['neuron']['type']
+    if neurontype == 'relu':
+        output_layer.type = 'relu'
+    elif neurontype == 'dropout':
+        output_layer.type = 'dropout'
+        output_layer.dropout_ratio = cuda_layer['neuron']['params']['d']
+    else:
+        raise NotImplementedError('Neuron type %s not implemented yet.'
+                                  % neurontype)
+    return [output_layer]
+
+registerer.register_translator('neuron', translator_neuron)
diff --git a/src/caffe/pyutil/translator/translator_pool.py b/src/caffe/pyutil/translator/translator_pool.py
new file mode 100644 (file)
index 0000000..b971e66
--- /dev/null
@@ -0,0 +1,31 @@
+"""Translates the pooling layers."""
+from caffe.pyutil.translator import registerer
+from caffe.proto import caffe_pb2
+import math
+
+def translator_pool(cuda_layer):
+    """Translates the pooling layers."""
+    output_layer = caffe_pb2.LayerParameter()
+    output_layer.name = cuda_layer['name']
+    output_layer.type = 'pool'
+    method = cuda_layer['pool']
+    if method == 'max':
+        output_layer.pool = caffe_pb2.LayerParameter.MAX
+    elif method == 'avg':
+        output_layer.pool = caffe_pb2.LayerParameter.AVE
+    else:
+        raise NotImplementedError('Unrecognized pooling method: %s' % method)
+    if cuda_layer['start'] != 0:
+        raise NotImplementedError('Unsupported layer with a non-zero start.')
+    # Check the outputsX size.
+    output_size = math.ceil(
+        float(cuda_layer['imgSize'] - cuda_layer['sizeX']) /
+        cuda_layer['stride']) + 1
+    if cuda_layer['outputsX'] != output_size:
+        raise NotImplementedError('Unsupported layer with custom output size.')
+    # If all checks passed, we will return our pooling layer
+    output_layer.kernelsize = cuda_layer['sizeX']
+    output_layer.stride = cuda_layer['stride']
+    return [output_layer]
+
+registerer.register_translator('pool', translator_pool)
diff --git a/src/caffe/pyutil/translator/translator_softmax.py b/src/caffe/pyutil/translator/translator_softmax.py
new file mode 100644 (file)
index 0000000..8b495c9
--- /dev/null
@@ -0,0 +1,13 @@
+"""Translates the softmax layers."""
+from caffe.pyutil.translator import registerer
+from caffe.proto import caffe_pb2
+
+
+def translator_softmax(cuda_layer):
+    """Translates the softmax layers."""
+    output_layer = caffe_pb2.LayerParameter()
+    output_layer.name = cuda_layer['name']
+    output_layer.type = 'softmax'
+    return [output_layer]
+
+registerer.register_translator('softmax', translator_softmax)
index b7a830b..c4d35ce 100644 (file)
@@ -4,6 +4,7 @@
 #include <fcntl.h>
 #include <google/protobuf/text_format.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
 #include <opencv2/core/core.hpp>
 #include <opencv2/highgui/highgui.hpp>
 
@@ -24,6 +25,10 @@ using std::max;
 using std::string;
 using google::protobuf::io::FileInputStream;
 using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::ZeroCopyInputStream;
+using google::protobuf::io::CodedInputStream;
+using google::protobuf::io::ZeroCopyOutputStream;
+using google::protobuf::io::CodedOutputStream;
 
 namespace caffe {
 
@@ -116,13 +121,28 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) {
 }
 
 void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
-  fstream input(filename, ios::in | ios::binary);
-  CHECK(proto->ParseFromIstream(&input));
+  int fd = open(filename, O_RDONLY);
+  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+  CodedInputStream* coded_input = new CodedInputStream(raw_input);
+  coded_input->SetTotalBytesLimit(536870912, 268435456);
+
+  CHECK(proto->ParseFromCodedStream(coded_input));
+
+  delete coded_input;
+  delete raw_input;
+  close(fd);
 }
 
 void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
-  fstream output(filename, ios::out | ios::trunc | ios::binary);
-  CHECK(proto.SerializeToOstream(&output));
+  int fd = open(filename, O_WRONLY);
+  ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
+  CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+
+  CHECK(proto.SerializeToCodedStream(coded_output));
+
+  delete coded_output;
+  delete raw_output;
+  close(fd);
 }
 
 }  // namespace caffe
diff --git a/src/programs/dump_network.cpp b/src/programs/dump_network.cpp
new file mode 100644 (file)
index 0000000..c2162b4
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright 2013 Yangqing Jia
+
+#include <cuda_runtime.h>
+#include <fcntl.h>
+#include <google/protobuf/text_format.h>
+
+#include <cstring>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/net.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/util/io.hpp"
+#include "caffe/optimization/solver.hpp"
+
+using namespace caffe;
+
+int main(int argc, char** argv) {
+  cudaSetDevice(1);
+  Caffe::set_mode(Caffe::GPU);
+
+  NetParameter net_param;
+  ReadProtoFromBinaryFile(argv[1], &net_param);
+  BlobProto input_blob_proto;
+  ReadProtoFromBinaryFile(argv[2], &input_blob_proto);
+  shared_ptr<Blob<float> > input_blob(new Blob<float>());
+  input_blob->FromProto(input_blob_proto);
+
+  vector<Blob<float>* > input_vec;
+  input_vec.push_back(input_blob.get());
+  // For implementational reasons, we need to first set up the net, and
+  // then copy the trained parameters.
+  shared_ptr<Net<float> > caffe_net(new Net<float>(net_param, input_vec));
+  caffe_net->CopyTrainedLayersFrom(net_param);
+
+  // Run the network without training.
+  LOG(ERROR) << "Performing Forward";
+  caffe_net->Forward(input_vec);
+
+  // Dump results.
+  return 0;
+}