from caffe.proto import caffe_pb2
import numpy as np
-def blobproto_to_array(blob):
- arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
- blob.width)
- return arr
+def blobproto_to_array(blob, return_diff=False):
+ """Convert a blob proto to an array. By default, we will just return the data,
+ unless return_diff is True, in which case we will return the diff.
+ """
+ if return_diff:
+ return np.array(blob.diff).reshape(
+ blob.num, blob.channels, blob.height, blob.width)
+ else:
+ return np.array(blob.data).reshape(
+ blob.num, blob.channels, blob.height, blob.width)
-def array_to_blobproto(arr):
+def array_to_blobproto(arr, diff=None):
+ """Converts a 4-dimensional array to a blob proto. If diff is given, also
+ convert the diff. You need to make sure that arr and diff have the same
+ shape; this function does not perform any sanity check on that.
+ """
if arr.ndim != 4:
raise ValueError('Incorrect array shape.')
blob = caffe_pb2.BlobProto()
blob.num, blob.channels, blob.height, blob.width = arr.shape;
blob.data.extend(arr.astype(float).flat)
+ if diff is not None:
+ blob.diff.extend(diff.astype(float).flat)
return blob
-def array_to_datum(arr):
+def array_to_datum(arr, label=0):
+ """Converts a 3-dimensional array to datum. If the array has dtype uint8,
+ the output data will be encoded as a string. Otherwise, the output data
+ will be stored in float format.
+ """
if arr.ndim != 3:
raise ValueError('Incorrect array shape.')
datum = caffe_pb2.Datum()
datum.data = arr.tostring()
else:
datum.float_data.extend(arr.flat)
+ datum.label = label
return datum
+
+def datum_to_array(datum):
+ """Converts a datum to an array. Note that the label is not returned,
+ as one can easily get it by reading datum.label.
+ """
+ if len(datum.data):
+ return np.fromstring(datum.data, dtype = np.uint8).reshape(
+ datum.channels, datum.height, datum.width)
+ else:
+ return np.array(datum.float_data).astype(float).reshape(
+ datum.channels, datum.height, datum.width)
\ No newline at end of file
def register_translator(name, translator):
- """Registers a translator."""
- _TRANSLATORS[name] = translator
+ """Registers a translator."""
+ _TRANSLATORS[name] = translator
def translate_layer(cuda_layer):
- """Translates a cuda layer to a decaf layer. The function will return
- False if the input layer is a data layer, in which no decaf layer needs to
- be inserted.
+ """Translates a cuda layer to a decaf layer. The function will return
+ False if the input layer is a data or cost layer, in which case no decaf
+ layer needs to be inserted.
- Input:
- cuda_layer: a cuda layer as a dictionary, produced by the cuda convnet
- code.
- Output:
- caffe_layer: the corresponding caffe layer
- """
- layertype = cuda_layer['type']
- if layertype == DATA_TYPENAME or layertype.startswith(COST_TYPENAME):
- # if the layer type is data, it is simply a data layer.
- logging.info('Ignoring layer %s (type %s)', cuda_layer['name'],
- cuda_layer['type'])
- return False
- elif layertype in _TRANSLATORS:
- logging.info('Translating layer %s (type %s)', cuda_layer['name'],
- cuda_layer['type'])
- return _TRANSLATORS[layertype](cuda_layer)
- else:
- raise TypeError('No registered translator for %s (type %s).' %
- (cuda_layer['name'], cuda_layer['type']))
+ Input:
+ cuda_layer: a cuda layer as a dictionary, produced by the cuda convnet
+ code.
+ Output:
+ caffe_layer: the corresponding caffe layer
+ """
+ layertype = cuda_layer['type']
+ if layertype == DATA_TYPENAME or layertype.startswith(COST_TYPENAME):
+ # Data and cost layers are ignored: log that and return False.
+ logging.info('Ignoring layer %s (type %s)',
+ cuda_layer['name'], cuda_layer['type'])
+ return False
+ elif layertype in _TRANSLATORS:
+ logging.info('Translating layer %s (type %s)',
+ cuda_layer['name'], cuda_layer['type'])
+ return _TRANSLATORS[layertype](cuda_layer)
+ else:
+ raise TypeError('No registered translator for %s (type %s).' %
+ (cuda_layer['name'], cuda_layer['type']))
def translate_cuda_network(cuda_layers):
- """Translates a list of cuda layers to a decaf net.
+ """Translates a list of cuda layers to a decaf net.
- Input:
- cuda_layers: a list of layers from the cuda convnet training.
- Output:
- net_param: the net parameter corresponding to the cuda net.
- """
- caffe_net = caffe_pb2.NetParameter()
- caffe_net.name = 'CaffeNet'
- provided_blobs = set()
- for cuda_layer in cuda_layers:
- if cuda_layer['type'] == DATA_TYPENAME:
- logging.error('Ignoring data layer %s' % cuda_layer['name'])
- continue
- elif cuda_layer['type'].startswith(COST_TYPENAME):
- logging.error('Ignoring cost layer %s' % cuda_layer['name'])
- continue
- logging.error('Translating layer %s' % cuda_layer['name'])
- layer_params = translate_layer(cuda_layer)
- # Now, let's figure out the inputs of the layer
- if len(cuda_layer['inputs']) != 1:
- raise ValueError('Layer %s takes more than 1 input (not supported)'
- % cuda_layer['name'])
- needs = cuda_layers[cuda_layer['inputs'][0]]['name']
- if needs not in provided_blobs:
- caffe_net.input.extend([needs])
- for layer_param in layer_params:
- caffe_net.layers.add()
- caffe_net.layers[-1].layer.CopyFrom(layer_param)
- caffe_net.layers[-1].bottom.append(needs)
- caffe_net.layers[-1].top.append(layer_param.name)
- provided_blobs.add(layer_param.name)
- needs = layer_param.name
- return caffe_net
+ Input:
+ cuda_layers: a list of layers from the cuda convnet training.
+ Output:
+ net_param: the net parameter corresponding to the cuda net.
+ """
+ caffe_net = caffe_pb2.NetParameter()
+ caffe_net.name = 'CaffeNet'
+ provided_blobs = set()
+ for cuda_layer in cuda_layers:
+ if cuda_layer['type'] == DATA_TYPENAME:
+ logging.error('Ignoring data layer %s' % cuda_layer['name'])
+ continue
+ elif cuda_layer['type'].startswith(COST_TYPENAME):
+ logging.error('Ignoring cost layer %s' % cuda_layer['name'])
+ continue
+ logging.error('Translating layer %s' % cuda_layer['name'])
+ layer_params = translate_layer(cuda_layer)
+ # Now, let's figure out the inputs of the layer
+ if len(cuda_layer['inputs']) != 1:
+ raise ValueError('Layer %s takes more than 1 input (not supported)'
+ % cuda_layer['name'])
+ needs = cuda_layers[cuda_layer['inputs'][0]]['name']
+ if needs not in provided_blobs:
+ caffe_net.input.extend([needs])
+ for layer_param in layer_params:
+ caffe_net.layers.add()
+ caffe_net.layers[-1].layer.CopyFrom(layer_param)
+ caffe_net.layers[-1].bottom.append(needs)
+ caffe_net.layers[-1].top.append(layer_param.name)
+ provided_blobs.add(layer_param.name)
+ needs = layer_param.name
+ return caffe_net
def translator_cmrnorm(cuda_layer):
- """Translates the cmrnorm layer.
- Note: we hard-code the constant in the local response normalization
- layer to be 1. This may be different from Krizhevsky's NIPS paper but
- matches the actual cuda convnet code.
- """
- output = caffe_pb2.LayerParameter()
- output.name = cuda_layer['name']
- output.type = 'lrn'
- output.local_size = cuda_layer['size']
- output.alpha = cuda_layer['scale'] * cuda_layer['size']
- output.beta = cuda_layer['pow']
- return [output]
+ """Translates the cmrnorm layer.
+ Note: we hard-code the constant in the local response normalization
+ layer to be 1. This may be different from Krizhevsky's NIPS paper but
+ matches the actual cuda convnet code.
+ """
+ output = caffe_pb2.LayerParameter()
+ output.name = cuda_layer['name']
+ output.type = 'lrn'
+ output.local_size = cuda_layer['size']
+ output.alpha = cuda_layer['scale'] * cuda_layer['size']
+ output.beta = cuda_layer['pow']
+ return [output]
registerer.register_translator('cmrnorm', translator_cmrnorm)
#pylint: disable=R0914
def translator_conv(cuda_layer):
- """Translates the convolution and group convolution layers."""
- outputs = []
- output_layer = caffe_pb2.LayerParameter()
- if not cuda_layer['sharedBiases']:
- raise ValueError('Unshared bias layers not supported yet.')
- pad = -cuda_layer['padding'][0]
- if pad != 0:
- # add a padding layer first
- pad_layer = caffe_pb2.LayerParameter()
- pad_layer.name = cuda_layer['name'] + 'pad'
- pad_layer.type = 'padding'
- pad_layer.pad = pad
- outputs.append(pad_layer)
- output_layer.name = cuda_layer['name']
- output_layer.type = 'conv'
- output_layer.num_output = cuda_layer['filters']
- output_layer.group = cuda_layer['groups'][0]
- output_layer.kernelsize = cuda_layer['filterSize'][0]
- output_layer.stride = cuda_layer['stride'][0]
- # For cuda convnet, the weight is input_channels, ksize, ksize, num_kernels
- weight = cuda_layer['weights'][0].reshape(
- cuda_layer['channels'][0] / cuda_layer['groups'][0],
- cuda_layer['filterSize'][0], cuda_layer['filterSize'][0],
- cuda_layer['filters'])
- # However, our weight is organized as num_kernels, input_channels, ksize, ksize
- out_weight = weight.swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
- # The bias is simple.
- bias = cuda_layer['biases'].flatten()
- output_layer.blobs.extend(
- [convert.array_to_blobproto(out_weight),
- convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
- outputs.append(output_layer)
- return outputs
+ """Translates the convolution and group convolution layers."""
+ outputs = []
+ output_layer = caffe_pb2.LayerParameter()
+ if not cuda_layer['sharedBiases']:
+ raise ValueError('Unshared bias layers not supported yet.')
+ pad = -cuda_layer['padding'][0]
+ if pad != 0:
+ # add a padding layer first
+ pad_layer = caffe_pb2.LayerParameter()
+ pad_layer.name = cuda_layer['name'] + 'pad'
+ pad_layer.type = 'padding'
+ pad_layer.pad = pad
+ outputs.append(pad_layer)
+ output_layer.name = cuda_layer['name']
+ output_layer.type = 'conv'
+ output_layer.num_output = cuda_layer['filters']
+ output_layer.group = cuda_layer['groups'][0]
+ output_layer.kernelsize = cuda_layer['filterSize'][0]
+ output_layer.stride = cuda_layer['stride'][0]
+ # For cuda convnet, the weight is input_channels, ksize, ksize, num_kernels
+ weight = cuda_layer['weights'][0].reshape(
+ cuda_layer['channels'][0] / cuda_layer['groups'][0],
+ cuda_layer['filterSize'][0], cuda_layer['filterSize'][0],
+ cuda_layer['filters'])
+ # However, our weight is organized as num_kernels, input_channels, ksize, ksize
+ out_weight = weight.swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
+ # The bias is simple.
+ bias = cuda_layer['biases'].flatten()
+ output_layer.blobs.extend(
+ [convert.array_to_blobproto(out_weight),
+ convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+ outputs.append(output_layer)
+ return outputs
registerer.register_translator('conv', translator_conv)
from operator import mul
def translator_fc(cuda_layer):
- """The translator for the fc layer."""
- output_layer = caffe_pb2.LayerParameter()
- output_layer.name = cuda_layer['name']
- output_layer.type = 'innerproduct'
- output_layer.num_output = cuda_layer['outputs']
+ """The translator for the fc layer."""
+ output_layer = caffe_pb2.LayerParameter()
+ output_layer.name = cuda_layer['name']
+ output_layer.type = 'innerproduct'
+ output_layer.num_output = cuda_layer['outputs']
- weight = cuda_layer['weights'][0]
- weight.resize(weight.size / cuda_layer['outputs'], cuda_layer['outputs'])
- bias = cuda_layer['biases'][0].flatten()
- output_layer.blobs.extend(
- [convert.array_to_blobproto(weight.T.reshape((1,1) + weight.T.shape)),
- convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
- return [output_layer]
+ weight = cuda_layer['weights'][0]
+ weight.resize(weight.size / cuda_layer['outputs'], cuda_layer['outputs'])
+ bias = cuda_layer['biases'][0].flatten()
+ output_layer.blobs.extend(
+ [convert.array_to_blobproto(weight.T.reshape((1,1) + weight.T.shape)),
+ convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+ return [output_layer]
registerer.register_translator('fc', translator_fc)
def translator_neuron(cuda_layer):
- """Translates the neuron layers.
- Note: not all neuron layers are supported. We only implemented those that
- are needed for imagenet.
- """
- output_layer = caffe_pb2.LayerParameter()
- output_layer.name = cuda_layer['name']
- neurontype = cuda_layer['neuron']['type']
- if neurontype == 'relu':
- output_layer.type = 'relu'
- elif neurontype == 'dropout':
- output_layer.type = 'dropout'
- output_layer.dropout_ratio = cuda_layer['neuron']['params']['d']
- else:
- raise NotImplementedError('Neuron type %s not implemented yet.'
- % neurontype)
- return [output_layer]
+ """Translates the neuron layers.
+ Note: not all neuron layers are supported. We only implemented those that
+ are needed for imagenet.
+ """
+ output_layer = caffe_pb2.LayerParameter()
+ output_layer.name = cuda_layer['name']
+ neurontype = cuda_layer['neuron']['type']
+ if neurontype == 'relu':
+ output_layer.type = 'relu'
+ elif neurontype == 'dropout':
+ output_layer.type = 'dropout'
+ output_layer.dropout_ratio = cuda_layer['neuron']['params']['d']
+ else:
+ raise NotImplementedError(
+ 'Neuron type %s not implemented yet.' % neurontype)
+ return [output_layer]
registerer.register_translator('neuron', translator_neuron)
import math
def translator_pool(cuda_layer):
- """Translates the pooling layers."""
- output_layer = caffe_pb2.LayerParameter()
- output_layer.name = cuda_layer['name']
- output_layer.type = 'pool'
- method = cuda_layer['pool']
- if method == 'max':
- output_layer.pool = caffe_pb2.LayerParameter.MAX
- elif method == 'avg':
- output_layer.pool = caffe_pb2.LayerParameter.AVE
- else:
- raise NotImplementedError('Unrecognized pooling method: %s' % method)
- if cuda_layer['start'] != 0:
- raise NotImplementedError('Unsupported layer with a non-zero start.')
- # Check the outputsX size.
- output_size = math.ceil(
- float(cuda_layer['imgSize'] - cuda_layer['sizeX']) /
- cuda_layer['stride']) + 1
- if cuda_layer['outputsX'] != output_size:
- raise NotImplementedError('Unsupported layer with custon output size.')
- # If all checks passed, we will return our pooling layer
- output_layer.kernelsize = cuda_layer['sizeX']
- output_layer.stride = cuda_layer['stride']
- return [output_layer]
+ """Translates the pooling layers."""
+ output_layer = caffe_pb2.LayerParameter()
+ output_layer.name = cuda_layer['name']
+ output_layer.type = 'pool'
+ method = cuda_layer['pool']
+ if method == 'max':
+ output_layer.pool = caffe_pb2.LayerParameter.MAX
+ elif method == 'avg':
+ output_layer.pool = caffe_pb2.LayerParameter.AVE
+ else:
+ raise NotImplementedError('Unrecognized pooling method: %s' % method)
+ if cuda_layer['start'] != 0:
+ raise NotImplementedError('Unsupported layer with a non-zero start.')
+ # Check the outputsX size.
+ output_size = math.ceil(
+ float(cuda_layer['imgSize'] - cuda_layer['sizeX']) /
+ cuda_layer['stride']) + 1
+ if cuda_layer['outputsX'] != output_size:
+ raise NotImplementedError('Unsupported layer with custon output size.')
+ # If all checks passed, we will return our pooling layer
+ output_layer.kernelsize = cuda_layer['sizeX']
+ output_layer.stride = cuda_layer['stride']
+ return [output_layer]
registerer.register_translator('pool', translator_pool)