pyutil update
author Yangqing Jia <jiayq84@gmail.com>
Mon, 14 Oct 2013 22:48:04 +0000 (15:48 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
Mon, 14 Oct 2013 22:48:04 +0000 (15:48 -0700)
src/caffe/pyutil/convert.py
src/caffe/pyutil/translator/registerer.py
src/caffe/pyutil/translator/translator_cmrnorm.py
src/caffe/pyutil/translator/translator_conv.py
src/caffe/pyutil/translator/translator_fc.py
src/caffe/pyutil/translator/translator_neuron.py
src/caffe/pyutil/translator/translator_pool.py

diff --git a/src/caffe/pyutil/convert.py b/src/caffe/pyutil/convert.py
index 6bc76ae..483e4ea 100644
@@ -4,20 +4,36 @@
 from caffe.proto import caffe_pb2
 import numpy as np
 
-def blobproto_to_array(blob):
-  arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
-      blob.width)
-  return arr
+def blobproto_to_array(blob, return_diff=False):
+  """Convert a blob proto to an array. In default, we will just return the data,
+  unless return_diff is True, in which case we will return the diff.
+  """
+  if return_diff:
+    return np.array(blob.diff).reshape(
+        blob.num, blob.channels, blob.height, blob.width)
+  else:
+    return np.array(blob.data).reshape(
+        blob.num, blob.channels, blob.height, blob.width)
 
-def array_to_blobproto(arr):
+def array_to_blobproto(arr, diff=None):
+  """Converts a 4-dimensional array to blob proto. If diff is given, also
+  convert the diff. You need to make sure that arr and diff have the same
+  shape, and this function does not do sanity check.
+  """
   if arr.ndim != 4:
     raise ValueError('Incorrect array shape.')
   blob = caffe_pb2.BlobProto()
   blob.num, blob.channels, blob.height, blob.width = arr.shape
   blob.data.extend(arr.astype(float).flat)
+  if diff is not None:
+    blob.diff.extend(diff.astype(float).flat)
   return blob
 
-def array_to_datum(arr):
+def array_to_datum(arr, label=0):
+  """Converts a 3-dimensional array to datum. If the array has dtype uint8,
+  the output data will be encoded as a string. Otherwise, the output data
+  will be stored in float format.
+  """
   if arr.ndim != 3:
     raise ValueError('Incorrect array shape.')
   datum = caffe_pb2.Datum()
@@ -26,4 +42,16 @@ def array_to_datum(arr):
     datum.data = arr.tostring()
   else:
     datum.float_data.extend(arr.flat)
+  datum.label = label
   return datum
+
+def datum_to_array(datum):
+  """Converts a datum to an array. Note that the label is not returned,
+  as one can easily get it by calling datum.label.
+  """
+  if len(datum.data):
+    return np.fromstring(datum.data, dtype=np.uint8).reshape(
+        datum.channels, datum.height, datum.width)
+  else:
+    return np.array(datum.float_data).astype(float).reshape(
+        datum.channels, datum.height, datum.width)
\ No newline at end of file
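
A minimal usage sketch of the new conversion helpers (the array shapes and the
label value below are illustrative, not taken from this commit):

import numpy as np
from caffe.pyutil import convert

# 4-d array <-> BlobProto round trip (data is stored as 32-bit floats).
arr = np.random.rand(2, 3, 4, 5)
blob = convert.array_to_blobproto(arr)
assert np.allclose(convert.blobproto_to_array(blob), arr)

# 3-d uint8 array <-> Datum round trip; the label rides along on the datum.
img = (np.random.rand(3, 32, 32) * 255).astype(np.uint8)
datum = convert.array_to_datum(img, label=7)
assert np.array_equal(convert.datum_to_array(datum), img)
assert datum.label == 7
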
diff --git a/src/caffe/pyutil/translator/registerer.py b/src/caffe/pyutil/translator/registerer.py
index 3232196..8c139b0 100644
@@ -15,68 +15,68 @@ _TRANSLATORS = {}
 
 
 def register_translator(name, translator):
-    """Registers a translator."""
-    _TRANSLATORS[name] = translator
+  """Registers a translator."""
+  _TRANSLATORS[name] = translator
 
 
 def translate_layer(cuda_layer):
-    """Translates a cuda layer to a decaf layer. The function will return
-    False if the input layer is a data layer, in which no decaf layer needs to
-    be inserted.
+  """Translates a cuda layer to a decaf layer. The function will return
+  False if the input layer is a data layer, in which no decaf layer needs to
+  be inserted.
 
-    Input:
-        cuda_layer: a cuda layer as a dictionary, produced by the cuda convnet
-            code.
-    Output:
-        caffe_layer: the corresponding caffe layer
-    """
-    layertype = cuda_layer['type']
-    if layertype == DATA_TYPENAME or layertype.startswith(COST_TYPENAME):
-        # if the layer type is data, it is simply a data layer.
-        logging.info('Ignoring layer %s (type %s)', cuda_layer['name'],
-                     cuda_layer['type'])
-        return False
-    elif layertype in _TRANSLATORS:
-        logging.info('Translating layer %s (type %s)', cuda_layer['name'],
-                     cuda_layer['type'])
-        return _TRANSLATORS[layertype](cuda_layer)
-    else:
-        raise TypeError('No registered translator for %s (type %s).' %
-            (cuda_layer['name'], cuda_layer['type']))
+  Input:
+    cuda_layer: a cuda layer as a dictionary, produced by the cuda convnet
+      code.
+  Output:
+    caffe_layer: the corresponding caffe layer
+  """
+  layertype = cuda_layer['type']
+  if layertype == DATA_TYPENAME or layertype.startswith(COST_TYPENAME):
+    # Data and cost layers have no corresponding caffe layer.
+    logging.info('Ignoring layer %s (type %s)',
+        cuda_layer['name'], cuda_layer['type'])
+    return False
+  elif layertype in _TRANSLATORS:
+    logging.info('Translating layer %s (type %s)',
+        cuda_layer['name'], cuda_layer['type'])
+    return _TRANSLATORS[layertype](cuda_layer)
+  else:
+    raise TypeError('No registered translator for %s (type %s).' %
+      (cuda_layer['name'], cuda_layer['type']))
 
 
 def translate_cuda_network(cuda_layers):
-    """Translates a list of cuda layers to a decaf net.
+  """Translates a list of cuda layers to a decaf net.
 
-    Input:
-        cuda_layers: a list of layers from the cuda convnet training.
-    Output:
-        net_param: the net parameter corresponding to the cuda net.
-    """
-    caffe_net = caffe_pb2.NetParameter()
-    caffe_net.name = 'CaffeNet'
-    provided_blobs = set()
-    for cuda_layer in cuda_layers:
-        if cuda_layer['type'] == DATA_TYPENAME:
-            logging.error('Ignoring data layer %s' % cuda_layer['name'])
-            continue
-        elif cuda_layer['type'].startswith(COST_TYPENAME):
-            logging.error('Ignoring cost layer %s' % cuda_layer['name'])
-            continue
-        logging.error('Translating layer %s' % cuda_layer['name'])
-        layer_params = translate_layer(cuda_layer)
-        # Now, let's figure out the inputs of the layer
-        if len(cuda_layer['inputs']) != 1:
-            raise ValueError('Layer %s takes more than 1 input (not supported)'
-                % cuda_layer['name'])
-        needs = cuda_layers[cuda_layer['inputs'][0]]['name']
-        if needs not in provided_blobs:
-            caffe_net.input.extend([needs])
-        for layer_param in layer_params:
-            caffe_net.layers.add()
-            caffe_net.layers[-1].layer.CopyFrom(layer_param)
-            caffe_net.layers[-1].bottom.append(needs)
-            caffe_net.layers[-1].top.append(layer_param.name)
-            provided_blobs.add(layer_param.name)
-            needs = layer_param.name
-    return caffe_net
+  Input:
+    cuda_layers: a list of layers from the cuda convnet training.
+  Output:
+    net_param: the net parameter corresponding to the cuda net.
+  """
+  caffe_net = caffe_pb2.NetParameter()
+  caffe_net.name = 'CaffeNet'
+  provided_blobs = set()
+  for cuda_layer in cuda_layers:
+    if cuda_layer['type'] == DATA_TYPENAME:
+      logging.info('Ignoring data layer %s' % cuda_layer['name'])
+      continue
+    elif cuda_layer['type'].startswith(COST_TYPENAME):
+      logging.info('Ignoring cost layer %s' % cuda_layer['name'])
+      continue
+    logging.info('Translating layer %s' % cuda_layer['name'])
+    layer_params = translate_layer(cuda_layer)
+    # Now, let's figure out the inputs of the layer
+    if len(cuda_layer['inputs']) != 1:
+      raise ValueError('Layer %s does not take exactly 1 input (not supported)'
+          % cuda_layer['name'])
+    needs = cuda_layers[cuda_layer['inputs'][0]]['name']
+    if needs not in provided_blobs:
+      caffe_net.input.extend([needs])
+    for layer_param in layer_params:
+      caffe_net.layers.add()
+      caffe_net.layers[-1].layer.CopyFrom(layer_param)
+      caffe_net.layers[-1].bottom.append(needs)
+      caffe_net.layers[-1].top.append(layer_param.name)
+      provided_blobs.add(layer_param.name)
+      needs = layer_param.name
+  return caffe_net
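
A sketch of how the registration hook is used; the 'identity' layer type and
its translator below are hypothetical, for illustration only:

from caffe.proto import caffe_pb2
from caffe.pyutil.translator import registerer

def translator_identity(cuda_layer):
  """Hypothetical translator that emits a single relu layer."""
  output = caffe_pb2.LayerParameter()
  output.name = cuda_layer['name']
  output.type = 'relu'
  return [output]

registerer.register_translator('identity', translator_identity)
# translate_layer dispatches on the 'type' key of the layer dictionary.
layers = registerer.translate_layer({'name': 'id1', 'type': 'identity'})
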
diff --git a/src/caffe/pyutil/translator/translator_cmrnorm.py b/src/caffe/pyutil/translator/translator_cmrnorm.py
index 9cd7a7a..c21199f 100644
@@ -4,17 +4,17 @@ from caffe.proto import caffe_pb2
 
 
 def translator_cmrnorm(cuda_layer):
-    """Translates the cmrnorm layer.
-    Note: we hard-code the constant in the local response normalization
-    layer to be 1. This may be different from Krizhevsky's NIPS paper but
-    matches the actual cuda convnet code.
-    """
-    output = caffe_pb2.LayerParameter()
-    output.name = cuda_layer['name']
-    output.type = 'lrn'
-    output.local_size = cuda_layer['size']
-    output.alpha = cuda_layer['scale'] * cuda_layer['size']
-    output.beta = cuda_layer['pow']
-    return [output]
+  """Translates the cmrnorm layer.
+  Note: we hard-code the constant in the local response normalization
+  layer to be 1. This may be different from Krizhevsky's NIPS paper but
+  matches the actual cuda convnet code.
+  """
+  output = caffe_pb2.LayerParameter()
+  output.name = cuda_layer['name']
+  output.type = 'lrn'
+  output.local_size = cuda_layer['size']
+  output.alpha = cuda_layer['scale'] * cuda_layer['size']
+  output.beta = cuda_layer['pow']
+  return [output]
 
 registerer.register_translator('cmrnorm', translator_cmrnorm)
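
For instance, with values typical of imagenet-style networks (size=5,
scale=0.0001, pow=0.75; illustrative, not taken from this commit), the
translation yields local_size=5, alpha=0.0001*5=0.0005 and beta=0.75:

from caffe.pyutil.translator.translator_cmrnorm import translator_cmrnorm
layer = translator_cmrnorm({'name': 'norm1', 'type': 'cmrnorm',
                            'size': 5, 'scale': 0.0001, 'pow': 0.75})[0]
# layer.local_size == 5; layer.alpha == 0.0005 (up to float32 rounding);
# layer.beta == 0.75
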
diff --git a/src/caffe/pyutil/translator/translator_conv.py b/src/caffe/pyutil/translator/translator_conv.py
index a41d326..b8a3739 100644
@@ -6,38 +6,38 @@ import numpy as np
 
 #pylint: disable=R0914
 def translator_conv(cuda_layer):
-    """Translates the convolution and group convolution layers."""
-    outputs = []
-    output_layer = caffe_pb2.LayerParameter()
-    if not cuda_layer['sharedBiases']:
-        raise ValueError('Unshared bias layers not supported yet.')
-    pad = -cuda_layer['padding'][0]
-    if pad != 0:
-        # add a padding layer first
-        pad_layer = caffe_pb2.LayerParameter()
-        pad_layer.name = cuda_layer['name'] + 'pad'
-        pad_layer.type = 'padding'
-        pad_layer.pad = pad
-        outputs.append(pad_layer)
-    output_layer.name = cuda_layer['name']
-    output_layer.type = 'conv'
-    output_layer.num_output = cuda_layer['filters']
-    output_layer.group = cuda_layer['groups'][0]
-    output_layer.kernelsize = cuda_layer['filterSize'][0]
-    output_layer.stride = cuda_layer['stride'][0]
-    # For cuda convnet, the weight is input_channels, ksize, ksize, num_kernels
-    weight = cuda_layer['weights'][0].reshape(
-        cuda_layer['channels'][0] / cuda_layer['groups'][0],
-        cuda_layer['filterSize'][0], cuda_layer['filterSize'][0],
-        cuda_layer['filters'])
-    # However, our weight is organized as num_kernels, input_channels, ksize, ksize
-    out_weight = weight.swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
-    # The bias is simple.
-    bias = cuda_layer['biases'].flatten()
-    output_layer.blobs.extend(
-        [convert.array_to_blobproto(out_weight),
-         convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
-    outputs.append(output_layer)
-    return outputs
+  """Translates the convolution and group convolution layers."""
+  outputs = []
+  output_layer = caffe_pb2.LayerParameter()
+  if not cuda_layer['sharedBiases']:
+    raise ValueError('Unshared bias layers not supported yet.')
+  pad = -cuda_layer['padding'][0]
+  if pad != 0:
+    # add a padding layer first
+    pad_layer = caffe_pb2.LayerParameter()
+    pad_layer.name = cuda_layer['name'] + 'pad'
+    pad_layer.type = 'padding'
+    pad_layer.pad = pad
+    outputs.append(pad_layer)
+  output_layer.name = cuda_layer['name']
+  output_layer.type = 'conv'
+  output_layer.num_output = cuda_layer['filters']
+  output_layer.group = cuda_layer['groups'][0]
+  output_layer.kernelsize = cuda_layer['filterSize'][0]
+  output_layer.stride = cuda_layer['stride'][0]
+  # For cuda convnet, the weight is input_channels, ksize, ksize, num_kernels
+  weight = cuda_layer['weights'][0].reshape(
+    cuda_layer['channels'][0] / cuda_layer['groups'][0],
+    cuda_layer['filterSize'][0], cuda_layer['filterSize'][0],
+    cuda_layer['filters'])
+  # However, our weight is organized as num_kernels, input_channels, ksize, ksize
+  out_weight = weight.swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
+  # The bias is simple.
+  bias = cuda_layer['biases'].flatten()
+  output_layer.blobs.extend(
+    [convert.array_to_blobproto(out_weight),
+     convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+  outputs.append(output_layer)
+  return outputs
 
 registerer.register_translator('conv', translator_conv)
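
The three swapaxes calls amount to a single transpose from
(channels, ksize, ksize, filters) to (filters, channels, ksize, ksize); a
quick equivalence check (the shape below is illustrative):

import numpy as np
weight = np.random.rand(3, 11, 11, 96)
out_weight = weight.swapaxes(2, 3).swapaxes(1, 2).swapaxes(0, 1)
assert out_weight.shape == (96, 3, 11, 11)
assert np.array_equal(out_weight, weight.transpose(3, 0, 1, 2))
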
diff --git a/src/caffe/pyutil/translator/translator_fc.py b/src/caffe/pyutil/translator/translator_fc.py
index ff57189..b7a9314 100644
@@ -8,18 +8,18 @@ import numpy as np
 from operator import mul
 
 def translator_fc(cuda_layer):
-    """The translator for the fc layer."""
-    output_layer = caffe_pb2.LayerParameter()
-    output_layer.name = cuda_layer['name']
-    output_layer.type = 'innerproduct'
-    output_layer.num_output = cuda_layer['outputs']
+  """The translator for the fc layer."""
+  output_layer = caffe_pb2.LayerParameter()
+  output_layer.name = cuda_layer['name']
+  output_layer.type = 'innerproduct'
+  output_layer.num_output = cuda_layer['outputs']
 
-    weight = cuda_layer['weights'][0]
-    weight.resize(weight.size / cuda_layer['outputs'], cuda_layer['outputs'])
-    bias = cuda_layer['biases'][0].flatten()
-    output_layer.blobs.extend(
-        [convert.array_to_blobproto(weight.T.reshape((1,1) + weight.T.shape)),
-         convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
-    return [output_layer]
+  weight = cuda_layer['weights'][0]
+  weight.resize(weight.size / cuda_layer['outputs'], cuda_layer['outputs'])
+  bias = cuda_layer['biases'][0].flatten()
+  output_layer.blobs.extend(
+      [convert.array_to_blobproto(weight.T.reshape((1,1) + weight.T.shape)),
+       convert.array_to_blobproto(bias.reshape(1, 1, 1, bias.size))])
+  return [output_layer]
 
 registerer.register_translator('fc', translator_fc)
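
A sketch of the fc weight bookkeeping (shapes are illustrative): cuda-convnet
stores the weight as (input_dim, outputs), while the blob expects
(1, 1, outputs, input_dim), hence the transpose before reshaping:

import numpy as np
weight = np.random.rand(256, 10)            # (input_dim, outputs)
blob_shaped = weight.T.reshape((1, 1) + weight.T.shape)
assert blob_shaped.shape == (1, 1, 10, 256)
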
diff --git a/src/caffe/pyutil/translator/translator_neuron.py b/src/caffe/pyutil/translator/translator_neuron.py
index f0bd648..52ee95d 100644
@@ -5,21 +5,21 @@ import logging
 
 
 def translator_neuron(cuda_layer):
-    """Translates the neuron layers.
-    Note: not all neuron layers are supported. We only implemented those that
-    are needed for imagenet.
-    """
-    output_layer = caffe_pb2.LayerParameter()
-    output_layer.name = cuda_layer['name']
-    neurontype = cuda_layer['neuron']['type']
-    if neurontype == 'relu':
-        output_layer.type = 'relu'
-    elif neurontype == 'dropout':
-        output_layer.type = 'dropout'
-        output_layer.dropout_ratio = cuda_layer['neuron']['params']['d']
-    else:
-        raise NotImplementedError('Neuron type %s not implemented yet.'
-                                  % neurontype)
-    return [output_layer]
+  """Translates the neuron layers.
+  Note: not all neuron layers are supported. We only implemented those that
+  are needed for imagenet.
+  """
+  output_layer = caffe_pb2.LayerParameter()
+  output_layer.name = cuda_layer['name']
+  neurontype = cuda_layer['neuron']['type']
+  if neurontype == 'relu':
+    output_layer.type = 'relu'
+  elif neurontype == 'dropout':
+    output_layer.type = 'dropout'
+    output_layer.dropout_ratio = cuda_layer['neuron']['params']['d']
+  else:
+    raise NotImplementedError(
+        'Neuron type %s not implemented yet.' % neurontype)
+  return [output_layer]
 
 registerer.register_translator('neuron', translator_neuron)
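
For example, a dropout neuron layer dictionary (values illustrative)
translates to a dropout layer with the ratio copied over:

from caffe.pyutil.translator.translator_neuron import translator_neuron
layer = translator_neuron({'name': 'drop6',
                           'neuron': {'type': 'dropout',
                                      'params': {'d': 0.5}}})[0]
# layer.type == 'dropout', layer.dropout_ratio == 0.5
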
diff --git a/src/caffe/pyutil/translator/translator_pool.py b/src/caffe/pyutil/translator/translator_pool.py
index b971e66..cc62ea0 100644
@@ -4,28 +4,28 @@ from caffe.proto import caffe_pb2
 import math
 
 def translator_pool(cuda_layer):
-    """Translates the pooling layers."""
-    output_layer = caffe_pb2.LayerParameter()
-    output_layer.name = cuda_layer['name']
-    output_layer.type = 'pool'
-    method = cuda_layer['pool']
-    if method == 'max':
-        output_layer.pool = caffe_pb2.LayerParameter.MAX
-    elif method == 'avg':
-        output_layer.pool = caffe_pb2.LayerParameter.AVE
-    else:
-        raise NotImplementedError('Unrecognized pooling method: %s' % method)
-    if cuda_layer['start'] != 0:
-        raise NotImplementedError('Unsupported layer with a non-zero start.')
-    # Check the outputsX size.
-    output_size = math.ceil(
-        float(cuda_layer['imgSize'] - cuda_layer['sizeX']) /
-        cuda_layer['stride']) + 1
-    if cuda_layer['outputsX'] != output_size:
-        raise NotImplementedError('Unsupported layer with custon output size.')
-    # If all checks passed, we will return our pooling layer
-    output_layer.kernelsize = cuda_layer['sizeX']
-    output_layer.stride = cuda_layer['stride']
-    return [output_layer]
+  """Translates the pooling layers."""
+  output_layer = caffe_pb2.LayerParameter()
+  output_layer.name = cuda_layer['name']
+  output_layer.type = 'pool'
+  method = cuda_layer['pool']
+  if method == 'max':
+    output_layer.pool = caffe_pb2.LayerParameter.MAX
+  elif method == 'avg':
+    output_layer.pool = caffe_pb2.LayerParameter.AVE
+  else:
+    raise NotImplementedError('Unrecognized pooling method: %s' % method)
+  if cuda_layer['start'] != 0:
+    raise NotImplementedError('Unsupported layer with a non-zero start.')
+  # Check the outputsX size.
+  output_size = math.ceil(
+    float(cuda_layer['imgSize'] - cuda_layer['sizeX']) /
+    cuda_layer['stride']) + 1
+  if cuda_layer['outputsX'] != output_size:
+    raise NotImplementedError('Unsupported layer with custom output size.')
+  # If all checks passed, we will return our pooling layer
+  output_layer.kernelsize = cuda_layer['sizeX']
+  output_layer.stride = cuda_layer['stride']
+  return [output_layer]
 
 registerer.register_translator('pool', translator_pool)
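
The outputsX check mirrors the usual pooling arithmetic,
output_size = ceil((imgSize - sizeX) / stride) + 1; for example, with
imgSize=55, sizeX=3 and stride=2 (illustrative values):

import math
output_size = math.ceil(float(55 - 3) / 2) + 1   # ceil(26.0) + 1 yields 27
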