Quantize bypasses after activations.
authorSuharsh Sivakumar <suharshs@google.com>
Tue, 20 Mar 2018 03:42:00 +0000 (20:42 -0700)
committerTensorFlower Gardener <gardener@tensorflow.org>
Tue, 20 Mar 2018 03:45:28 +0000 (20:45 -0700)
PiperOrigin-RevId: 189686219
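
The new match covers bypass (skip) connections that appear after an
activation, e.g. ResNet-style residual adds. As a rough sketch of the graph
shape this handles, modeled on the test added below (tf.contrib APIs of this
era; exact layer arguments are illustrative, not prescriptive):

    import tensorflow as tf
    from tensorflow.contrib.quantize.python import quantize

    graph = tf.Graph()
    with graph.as_default():
      inputs = tf.zeros((5, 128, 128, 3))
      skip = tf.zeros((5, 64, 64, 32))
      # conv -> biasadd -> activation, matched by the existing patterns.
      act = tf.contrib.layers.conv2d(
          inputs, 32, [5, 5], stride=2, padding='SAME',
          activation_fn=tf.nn.relu, scope='test/test')
      # Bypass after the activation: the pattern this change now matches.
      bypass = tf.add(act, skip, name='test/add')
      _ = tf.identity(bypass, name='test/output')

      # Rewrite the graph in place. After this change a FakeQuant op is also
      # inserted on the output of the post-activation Add
      # ('post_activation_bypass_quant'), not only before it.
      quantize.Quantize(graph, True, weight_bits=8, activation_bits=8)

Previously the rewrite stopped at the activation, so no quantization ranges
were recorded for the output of such an Add.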

tensorflow/contrib/quantize/python/quantize.py
tensorflow/contrib/quantize/python/quantize_test.py

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 6cc097b..9780e6d 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -123,10 +123,47 @@ def Quantize(graph,
           vars_collection=vars_collection,
           bits=activation_bits)
 
+    if layer_match.post_activation_bypass_op is not None:
+      _InsertQuantOp(
+          add_context,
+          'post_activation_bypass_quant',
+          layer_match.post_activation_bypass_op,
+          input_to_ops_map.ConsumerOperations(
+              layer_match.post_activation_bypass_op),
+          is_training,
+          moving_avg=True,
+          ema_decay=ema_decay,
+          quant_delay=quant_delay,
+          vars_collection=vars_collection,
+          bits=activation_bits)
+
 
 def _FindLayersToQuantize(graph):
   """Matches layers in graph to quantize.
 
+  The following patterns get matched. Nodes surrounded by [] will be
+  optionally matched:
+
+          weight|folded_weight
+                /
+         conv|fc
+            |
+    [post_conv_correction]
+            |
+     biasadd|folded_bias
+            |
+         [bypass]
+            |
+        activation
+            |
+   [post_activation_bypass]
+
+  Match replacements:
+    If weight|folded_weight is found, FakeQuant is added afterwards.
+    If bypass is found, FakeQuant is added before and after.
+    If activation is found, FakeQuant is added afterwards.
+    If post_activation_bypass is found, FakeQuant is added afterwards.
+
   Args:
     graph: Graph to perform match on.
 
@@ -179,7 +216,7 @@ def _FindLayersToQuantize(graph):
               [bias_add_pattern, folded_bias_add_pattern])
       ])
 
-  # The input to the activation can come from bias add, fold bias add or the
+  # The input to the activation can come from bias add, folded bias add, or the
   # bypasses.
   activation_pattern = graph_matcher.OpTypePattern(
       '|'.join(_ACTIVATION_TYPES),
@@ -190,7 +227,16 @@ def _FindLayersToQuantize(graph):
           ])
       ])
 
-  layer_matcher = graph_matcher.GraphMatcher(activation_pattern)
+  post_activation_bypass_pattern_a = graph_matcher.OpTypePattern(
+      'Add', inputs=['*', activation_pattern])
+  post_activation_bypass_pattern_b = graph_matcher.OpTypePattern(
+      'Add', inputs=[activation_pattern, '*'])
+
+  layer_matcher = graph_matcher.GraphMatcher(
+      graph_matcher.OneofPattern([
+          post_activation_bypass_pattern_a, post_activation_bypass_pattern_b,
+          activation_pattern
+      ]))
   for match_result in layer_matcher.match_graph(graph):
     layer_op = match_result.get_op(layer_pattern)
     weight_tensor = match_result.get_tensor(weight_pattern)
@@ -203,8 +249,19 @@ def _FindLayersToQuantize(graph):
     bypass_op = match_result.get_op(bypass_pattern_a)
     if bypass_op is None:
       bypass_op = match_result.get_op(bypass_pattern_b)
+    post_activation_bypass_op = match_result.get_op(
+        post_activation_bypass_pattern_a)
+    if post_activation_bypass_op is None:
+      post_activation_bypass_op = match_result.get_op(
+          post_activation_bypass_pattern_b)
+    # If we don't find a post_activation_bypass_op but activation_op has a
+    # bypass following it, then we need to skip this match, since there will be
+    # another match that includes post_activation_bypass_op.
+    if post_activation_bypass_op is None and _HasPostActivationBypass(
+        activation_op):
+      continue
     yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op,
-                      bias_add_op)
+                      post_activation_bypass_op, bias_add_op)
 
   # Match the final layer, where there will not be an activation and instead
   # the output of the final BiasAdd must be quantized, so we treat it as the
@@ -215,19 +272,32 @@ def _FindLayersToQuantize(graph):
   for match_result in final_layer_matcher.match_graph(graph):
     layer_op = match_result.get_op(layer_pattern)
     weight_tensor = match_result.get_tensor(weight_pattern)
+    if weight_tensor is None:
+      weight_tensor = match_result.get_tensor(folded_weight_pattern)
     activation_op = match_result.get_op(bias_add_pattern)
-    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None)
+    if activation_op is None:
+      activation_op = match_result.get_op(folded_bias_add_pattern)
+    yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None, None)
+
+
+def _HasPostActivationBypass(activation_op):
+  for activation_tensor in activation_op.outputs:
+    for output_op in activation_tensor.consumers():
+      if output_op.type == 'Add':
+        return True
+  return False
 
 
 class _LayerMatch(object):
   """Contains all information related to a matched Layer."""
 
   def __init__(self, layer_op, weight_tensor, activation_op, bypass_op,
-               bias_add_op):
+               post_activation_bypass_op, bias_add_op):
     self._layer_op = layer_op
     self._weight_tensor = weight_tensor
     self._activation_op = activation_op
     self._bypass_op = bypass_op
+    self._post_activation_bypass_op = post_activation_bypass_op
     self._bias_add_op = bias_add_op
 
   @property
@@ -247,6 +317,10 @@ class _LayerMatch(object):
     return self._bypass_op
 
   @property
+  def post_activation_bypass_op(self):
+    return self._post_activation_bypass_op
+
+  @property
   def bias_add_op(self):
     return self._bias_add_op
 
diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py
index ef59475..8e60f4b 100644
--- a/tensorflow/contrib/quantize/python/quantize_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_test.py
@@ -135,6 +135,35 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       self.assertTrue('FakeQuantWithMinMaxVars' in
                       [op.type for op in bias_add_op.outputs[0].consumers()])
 
+  def testPostActivationBypassQuantized(self):
+    self._RunTestOverParameters(self._TestPostActivationBypassQuantized)
+
+  def _TestPostActivationBypassQuantized(self, is_training):
+    graph = ops.Graph()
+    with graph.as_default():
+      batch_size, height, width, depth = 5, 128, 128, 3
+      input1 = array_ops.zeros((batch_size, height, width, depth))
+      input2 = array_ops.zeros((batch_size, height // 2, width // 2, 32))
+      conv = conv2d(
+          input1,
+          32, [5, 5],
+          stride=2,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=array_ops.identity,
+          scope='test/test')
+      bypass_tensor = math_ops.add(conv, input2, name='test/add')
+      _ = array_ops.identity(bypass_tensor, name='test/output')
+
+      quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)
+
+      # Ensure that the bypass node is preceded and followed by
+      # FakeQuantWithMinMaxVars operations.
+      self.assertTrue('FakeQuantWithMinMaxVars' in
+                      [c.type for c in bypass_tensor.consumers()])
+      self.assertTrue('FakeQuantWithMinMaxVars' in
+                      [i.op.type for i in bypass_tensor.op.inputs])
+
   def _WeightInit(self, stddev):
     """Returns truncated normal variable initializer.