from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
from tensorflow.python.platform import tf_logging
from tensorflow.python.training import device_setter as device_setter_lib
from tensorflow.python.training import training_util
-def replicate_model_fn(model_fn,
- optimizer_fn,
- loss_reduction=losses.Reduction.SUM,
- devices=None):
+def replicate_model_fn(model_fn, optimizer_fn, devices=None):
"""Replicate `Estimator.model_fn` over GPUs within a single host.
The given `model_fn` specifies a single forward pass of a model. To replicate
such a model over GPUs, each GPU gets its own instance of the forward pass
(a.k.a. a tower). The input features and labels get sharded into the chunks
- that correspond to the number of GPUs. Each tower computes a loss based
+ that correspond to the number of GPUs. Each tower computes its own loss based
on its input. For each such loss, gradients are computed. After that, the
- available losses are aggregated to form aggregated loss. Available
- gradients are summed. Then, they update weights using the specified
+ available losses are summed to form the aggregated loss. The per-tower
+ gradients are summed as well, and the summed gradients are applied to the
+ weights by the specified
optimizer.
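+ For example, with two GPUs a batch of four examples is sharded into two
+ chunks of two examples each, one per tower, and the two tower losses are
+ then summed into the single reported loss.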
If `devices` is `None`, then all available GPUs are going to be used for
replication.
On reduction algorithms:
Certain algorithms were chosen for aggregating results of computations on
multiple towers:
- - Losses from all towers are reduced according to `loss_reduction`.
+ - Losses from all towers are reduced using sum.
- Gradients are reduced using sum for each trainable variable.
- `eval_metric_ops` are reduced per metric using `reduce_mean`.
- `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
concatenated along the batch dimension.
optimizer_fn: a function that returns an optimizer instance. The function
may accept one `params` argument. This is the `params` argument as
defined by `Estimator`. See the `Estimator` documentation for details.
- loss_reduction: controls whether losses are summed or averaged.
devices: Optional list of devices to replicate the model across. This
argument can be used to replicate only on a subset of the available GPUs.
If `None`, then all available GPUs are going to be used for replication.
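+ Example (a minimal usage sketch; `my_model_fn`, `my_optimizer_fn` and the
+ `params` dict are hypothetical placeholders, not names from this module):
+
+ ```python
+ def my_model_fn(features, labels, mode, params):
+   # A single-tower forward pass that returns an `EstimatorSpec`.
+   ...
+
+ def my_optimizer_fn(params):
+   return tf.train.GradientDescentOptimizer(params['learning_rate'])
+
+ replicated_model_fn = replicate_model_fn(
+     my_model_fn, my_optimizer_fn, devices=['/gpu:0', '/gpu:1'])
+ estimator = tf.estimator.Estimator(
+     model_fn=replicated_model_fn, params=params)
+ ```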
return _replicate_model_fn_with_mode(
model_fn,
optimizer_fn,
- loss_reduction,
devices,
- # TODO(isaprykin): Query the system configuration to choose modes other
- # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
- # appropriate.
+ # TODO(isaprykin): Query system configuration to choose modes other than
+ # `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often appropriate.
mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
def _replicate_model_fn_with_mode(
model_fn,
optimizer_fn,
- loss_reduction=losses.Reduction.SUM,
devices=None,
mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
"""A version of `replicate_model_fn` that allows to specify a `mode`."""
- if loss_reduction == losses.Reduction.NONE:
- raise ValueError('Tower losses need to be reduced in some way, yet {} '
- 'reduction is specified.'.format(loss_reduction))
if not devices:
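+ # Default to all local GPUs; fall back to local CPU devices if the host
+ # has no GPUs.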
devices = _get_local_devices('GPU') or _get_local_devices('CPU')
features=feature_shards,
labels=label_shards,
params=params,
- loss_reduction=loss_reduction,
config=config,
devices=devices,
local_ps_devices=ps_devices)
config,
devices,
local_ps_devices,
- loss_reduction=losses.Reduction.SUM,
name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
"""Replicate the loss computation across devices."""
tower_specs = []
if labels:
labels_shard = labels[i]
- tower_spec = model_fn(
- mode=mode,
- features=features[i],
- labels=labels_shard,
- **optional_params)
- if loss_reduction != losses.Reduction.SUM:
- tower_spec = _scale_tower_loss(
- tower_spec, number_of_towers=len(devices))
- tower_specs.append(tower_spec)
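+ # Each tower applies the unmodified `model_fn` to its own shard of the
+ # input; the per-tower losses and gradients are aggregated by the callers.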
+ tower_specs.append(
+ model_fn(
+ mode=mode,
+ features=features[i],
+ labels=labels_shard,
+ **optional_params))
return tower_specs
return local_device_chooser
-def _scale_tower_loss(tower_spec, number_of_towers):
- """Scale down the loss for arriving at the average loss by summing."""
- if tower_spec.loss is None:
- return tower_spec
-
- estimator_spec = tower_spec._asdict()
- estimator_spec['loss'] = math_ops.div(
- estimator_spec['loss'], 1.0 * number_of_towers, name='averaged_loss')
- return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
def _minimize_towers(tower_specs, optimizer):
"""Aggregate and apply gradients for computed losses."""
grad_lists = {}
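+ # Map each trainable variable to the list of gradients computed for it on
+ # the individual towers; summing each list yields the gradient of the
+ # summed tower losses.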
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import losses
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics as metrics_lib
from tensorflow.python.ops import variable_scope
total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
self.assertEqual(total_loss, session.run(estimator_spec.loss))
- # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
+ # The derivative of the loss with respect to c is 1 + 2 = 3.
# new value of c = 10 - learning rate * 3 = 7.0.
session.run(estimator_spec.train_op)
with variable_scope.variable_scope('', reuse=True):
c = variable_scope.get_variable('c', dtype=dtypes.float64)
self.assertEqual(7.0, session.run(c))
- def test_train_with_mean_reduction(self):
- features = np.array([[1.0], [2.0]])
- labels = np.array([[1.0], [2.0]])
-
- with self.test_session() as session:
- replicated_model_fn = replicate_model_fn.replicate_model_fn(
- self.model_fn,
- self.optimizer_fn,
- losses.Reduction.MEAN,
- devices=['/gpu:0', '/gpu:1'])
- estimator_spec = replicated_model_fn(
- features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
- session.run(variables.global_variables_initializer())
-
- # loss = feature * c - label
- total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
- self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
- # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
- # It's the same computation as without mean reduction, but the
- # loss from every tower is scaled by 1/<number of towers>.
- # new value of c = 10 - learning rate * 1.5 = 8.5
- session.run(estimator_spec.train_op)
- with variable_scope.variable_scope('', reuse=True):
- c = variable_scope.get_variable('c', dtype=dtypes.float64)
- self.assertEqual(8.5, session.run(c))
-
def test_train_spec_with_optimizer_without_params(self):
def optimizer_fn_without_params():
self.assertEqual(0, auc)
self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
- def test_eval_with_mean_reduction(self):
- features = np.array([[0.01], [0.002]])
- labels = np.array([[0.01], [0.02]])
-
- with self.test_session() as session:
- replicated_model_fn = replicate_model_fn.replicate_model_fn(
- self.model_fn,
- self.optimizer_fn,
- losses.Reduction.MEAN,
- devices=['/gpu:0', '/gpu:1'])
- estimator_spec = replicated_model_fn(
- features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
- session.run(variables.local_variables_initializer())
- session.run(variables.global_variables_initializer())
-
- accuracy, a = estimator_spec.eval_metric_ops['accuracy']
- auc, b = estimator_spec.eval_metric_ops['auc']
-
- session.run([a, b])
- accuracy = session.run(accuracy)
- auc = session.run(auc)
-
- # loss[i] = features[i] * 10 - labels[i].
- # Accuracy is 0.0 (no match) in the first tower.
- # Accuracy is 1.0 (match) in the second tower, since the feature
- # times weight "c" happened to be equal to the label.
- total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
-
- self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
- self.assertEqual(0, auc)
- self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
-
def test_predict(self):
features = np.array([[0.01], [0.002]])
labels = np.array([[0.01], [0.02]])
'probabilities': np.array([[0.1], [0.02]])
}, session.run(estimator_spec.predictions))
- def test_unsupported_loss_reduction(self):
- with self.assertRaisesRegexp(ValueError, ''):
- _ = replicate_model_fn.replicate_model_fn(
- self.model_fn, self.optimizer_fn, losses.Reduction.NONE)
-
class GetLossTowersTest(test_util.TensorFlowTestCase):
c = variable_scope.get_variable('c', dtype=dtypes.float64)
self.assertEqual(0.25, session.run(c))
- def test_gradients_are_computed_with_mean_reduction(self):
- with self.test_session() as session:
- tower_specs = replicate_model_fn._get_loss_towers(
- self.model_fn,
- mode=None,
- features=[[0.6], [1.6]],
- labels=[[0.6], [0.6]],
- params=None,
- loss_reduction=losses.Reduction.MEAN,
- config=None,
- devices=['/gpu:0', '/gpu:1'],
- local_ps_devices=['/gpu:0'],
- name_scope_pattern='test_tower_{}')
- session.run(variables.global_variables_initializer())
-
- self.assertEqual(len(tower_specs), 2)
-
- self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
- self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
- self.assertEqual(0.5, session.run(tower_specs[0].loss))
-
- self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
- self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
- # The input batch for the second tower had a loss that is 1.0
- # bigger: 0.6 vs 1.6.
- self.assertEqual(1.0, session.run(tower_specs[1].loss))
-
- self.assertEqual(1, len(variables.global_variables()))
- self.assertEqual(1, len(variables.trainable_variables()))
-
- with variable_scope.variable_scope('', reuse=True):
- c = variable_scope.get_variable('c', dtype=dtypes.float64)
- self.assertEqual(0.25, session.run(c))
-
def test_variables_are_round_robined_correctly(self):
"""Test that creates multiple variables and tests round-robin placement."""