From 7f9ab7f8c5e161562656604d9b22939b1f97c791 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 19 Mar 2018 10:37:00 -0700 Subject: [PATCH] Documentation tweaks and tests for GradientTape with graph execution. PiperOrigin-RevId: 189604536 --- tensorflow/python/eager/backprop.py | 76 +++++++++++--------------- tensorflow/python/eager/backprop_test.py | 25 ++++++--- tensorflow/python/framework/ops.py | 35 ++++++++---- tensorflow/python/ops/resource_variable_ops.py | 32 +++++------ tensorflow/python/ops/variables.py | 4 +- 5 files changed, 88 insertions(+), 84 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 88de1a9..9b997fe 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -638,63 +638,53 @@ _default_vspace = imperative_grad.VSpace( class GradientTape(object): - """Records operations to use to compute gradients. + """Record operations for automatic differentiation. - Operations are recorded if: - - they happen in code marked by this context manager - - at least one of their inputs is being watched + Operations are recorded if they are executed within this context manager and + at least one of their inputs is being "watched". - Outputs of recorded operations are watched. Variables are automatically - watched and tensors can be manually watched by calling the watch method on the - context manager. + Variables (created by @{tf.contrib.eager.Variable} or @{tf.get_variable}) + are automatically watched. Tensors can be manually watched by invoking the + `watch` + method on this context manager. - Example usage: + For example, consider the function `y = x * x`. The gradient at `x = 3.0` can + be computed as: ```python + x = tf.constant(3.) with tfe.GradientTape() as g: - x = tf.constant(3.0) g.watch(x) y = x * x - grad = g.gradient(y, [x])[0] - assert grad.numpy() == 6.0 + grad = g.gradient(y, [x])[0] # Will compute to 6.0 ``` - It is possible to use GradientTapes to compute higher-order derivatives as - follows: + GradientTapes can be nested to compute higher-order derivatives. For example, ```python + x = tf.constant(3.0) with tfe.GradientTape() as g: - x = tf.constant(3.0) - g.watch(x) - y = x * x with tfe.GradientTape() as gg: - gg.watch(y) - z = 2 * y - inner_grad = gg.gradient(z, [y])[0] - assert inner_grad.numpy() == 2 - y = y + inner_grad - grad = g.gradient(y, [x])[0] - assert grad.numpy() == 6.0 + gg.watch(x) + y = x * x + dy_dx = gg.gradient(y, [x])[0] # Will compute to 6.0 + d2y_dx2 = g.gradient(dy_dx, [x])[0] # Will compute to 2.0 ``` By default, the resources held by a GradientTape are released as soon as - GradientTape.gradient() method is called. However, if one need to compute - multiple gradients over the same computation, she can create a persistent - GradientTape. Persistent tapes allow multiple calls to the gradient() method - and release resources when the tape object is destructed. - - Example usage: + GradientTape.gradient() method is called. To compute multiple gradients over + the same computation, create a persistent gradient tape. This allows multiple + calls to the gradient() method as resources are released when the tape object + is garbage collected. 
For example:

   ```python
+  x = tf.constant(3.0)
   with tfe.GradientTape(persistent=True) as g:
-    x = tf.constant(3.0)
     g.watch(x)
     y = x * x
     z = y * y
-  dz_dx = g.gradient(z, [x])[0]
-  assert dz_dx.numpy() == 108.0   # 4*x^3 at x = 3
-  dy_dx = g.gradient(y, [x])[0]
-  assert dy_dx.numpy() == 6.0
+  dz_dx = g.gradient(z, [x])[0]  # 108.0 (4*x^3 at x = 3)
+  dy_dx = g.gradient(y, [x])[0]  # 6.0
   del g  # Drop the reference to the tape
   ```
   """
@@ -703,8 +693,8 @@ class GradientTape(object):

     Args:
       persistent: Boolean controlling whether a persistent gradient tape
-        is created. Must be True or False.
-
+        is created. False by default, which means at most one call can
+        be made to the gradient() method on this object.
     """
     self._tape = None
     self._persistent = persistent
@@ -720,7 +710,7 @@ class GradientTape(object):
     """Ensures that `tensor` is being traced by this tape.

     Args:
-      tensor: a Tensor or Variable a list of Tensors or Variables.
+      tensor: a Tensor or list of Tensors.
     """
     for t in nest.flatten(tensor):
       if isinstance(t, resource_variable_ops.ResourceVariable):
@@ -735,14 +725,14 @@ class GradientTape(object):
           key=lambda v: v.handle._id))  # pylint: disable=protected-access

   def gradient(self, target, sources, output_gradients=None):
-    """Computes the gradient using information traced by the tape.
+    """Computes the gradient using operations recorded in the context of this tape.

     Args:
-      target: the tensor to be differentiated.
-      sources: a list of Tensors or Variables, the target will be
-        differentiated with respect to the sources.
+      target: Tensor to be differentiated.
+      sources: a list of Tensors or Variables. `target` will be differentiated
+        against elements in `sources`.
       output_gradients: a list of gradients, one for each element of
-        target. Defaults to None.
+        target. Defaults to None.

     Returns:
       a list of Tensors (or IndexedSlices, or None), one for each element in
       `sources`.

     Raises:
       RuntimeError: if called inside the context of the tape, or if called more
-        than once.
+        than once on a non-persistent tape.
     """
     if self._tape is None:
       raise RuntimeError("GradientTape.gradient can only be called once "
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 5934293..bca2928 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -195,8 +195,10 @@ class BackpropTest(test.TestCase):
     g, = backprop.gradients_function(loss, [0])(logits, labels)
     self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])

+  @test_util.run_in_graph_and_eager_modes()
   def testGradientWithinTapeBlock(self):
     v1 = resource_variable_ops.ResourceVariable(1.)
+ self.evaluate(v1.initializer) with backprop.GradientTape() as t: loss = 2 * v1 with self.assertRaises(RuntimeError): @@ -204,7 +206,7 @@ class BackpropTest(test.TestCase): with backprop.GradientTape(persistent=True) as t: loss = 2 * v1 grad = t.gradient(loss, [v1]) - self.assertAllEqual(grad[0], 2.0) + self.assertAllEqual(self.evaluate(grad[0]), 2.0) @test_util.assert_no_new_tensors def testSecondGrad(self): @@ -367,6 +369,7 @@ class BackpropTest(test.TestCase): self.assertEqual(backprop.implicit_grad(f)()[0][0], None) @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testGradientTape(self): with backprop.GradientTape() as g: x = constant_op.constant(3.0) @@ -376,10 +379,10 @@ class BackpropTest(test.TestCase): gg.watch(y) z = 2 * y inner_grad = gg.gradient(z, [y])[0] - self.assertEqual(inner_grad.numpy(), 2.0) + self.assertEqual(self.evaluate(inner_grad), 2.0) y += inner_grad grad = g.gradient(y, [x])[0] - self.assertEqual(grad.numpy(), 6.0) + self.assertEqual(self.evaluate(grad), 6.0) @test_util.assert_no_new_tensors def testGradientTapeGradientCalledMultipleTimes(self): @@ -394,6 +397,7 @@ class BackpropTest(test.TestCase): g.gradient(y, [x]) @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testPersistentTape(self): with backprop.GradientTape(persistent=True) as g: x = constant_op.constant(3.0) @@ -401,12 +405,13 @@ class BackpropTest(test.TestCase): y = x * x z = y * y dz_dx = g.gradient(z, [x])[0] - self.assertEqual(dz_dx.numpy(), 4*3*3*3) + self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3) dy_dx = g.gradient(y, [x])[0] - self.assertEqual(dy_dx.numpy(), 2*3) + self.assertEqual(self.evaluate(dy_dx), 2 * 3) del g @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testPersistentNestedTape(self): with backprop.GradientTape(persistent=True) as g: x = constant_op.constant(3.0) @@ -417,22 +422,24 @@ class BackpropTest(test.TestCase): z = 2 * y for _ in range(2): inner_grad = gg.gradient(z, [y])[0] - self.assertEqual(inner_grad.numpy(), 2.0) + self.assertEqual(self.evaluate(inner_grad), 2.0) y += inner_grad del gg grad = g.gradient(y, [x])[0] - self.assertEqual(grad.numpy(), 6.0) + self.assertEqual(self.evaluate(grad), 6.0) grad = g.gradient(z, [x])[0] - self.assertEqual(grad.numpy(), 12.0) + self.assertEqual(self.evaluate(grad), 12.0) del g @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testGradientTapeVariable(self): v = resource_variable_ops.ResourceVariable(1.0, name='v') + self.evaluate(v.initializer) with backprop.GradientTape() as g: y = v * v grad = g.gradient(y, [v])[0] - self.assertAllEqual(grad, 2.0) + self.assertAllEqual(self.evaluate(grad), 2.0) @test_util.assert_no_new_tensors def testEmptyParamsForValueAndGradFunction(self): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 01a0e03..f1cd341 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -838,41 +838,51 @@ class _EagerTensorBase(Tensor): def set_shape(self, shape): if not self.shape.is_compatible_with(shape): raise ValueError( - "EagerTensor's shape %s is not compatible with supplied shape %s" % + "Tensor's shape %s is not compatible with supplied shape %s" % (self.shape, shape)) # Methods not supported / implemented for Eager Tensors. 
@property def op(self): - raise AttributeError("op not supported for Eager Tensors.") + raise AttributeError( + "Tensor.op is meaningless when eager execution is enabled.") @property def graph(self): - raise AttributeError("graph not supported for Eager Tensors.") + raise AttributeError( + "Tensor.graph is meaningless when eager execution is enabled.") @property def name(self): - raise AttributeError("name not supported for Eager Tensors.") + raise AttributeError( + "Tensor.name is meaningless when eager execution is enabled.") @property def value_index(self): - raise AttributeError("value_index not supported for Eager Tensors.") + raise AttributeError( + "Tensor.value_index is meaningless when eager execution is enabled.") def consumers(self): - raise NotImplementedError("consumers not supported for Eager Tensors.") + raise NotImplementedError( + "Tensor.consumers is meaningless when eager execution is enabled.") def _add_consumer(self, consumer): - raise NotImplementedError("_add_consumer not supported for Eager Tensors.") + raise NotImplementedError( + "_add_consumer not supported when eager execution is enabled.") def _as_node_def_input(self): raise NotImplementedError( - "_as_node_def_input not supported for Eager Tensors.") + "_as_node_def_input not supported when eager execution is enabled.") def _as_tf_output(self): - raise NotImplementedError("_as_tf_output not supported for Eager Tensors.") + raise NotImplementedError( + "_as_tf_output not supported when eager execution is enabled.") def eval(self, feed_dict=None, session=None): - raise NotImplementedError("eval not supported for Eager Tensors.") + raise NotImplementedError( + "eval is not supported when eager execution is enabled, " + "is .numpy() what you're looking for?" + ) # This call creates an EagerTensor class, as a subclass of _EagerTensorBase, and @@ -5937,8 +5947,9 @@ def get_from_proto_function(collection_name): def _assert_collection_is_ok(collection_name): if context.executing_eagerly(): if collection_name in GraphKeys._VARIABLE_COLLECTIONS: # pylint: disable=protected-access - raise ValueError("When Eager Execution is enabled, variable " - "collections are not supported.") + raise ValueError( + "variable collections are not supported when eager execution is enabled." + ) def _operation_conversion_error(op, dtype=None, name=None, as_ref=False): diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index affa7ae..df873da 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -149,7 +149,7 @@ def shape_safe_assign_variable_handle(handle, shape, value, name=None): class ResourceVariable(variables.Variable): """Variable based on resource handles. - See the @{$python/state_ops$`Variables`} documentation for more details. + See the @{$variables$Variables How To} for a high level overview. A `ResourceVariable` allows you to maintain state across subsequent calls to session.run. @@ -179,24 +179,20 @@ class ResourceVariable(variables.Variable): by edges in the graph. 
Consider the following example, in which two writes can cause tf.Variable and tf.ResourceVariable to behave differently: - ```python - a = tf.ResourceVariable(1.0) - a.initializer.run() - - assign = a.assign(2.0) - with tf.control_dependencies([assign]): - b = a.read_value() - with tf.control_dependencies([b]): - other_assign = a.assign(3.0) - with tf.control_dependencies([other_assign]): - # Will print 2.0 because the value was read before other_assign ran. If - # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed. - tf.Print(b, [b]).eval() + ```python + a = tf.ResourceVariable(1.0) + a.initializer.run() + + assign = a.assign(2.0) + with tf.control_dependencies([assign]): + b = a.read_value() + with tf.control_dependencies([b]): + other_assign = a.assign(3.0) + with tf.control_dependencies([other_assign]): + # Will print 2.0 because the value was read before other_assign ran. If + # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed. + tf.Print(b, [b]).eval() ``` - - To enforce these consistency properties tf.ResourceVariable might make more - copies than an equivalent tf.Variable under the hood, so tf.Variable is still - not deprecated. """ def __init__(self, diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 5b9947f..c37cdd9 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -125,8 +125,8 @@ class Variable(checkpointable.CheckpointableBase): @compatibility(eager) `tf.Variable` is not compatible with eager execution. Use - `tfe.Variable` instead which is compatible with both eager execution - and graph construction. See [the TensorFlow Eager Execution + `tf.contrib.eager.Variable` instead which is compatible with both eager + execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. @end_compatibility -- 2.7.4
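As a self-contained illustration of what the new graph-mode tests exercise (a GradientTape recording ops during graph construction), here is a minimal sketch. The `tfe` import alias and the explicit `Session` are conventional TF 1.x usage assumed for the example; they are not part of this patch.

```python
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Graph mode: eager execution is not enabled, so x, y and the gradient below
# are symbolic Tensors until they are evaluated in a Session.
x = tf.constant(3.0)
with tfe.GradientTape() as g:
  g.watch(x)  # constants are not watched automatically
  y = x * x
grad = g.gradient(y, [x])[0]  # a symbolic Tensor, not a concrete value

with tf.Session() as sess:
  print(sess.run(grad))  # 6.0
```

Under eager execution the same tape code yields a concrete value directly, which is why the tests use `self.evaluate(...)`: it runs the tensor in a Session in graph mode and simply extracts the value in eager mode.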