From 015c1d84f714c651f401a19cdb709ad9c91561e1 Mon Sep 17 00:00:00 2001
From: Akshay Agrawal
Date: Thu, 24 May 2018 10:58:47 -0700
Subject: [PATCH] Fix convert_to_tensor logic in GradientDescentOptimizer's
 _prepare method

Previously, eagerly executing an optimizer that had been used in a `defun`
led to a cryptic error because the learning rate tensor supplied to the
update op was in fact a vestigial graph Tensor.

PiperOrigin-RevId: 197919104
---
 tensorflow/python/training/gradient_descent.py |  3 ++-
 .../python/training/gradient_descent_test.py   | 23 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/gradient_descent.py b/tensorflow/python/training/gradient_descent.py
index 6caf29d..a07ad19 100644
--- a/tensorflow/python/training/gradient_descent.py
+++ b/tensorflow/python/training/gradient_descent.py
@@ -71,6 +71,7 @@ class GradientDescentOptimizer(optimizer.Optimizer):
     return var.scatter_sub(delta, use_locking=self._use_locking)
 
   def _prepare(self):
-    if not context.executing_eagerly() or self._learning_rate_tensor is None:
+    if not context.executing_eagerly() or not isinstance(
+        self._learning_rate_tensor, ops.EagerTensor):
       self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                          name="learning_rate")
diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py
index 5370caf..f89a9c5 100644
--- a/tensorflow/python/training/gradient_descent_test.py
+++ b/tensorflow/python/training/gradient_descent_test.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -218,6 +221,26 @@ class GradientDescentOptimizerTest(test.TestCase):
       self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                          var1.eval())
 
+  def testCapturingInDefunWhileExecutingEagerly(self):
+    with context.eager_mode():
+      optimizer = gradient_descent.GradientDescentOptimizer(1.0)
+
+      def step():
+        v = resource_variable_ops.ResourceVariable(1.0)
+        with backprop.GradientTape() as tape:
+          loss = v ** 2
+        grad = tape.gradient(loss, v)
+        optimizer.apply_gradients([(grad, v)])
+        return v.read_value()
+
+      compiled_step = function.defun(step)
+
+      self.assertEqual(float(step()), -1.0)
+      self.assertEqual(float(compiled_step()), -1.0)
+      # This shouldn't fail; in particular, the learning rate tensor should
+      # be an EagerTensor once again, not a graph Tensor.
+      self.assertEqual(float(step()), -1.0)
+
 
 if __name__ == "__main__":
   test.main()
-- 
2.7.4
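
Note (not part of the patch): below is a minimal, self-contained sketch of the caching pattern the fix addresses. The names FakeGraphTensor, FakeEagerTensor, convert_to_tensor, and ToyOptimizer are hypothetical stand-ins rather than TensorFlow APIs; the sketch only illustrates why re-converting the cached learning rate whenever it is not an eager tensor (instead of only when it is None) avoids reusing a stale graph Tensor.

    # Illustrative sketch only; hypothetical classes, not TensorFlow internals.


    class FakeGraphTensor(object):
      """Stand-in for a symbolic tensor captured while tracing a defun."""

      def __init__(self, value):
        self.value = value


    class FakeEagerTensor(object):
      """Stand-in for a concrete tensor usable by eager update ops."""

      def __init__(self, value):
        self.value = value


    def convert_to_tensor(value, eagerly):
      # Mimics ops.convert_to_tensor: produces a graph tensor while tracing,
      # an eager tensor while executing eagerly.
      return FakeEagerTensor(value) if eagerly else FakeGraphTensor(value)


    class ToyOptimizer(object):

      def __init__(self, learning_rate):
        self._learning_rate = learning_rate
        self._learning_rate_tensor = None

      def _prepare(self, executing_eagerly):
        # Old check (`self._learning_rate_tensor is None`) would keep a graph
        # tensor cached by an earlier trace and feed it to eager update ops.
        # The patched check reconverts unless an eager tensor is already held.
        if not executing_eagerly or not isinstance(
            self._learning_rate_tensor, FakeEagerTensor):
          self._learning_rate_tensor = convert_to_tensor(
              self._learning_rate, executing_eagerly)
        return self._learning_rate_tensor


    opt = ToyOptimizer(1.0)
    opt._prepare(executing_eagerly=False)      # defun trace caches a graph tensor
    lr = opt._prepare(executing_eagerly=True)  # later eager step must reconvert
    assert isinstance(lr, FakeEagerTensor)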