self.assertAllEqual(c([[-2.0, -10.0]]), grad_vals[1])
def testTensorArrayGradientWriteRead(self):
- for dtype in self.numeric_types:
+ for dtype in self.float_types:
+ self._testTensorArrayGradientWriteReadType(dtype)
+ for dtype in self.complex_types:
self._testTensorArrayGradientWriteReadType(dtype)
def _testTensorArrayGradientWritePackConcatAndRead(self):
def testUnbatchGrad(self):
"""Tests that batch and unbatch are differentiable."""
with self.test_session() as sess:
- inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
+ inp = array_ops.placeholder(dtype=dtypes.float32, shape=[1])
batched, index, id_t = batch_ops.batch(
[inp], num_batch_threads=1, max_batch_size=2,
batch_timeout_micros=36000000, grad_timeout_micros=1000000,
def testCompilationInGradient(self):
with self.test_session():
- x = constant_op.constant([[3]])
+ x = constant_op.constant([[3.]])
y_nc = math_ops.matmul(x, x, name="not_compiled")
with jit.experimental_jit_scope():
y_c = math_ops.matmul(y_nc, y_nc, name="compiled")
with self.test_session(graph=ops.Graph()):
with jit.experimental_jit_scope():
# XlaScope 0
- a1 = constant_op.constant([[1]])
+ a1 = constant_op.constant([[1.]])
a1t = math_ops.matmul(a1, a1)
with jit.experimental_jit_scope():
# XlaScope 1
- a2 = constant_op.constant([[1]])
+ a2 = constant_op.constant([[1.]])
a2t = math_ops.matmul(a2, a2)
self.assertEqual(b"jit_scope_0", a1.op.get_attr("_XlaScope"))
with self.test_session(graph=ops.Graph()):
with jit.experimental_jit_scope(True, separate_compiled_gradients=True):
# XlaScope 0
- a1 = constant_op.constant([[1]])
+ a1 = constant_op.constant([[1.]])
a1t = math_ops.matmul(a1, a1)
with jit.experimental_jit_scope(True, separate_compiled_gradients=True):
# XlaScope 1
- a2 = constant_op.constant([[1]])
+ a2 = constant_op.constant([[1.]])
a2t = math_ops.matmul(a2, a2)
self.assertEqual(b"jit_scope_0", a1.op.get_attr("_XlaScope"))
class IteratorTest(test.TestCase):
- def testAttemptingGradientsRaiseExceptions(self):
- component = constant_op.constant([1])
- side = constant_op.constant(0)
+ def testNoGradients(self):
+ component = constant_op.constant([1.])
+ side = constant_op.constant(0.)
add = lambda x: x + side
dataset = dataset_ops.Dataset.from_tensor_slices(component).map(add)
value = dataset.make_one_shot_iterator().get_next()
- with self.assertRaisesRegexp(LookupError, "No gradient defined"):
- gradients_impl.gradients(value, component)
- with self.assertRaisesRegexp(LookupError, "No gradient defined"):
- gradients_impl.gradients(value, side)
- with self.assertRaisesRegexp(LookupError, "No gradient defined"):
- gradients_impl.gradients(value, [component, side])
+ self.assertIsNone(gradients_impl.gradients(value, component)[0])
+ self.assertIsNone(gradients_impl.gradients(value, side)[0])
+ self.assertIsNone(gradients_impl.gradients(value, [component, side])[0])
def testCapturingStateInOneShotRaisesException(self):
var = variables.Variable(37.0, name="myvar")
def g(x):
return backprop.gradients_function(f, [0])(x)[0]
- self.assertAllEqual(2, g(constant_op.constant(2)))
+ self.assertAllEqual(2, g(constant_op.constant(2.)))
def testGraphModeEagerGradError(self):
with context.graph_mode():
# Create a simple while loop.
with ops.Graph().as_default():
with ops.name_scope("export"):
- var = variables.Variable(0)
+ var = variables.Variable(0.)
var_name = var.name
- _, output = control_flow_ops.while_loop(lambda i, x: i < 5,
- lambda i, x: (i + 1, x + i),
- [0, var])
+ _, output = control_flow_ops.while_loop(
+ lambda i, x: i < 5,
+ lambda i, x: (i + 1, x + math_ops.cast(i, dtypes.float32)),
+ [0, var])
output_name = output.name
# Generate a MetaGraphDef containing the while loop with an export scope.
analytic_grad2 = 2 * slice_val
dy = variables.Variable(
- array_ops.ones(shape=slice_var.get_shape(), dtype=dtypes.int32))
+ array_ops.ones(shape=slice_var.get_shape(), dtype=dtypes.float32))
assign = dy.assign(slice_var)
slice_val_grad, = gradients_impl.gradients(slice_val, self.var, grad_ys=dy)
slice_val_grad2, = gradients_impl.gradients(
def testGradient(self):
with self.test_session(use_gpu=True) as sess:
var = variables.Variable(
- array_ops.reshape(math_ops.range(1, 97, 1), shape=(6, 4, 4)))
+ array_ops.reshape(
+ math_ops.range(1, 97, 1, dtype=dtypes.float32), shape=(6, 4, 4)))
init = variables.global_variables_initializer()
sess.run(init)
def testGradientZero(self):
with self.test_session(use_gpu=True) as sess:
- var = variables.Variable(8)
+ var = variables.Variable(8.)
init = variables.global_variables_initializer()
sess.run(init)
grad = GradSliceChecker(self, sess, var, np.array(8))
def testInt64Indices(self):
with self.test_session(use_gpu=True) as sess:
- a = math_ops.range(3)
+ a = math_ops.range(3, dtype=dtypes.float32)
index = constant_op.constant(1, dtype=dtypes.int64)
- b = 2 * a[index]
+ b = 2. * a[index]
grad, = gradients_impl.gradients(b, a)
- self.assertAllEqual(sess.run(grad), [0, 2, 0])
+ self.assertAllEqual(sess.run(grad), [0., 2., 0.])
class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
def testWhileWithRefsWithGradients_1(self):
with self.test_session() as sess:
- x = variables.Variable(0)._ref() # pylint: disable=protected-access
+ x = variables.Variable(0.)._ref() # pylint: disable=protected-access
i = constant_op.constant(0)
c = lambda i, x: math_ops.less(i, 10)
- self.assertEqual(x.dtype, dtypes.int32_ref)
+ self.assertEqual(x.dtype, dtypes.float32_ref)
def body(i, x):
- self.assertEqual(x.dtype, dtypes.int32_ref)
+ self.assertEqual(x.dtype, dtypes.float32_ref)
return [i + 1, gen_array_ops.ref_identity(x)]
r = control_flow_ops.while_loop(c, body, [i, x], parallel_iterations=5)
variables.global_variables_initializer().run()
self.assertEqual(r[0].dtype, dtypes.int32)
- self.assertEqual(r[1].dtype, dtypes.int32_ref)
+ self.assertEqual(r[1].dtype, dtypes.float32_ref)
value_i, value_x, value_x_grad = sess.run(r + grad)
r = gradients_impl.gradients(r, y)[0]
self.assertEqual(388.0, r.eval())
+ def testWhileGradientWithNontrainablePath1(self):
+ q = variables.Variable([7., 8.])
+
+ def cond(_, y):
+ del y
+ return False
+
+ def body(x, _):
+ return x, math_ops.cast(x, dtypes.float32) + math_ops.reduce_sum(q)
+
+ _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.))
+ dy_dq, = gradients_impl.gradients(y, q)
+ self.assertIsNotNone(dy_dq)
+ with self.test_session() as sess:
+ sess.run(q.initializer)
+ self.assertAllClose([0., 0.], sess.run(dy_dq))
+
+ def testWhileGradientWithNontrainablePath2(self):
+ q = variables.Variable([7., 8.])
+
+ def cond(_, y):
+ return math_ops.equal(y, 0.)
+
+ def body(x, _):
+ zero = constant_op.constant(0, dtype=dtypes.int64)
+ return zero, math_ops.cast(x, dtypes.float32) + math_ops.reduce_sum(q)
+
+ _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.))
+ dy_dq, = gradients_impl.gradients(y, q)
+ self.assertIsNotNone(dy_dq)
+ with self.test_session() as sess:
+ sess.run(q.initializer)
+ self.assertAllClose([1., 1.], sess.run(dy_dq))
+
+ def testIssue16504(self):
+ c = constant_op.constant(np.arange(100), dtype=dtypes.float32)
+ w = variables.Variable(
+ initial_value=np.ones(100), dtype=dtypes.float32) / 100
+ k = variables.Variable(0, dtype=dtypes.int32)
+ chg_w = constant_op.constant(np.inf, dtype=dtypes.float32)
+
+ def cond(k, _, chg_w):
+ return math_ops.logical_and(k < 10, chg_w > 1e-3)
+
+ def body(k, w, chg_w):
+ grad, = gradients_impl.gradients(-math_ops.reduce_sum(w * c), w)
+ w_n = w * math_ops.exp(-0.1 * grad)
+ w_n /= math_ops.reduce_sum(w_n)
+ chg_w = (
+ math_ops.reduce_sum(math_ops.abs(w_n - w)) / math_ops.reduce_sum(
+ math_ops.abs(w)))
+ return k + 1, w_n, chg_w
+
+ _, w, _ = control_flow_ops.while_loop(cond, body, [k, w, chg_w])
+ grad, = gradients_impl.gradients(w, c)
+ self.assertIsNotNone(grad)
+
def testStopGradMultiFlows(self):
with self.test_session():
constant_op.constant([[5, 2], [0, 3]])
]
data = [
- constant_op.constant([61, 62]),
- constant_op.constant([[41, 42], [11, 12]]),
- constant_op.constant([[[51, 52], [21, 22]], [[1, 2], [31, 32]]])
+ constant_op.constant([61., 62.]),
+ constant_op.constant([[41., 42.], [11., 12.]]),
+ constant_op.constant([[[51., 52.], [21., 22.]],
+ [[1., 2.], [31., 32.]]])
]
stitched_t = self.stitch_op(indices, data)
stitched_val = stitched_t.eval()
- correct = 10 * np.arange(7)[:, None] + [1, 2]
+ correct = 10. * np.arange(7)[:, None] + [1., 2.]
self.assertAllEqual(correct, stitched_val)
self.assertEqual([7, 2], stitched_t.get_shape().as_list())
# Test gradients
- stitched_grad = 7 * stitched_val
+ stitched_grad = 7. * stitched_val
grads = gradients_impl.gradients(stitched_t, indices + data,
stitched_grad)
self.assertEqual(grads[:3], [None] * 3) # Indices have no gradients
for datum, grad in zip(data, sess.run(grads[3:])):
- self.assertAllEqual(7 * datum.eval(), grad)
+ self.assertAllEqual(7. * datum.eval(), grad)
def testErrorIndicesMultiDimensional(self):
indices = [
# [dexp(x)/dx + d(log(exp(x)))/dx] @ x=1 == exp(1) + 1
self.assertAllClose(grad_vals[0], exp1_plus_one)
+ def testIdentityGradient(self):
+ x = constant_op.constant(3.)
+ dx_dx, = gradients_impl.gradients(x, x)
+ with self.test_session() as sess:
+ self.assertAllClose(1., sess.run(dx_dx))
+
+ def testIntegerIdentityGradient(self):
+ x = constant_op.constant(3)
+ dx_dx, = gradients_impl.gradients(x, x)
+ with self.test_session() as sess:
+ self.assertAllClose(1, sess.run(dx_dx))
+
+ def testGradientWithIntegerPath(self):
+ x = constant_op.constant([3.9, 4.1])
+ k = math_ops.to_float(math_ops.to_int32(x))
+ y = x * k
+ dy_dx, = gradients_impl.gradients(y, x)
+ with self.test_session() as sess:
+ self.assertAllClose([3., 4.], sess.run(dy_dx))
+
+ def testNoIntegerGradient1(self):
+ x = constant_op.constant([3.9, 4.1])
+ k = math_ops.to_float(math_ops.to_int32(x))
+ y = k * k
+ dy_dx, = gradients_impl.gradients(y, x)
+ self.assertIsNone(dy_dx)
+
+ def testNoIntegerGradient2(self):
+ k = constant_op.constant([3, 4])
+ x = math_ops.to_float(k)
+ y = x * x
+ dy_dk, = gradients_impl.gradients(y, k)
+ self.assertIsNone(dy_dk)
+
+ def testNoIntegerGradient3(self):
+ k = constant_op.constant([3, 4])
+ m = k * k
+ dm_dk, = gradients_impl.gradients(m, k)
+ self.assertIsNone(dm_dk)
+
+ def testNoIntegerGradient4(self):
+ k = constant_op.constant([3, 4])
+ m = k * k * k
+ dm_dk, = gradients_impl.gradients(m, k)
+ self.assertIsNone(dm_dk)
+
+ def testNoIntegerGradient5(self):
+ k = constant_op.constant([3, 4])
+ m = k * k
+ n = m * m
+ dn_dk, = gradients_impl.gradients(n, k)
+ self.assertIsNone(dn_dk)
+
+ def testNoIntegerGradient6(self):
+ k = constant_op.constant(3)
+ x = math_ops.to_float(k)
+ grad_1, = gradients_impl.gradients(k * k, k)
+ grad_2, = gradients_impl.gradients(x * x, k)
+ grad_3, = gradients_impl.gradients(math_ops.square(k), k)
+ grad_4, = gradients_impl.gradients(math_ops.square(x), k)
+ self.assertIsNone(grad_1)
+ self.assertIsNone(grad_2)
+ self.assertIsNone(grad_3)
+ self.assertIsNone(grad_4)
+
if __name__ == '__main__':
test.main()
def testGradients(self):
with self.test_session(use_gpu=False) as sess:
- inputs = array_ops.placeholder(dtypes.int32, shape=[3, 5])
+ inputs = array_ops.placeholder(dtypes.float32, shape=[3, 5])
values = nn_ops.nth_element(inputs, 3)
grad = sess.run(
gradients_impl.gradients(
values, inputs, grad_ys=[[-1., 2., 5.]]),
- feed_dict={inputs: [[2, -1, 1000, 3, 1000],
- [1, 5, 2, 4, 3],
- [2, 2, 2, 2, 2],
+ feed_dict={inputs: [[2., -1., 1000., 3., 1000.],
+ [1., 5., 2., 4., 3.],
+ [2., 2., 2., 2., 2.],
]})
self.assertAllClose(grad[0], [[0, 0, -0.5, 0, -0.5],
[0, 0, 0, 2, 0],
self.assertAllEqual(c(-2.0), grad_vals[1])
def testTensorArrayGradientWriteRead(self):
- for dtype in (np.float32, np.float64, np.int32, np.int64, np.complex64,
- np.complex128):
+ for dtype in (np.float32, np.float64, np.complex64, np.complex128):
self._testTensorArrayGradientWriteReadType(dtype)
def _testTensorArrayGradientWritePackConcatAndRead(self):
def testTopKGradients(self):
with self.test_session(use_gpu=True) as sess:
- inputs = array_ops.placeholder(dtypes.int32, shape=[2, 5])
+ inputs = array_ops.placeholder(dtypes.float32, shape=[2, 5])
values, _ = nn_ops.top_k(inputs, 3)
grad = sess.run(
gradients_impl.gradients(
- values, inputs, grad_ys=[[[1, 2, 3], [4, 5, 6]]]),
- feed_dict={inputs: [[2, -1, 1000, 3, 4], [1, 5, 2, 4, 3]]})[0]
- self.assertEqual(grad.tolist(), [[0, 0, 1, 3, 2], [0, 4, 0, 5, 6]])
+ values, inputs, grad_ys=[[[1., 2., 3.], [4., 5., 6.]]]),
+ feed_dict={inputs: [[2., -1., 1000., 3., 4.],
+ [1., 5., 2., 4., 3.]]})[0]
+ self.assertEqual(
+ grad.tolist(), [[0., 0., 1., 3., 2.], [0., 4., 0., 5., 6.]])
class TopKBenchmark(test.Benchmark):
if not reached_ops[op._id]:
reached_ops[op._id] = True
for output in op.outputs:
- queue.extend(output.consumers())
+ if _IsBackpropagatable(output):
+ queue.extend(output.consumers())
def _GatherInputs(to_ops, reached_ops):
colocate_gradients_with_ops: Python bool. See docstring of gradients().
Returns:
- A tuple containing: (1) a list of integers indexed by operation id,
- indicating the number of backprop inputs to this operation, and (2)
- a ControlFlowState object which is not None if the ops between from_ops
- and to_ops contain control flow loops.
+ A tuple containing: (1) the subset of to_ops ids reachable from from_ops
+ by a path of zero or more backpropagatable tensors, (2) a list of integers
+ indexed by operation id, indicating the number of backprop inputs to this
+ operation, and (3) a ControlFlowState object which is not None if the ops
+ between from_ops and to_ops contain control flow loops.
"""
# Mark reachable ops from from_ops.
reached_ops = [False] * (graph._last_id + 1)
- for op in to_ops:
- reached_ops[op._id] = True
_MarkReachedOps(from_ops, reached_ops)
+ # reached_ops[X] iff X is reachable from from_ops by a path of zero or more
+ # backpropagatable tensors.
+
+ reachable_to_ops = set(op._id for op in to_ops if reached_ops[op._id]) # pylint: disable=protected-access
# Mark between ops.
between_ops = [False] * (graph._last_id + 1)
reached_ops[op._id] = False
for inp in op.inputs:
queue.append(inp.op)
+ # between_ops[X] iff X is on a path of zero or more backpropagatable tensors
+ # between from_ops and to_ops.
# 'loop_state' is None if there are no while loops.
loop_state = control_flow_ops.MaybeCreateControlFlowState(
if between_ops[x.op._id]:
pending_count[x.op._id] += 1
- return pending_count, loop_state
+ return reachable_to_ops, pending_count, loop_state
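
The practical effect of returning reachable_to_ops, mirroring testNoIntegerGradient3 above (an illustrative sketch, not part of the patch):

    k = constant_op.constant([3, 4])   # int32: not backpropagatable
    m = k * k
    # No path of backpropagatable tensors connects k.op to m.op, so
    # m.op._id is absent from reachable_to_ops, m is never seeded into
    # the backprop queue, and gradients(m, k) returns [None].
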
def _AsList(x):
dtypes.complex64, dtypes.complex128)
+def _IsBackpropagatable(tensor):
+ if _IsTrainable(tensor):
+ return True
+ dtype = dtypes.as_dtype(tensor.dtype)
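+ # bfloat16, resource, and variant tensors are not trainable themselves,
+ # but a gradient path may pass through them (e.g. resource-variable
+ # handles), so treat them as backpropagatable.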
+ return dtype.base_dtype in (dtypes.bfloat16, dtypes.resource, dtypes.variant)
+
+
def _VerifyGeneratedGradients(grads, op):
"""Verify that gradients are valid in number and type.
backpropagation stops at both `tf.stop_gradient` nodes and nodes in
`stop_gradients`, whichever is encountered first.
+ All integer tensors are considered constant with respect to all `xs`, as if
+ they were included in `stop_gradients`.
+
Args:
ys: A `Tensor` or list of tensors to be differentiated.
xs: A `Tensor` or list of tensors to be used for differentiation.
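
For illustration, the documented behavior in terms of the new tests above (a sketch using this file's import aliases; not part of the patch):

    k = constant_op.constant([3, 4])                 # int32
    gradients_impl.gradients(k * k, k)               # -> [None]

    x = constant_op.constant([3.9, 4.1])
    y = x * math_ops.to_float(math_ops.to_int32(x))
    dy_dx, = gradients_impl.gradients(y, x)
    # dy_dx evaluates to [3., 4.]: the integer round-trip is treated as
    # a constant factor (see testGradientWithIntegerPath above).
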
to_ops = [t.op for t in ys]
from_ops = [t.op for t in xs]
stop_gradient_ops = [t.op for t in stop_gradients]
- pending_count, loop_state = _PendingCount(
+ reachable_to_ops, pending_count, loop_state = _PendingCount(
ops.get_default_graph(), to_ops, from_ops, colocate_gradients_with_ops)
# Iterate over the collected ops.
# another output's gradient.
# pylint: disable=protected-access
ready = (pending_count[op._id] == 0)
- if ready and op._id not in to_ops_set:
+ if ready and op._id not in to_ops_set and op._id in reachable_to_ops:
to_ops_set.add(op._id)
queue.append(op)
# pylint: enable=protected-access