From: Francois Chollet
Date: Tue, 3 Apr 2018 22:39:02 +0000 (-0700)
Subject: Add ability to pass symbolic tensors as inputs and targets in calls to Model training...
X-Git-Tag: tflite-v0.1.7~39^2^2~70
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1948b74779e34e5ac608ef427b3409ca0a98c5f5;p=platform%2Fupstream%2Ftensorflow.git

Add ability to pass symbolic tensors as inputs and targets in calls to Model
training and evaluation methods.

This also works for eager tensors, but due to a difference in slicing behavior
between eager tensors and Numpy arrays, we have to implement a workaround
(with a performance cost).

PiperOrigin-RevId: 191511215
---
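Note: the following is a minimal illustrative sketch of the feeding workaround
described above, not part of the patch. It assumes TF 1.x graph mode; the names
`x` and `symbolic_data` are hypothetical. Since `feed_dict` values must be
concrete (e.g. Numpy) values, the patched `Function.__call__` first
materializes any symbolic data tensors with an extra `session.run()`, then
feeds the results:

  import tensorflow as tf

  x = tf.placeholder(tf.float32, shape=(None, 3))  # model input placeholder
  symbolic_data = tf.zeros(shape=(10, 3))          # data passed as a symbolic tensor

  with tf.Session() as session:
    # Step 1: evaluate the symbolic data tensor (the extra, costly run).
    materialized = session.run(symbolic_data)
    # Step 2: feed the resulting Numpy array, as feed_dict requires.
    out = session.run(2. * x, feed_dict={x: materialized})
    print(out.shape)  # (10, 3)
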
diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py
index 7baf276..3aac6a9 100644
--- a/tensorflow/python/keras/_impl/keras/backend.py
+++ b/tensorflow/python/keras/_impl/keras/backend.py
@@ -34,6 +34,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_module
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.layers import base as tf_base_layers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
@@ -2795,6 +2796,8 @@ class Function(object):
     else:
       feed_dict = {}
 
+    session = get_session()
+    data_tensors_to_feed = []
     for tensor, value in zip(self.inputs, inputs):
       if value is None:
         continue
@@ -2803,9 +2806,20 @@ class Function(object):
         indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
                                   np.expand_dims(sparse_coo.col, 1)), 1)
         value = (indices, sparse_coo.data, sparse_coo.shape)
-      feed_dict[tensor] = value
+      elif tensor_util.is_tensor(value):
+        data_tensors_to_feed.append((tensor, value))
+      else:
+        feed_dict[tensor] = value
+
+    if data_tensors_to_feed:
+      # This is a *temporary* workaround (i.e. hack) to feed a symbolic tensor
+      # to `feed_dict`. It is very inefficient. It will be removed as soon
+      # as it becomes possible to pass symbolic tensors to `feed_dict`.
+      data_tensor_values = session.run([x[1] for x in data_tensors_to_feed])
+      for i, v in enumerate(data_tensor_values):
+        feed_dict[data_tensors_to_feed[i][0]] = v
+
     fetches = self.outputs + [self.updates_op] + self.fetches
-    session = get_session()
     updated = session.run(
         fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
     return updated[:len(self.outputs)]
diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py
index 971245c..71de657 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training.py
@@ -1181,6 +1181,9 @@ class Model(Network):
           batch_size=batch_size)
     elif validation_split and 0. < validation_split < 1.:
+      if training_utils.has_symbolic_tensors(x):
+        raise ValueError('If your data is in the form of symbolic tensors, '
+                         'you cannot use `validation_split`.')
       if hasattr(x[0], 'shape'):
         split_at = int(x[0].shape[0] * (1. - validation_split))
       else:
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py
index 67858a5..4cdb5f1 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py
@@ -31,9 +31,8 @@ from tensorflow.python.keras._impl.keras import callbacks as cbks
 from tensorflow.python.keras._impl.keras import losses
 from tensorflow.python.keras._impl.keras import metrics as metrics_module
 from tensorflow.python.keras._impl.keras.engine import training_utils
-from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches
-from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar
-from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays
+from tensorflow.python.keras._impl.keras.utils import generic_utils
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -173,6 +172,41 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False):
   return outs, total_loss, loss_metrics
 
 
+def slice_arrays(arrays, indices, contiguous=True):
+  """Slices batches out of provided arrays (workaround for eager tensors).
+
+  Unfortunately eager tensors don't have the same slicing behavior as
+  Numpy arrays (they follow the same slicing behavior as symbolic TF tensors),
+  hence we cannot use `generic_utils.slice_arrays` directly
+  and we have to implement this workaround based on `concat`. This has a
+  performance cost.
+
+  Arguments:
+    arrays: Single array or list of arrays.
+    indices: List of indices in the array that should be included in the output
+      batch.
+    contiguous: Boolean flag indicating whether the indices are contiguous.
+
+  Returns:
+    Slice of data (either single array or list of arrays).
+  """
+  if any(tensor_util.is_tensor(x) for x in arrays):
+    converted_to_list = False
+    if not isinstance(arrays, list):
+      converted_to_list = True
+      arrays = [arrays]
+    if not contiguous:
+      entries = [[x[i:i + 1] for i in indices] for x in arrays]
+      slices = [array_ops.concat(x, axis=0) for x in entries]
+    else:
+      slices = [x[indices[0]:indices[-1] + 1] for x in arrays]
+    if converted_to_list:
+      slices = slices[0]
+    return slices
+  else:
+    return generic_utils.slice_arrays(arrays, indices)
+
+
 def _process_single_batch(model,
                           inputs,
                           targets,
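Note: a short illustration (hypothetical, not part of the patch) of the slicing
difference that motivates `slice_arrays` above. Numpy arrays support fancy
indexing with an arbitrary list of indices, while eager tensors follow TF
tensor `__getitem__` semantics, so a shuffled (non-contiguous) batch has to be
gathered by concatenating one-row slices:

  import numpy as np
  import tensorflow as tf
  from tensorflow.python.framework import ops

  ops.enable_eager_execution()  # as done in training_eager_test.py below

  array = np.arange(12.).reshape((4, 3))
  tensor = tf.constant(array)
  batch_ids = [3, 1]  # non-contiguous indices, e.g. after shuffling

  np_batch = array[batch_ids]  # Numpy fancy indexing: rows 3 and 1.
  # `tensor[batch_ids]` does not perform the equivalent gather, hence:
  tf_batch = tf.concat([tensor[i:i + 1] for i in batch_ids], axis=0)

  # With contiguous indices (shuffle=False), a cheap basic slice suffices:
  tf_contiguous = tensor[1:3]
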
""" + if not batch_size: + raise ValueError('With eager execution, `batch_size` should be specified.') + if steps_per_epoch or validation_steps: + raise ValueError('With eager execution, `steps_per_epoch` and ' + '`validation_steps` are not valid arguments ' + '(set `batch_size` instead).') # Required for Eager mode with backend.learning_phase_scope(1): do_validation = False @@ -410,15 +449,18 @@ def fit_loop( elif shuffle: np.random.shuffle(index_array) - batches = make_batches(num_train_samples, batch_size) + batches = generic_utils.make_batches(num_train_samples, batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - inputs_batch = slice_arrays(inputs, batch_ids) - targets_batch = slice_arrays(targets, batch_ids) + inputs_batch = slice_arrays(inputs, batch_ids, + contiguous=not shuffle) + targets_batch = slice_arrays(targets, batch_ids, + contiguous=not shuffle) if sample_weights: - sample_weights_batch = slice_arrays(sample_weights, batch_ids) + sample_weights_batch = slice_arrays(sample_weights, batch_ids, + contiguous=not shuffle) else: sample_weights_batch = None except TypeError: @@ -539,8 +581,8 @@ def test_loop(model, inputs, targets, feed_data, batch_size=batch_size, steps=steps, steps_name='steps') outs = [] if verbose == 1: - progbar = Progbar(target=num_samples) - batches = make_batches(num_samples, batch_size) + progbar = generic_utils.Progbar(target=num_samples) + batches = generic_utils.make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] @@ -620,12 +662,12 @@ def predict_loop(model, inputs, inputs, batch_size, steps, 'steps') if verbose == 1: if steps is not None: - progbar = Progbar(target=steps) + progbar = generic_utils.Progbar(target=steps) else: - progbar = Progbar(target=num_samples) + progbar = generic_utils.Progbar(target=num_samples) outs = [] - batches = make_batches(num_samples, batch_size) + batches = generic_utils.make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 8848b39..6cdb6b0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import numpy as np from tensorflow.python.framework import ops @@ -308,6 +307,100 @@ class TrainingTest(test.TestCase): model.compile(loss=None, optimizer='rms') + def test_model_methods_with_eager_tensors_multi_io(self): + a = keras.layers.Input(shape=(3,), name='input_a') + b = keras.layers.Input(shape=(3,), name='input_b') + + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) + + model = keras.models.Model([a, b], [d, e]) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + loss_weights = [1., 0.5] + metrics = ['mae'] + model.compile( + optimizer, + loss, + metrics=metrics, + loss_weights=loss_weights, + sample_weight_mode=None) + + input_a = keras.backend.zeros(shape=(10, 3)) + input_b = 
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py
index 8848b39..6cdb6b0 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
 import numpy as np
 
 from tensorflow.python.framework import ops
@@ -308,6 +307,100 @@ class TrainingTest(test.TestCase):
     model.compile(loss=None, optimizer='rms')
 
+  def test_model_methods_with_eager_tensors_multi_io(self):
+    a = keras.layers.Input(shape=(3,), name='input_a')
+    b = keras.layers.Input(shape=(3,), name='input_b')
+
+    dense = keras.layers.Dense(4, name='dense')
+    c = dense(a)
+    d = dense(b)
+    e = keras.layers.Dropout(0.5, name='dropout')(c)
+
+    model = keras.models.Model([a, b], [d, e])
+
+    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    loss = 'mse'
+    loss_weights = [1., 0.5]
+    metrics = ['mae']
+    model.compile(
+        optimizer,
+        loss,
+        metrics=metrics,
+        loss_weights=loss_weights,
+        sample_weight_mode=None)
+
+    input_a = keras.backend.zeros(shape=(10, 3))
+    input_b = keras.backend.zeros(shape=(10, 3))
+    target_d = keras.backend.zeros(shape=(10, 4))
+    target_e = keras.backend.zeros(shape=(10, 4))
+
+    model.fit(
+        [input_a, input_b], [target_d, target_e],
+        epochs=1,
+        batch_size=5,
+        verbose=0)
+    # Test: no shuffle.
+    model.fit(
+        [input_a, input_b], [target_d, target_e],
+        epochs=1,
+        batch_size=5,
+        verbose=0,
+        shuffle=False)
+    # Test: validation data.
+    model.fit([input_a, input_b], [target_d, target_e],
+              epochs=1, batch_size=2, verbose=0,
+              validation_data=([input_a, input_b], [target_d, target_e]))
+    model.train_on_batch([input_a, input_b], [target_d, target_e])
+    model.predict([input_a, input_b], batch_size=5)
+    model.evaluate([input_a, input_b], [target_d, target_e],
+                   batch_size=2, verbose=0)
+    model.test_on_batch([input_a, input_b], [target_d, target_e])
+
+    # Test: mix np and tensors.
+    input_b = np.zeros(shape=(10, 3)).astype('float32')
+    target_e = np.zeros(shape=(10, 4)).astype('float32')
+    model.fit(
+        [input_a, input_b], [target_d, target_e],
+        epochs=1,
+        batch_size=5,
+        verbose=0)
+    model.fit([input_a, input_b], [target_d, target_e],
+              epochs=1, batch_size=2, verbose=0,
+              validation_data=([input_a, input_b], [target_d, target_e]))
+    model.fit(
+        [input_a, input_b], [target_d, target_e],
+        epochs=1,
+        batch_size=5,
+        verbose=0,
+        shuffle=False)
+    model.train_on_batch([input_a, input_b], [target_d, target_e])
+    model.predict([input_a, input_b], batch_size=5)
+    model.evaluate([input_a, input_b], [target_d, target_e],
+                   batch_size=2, verbose=0)
+    model.test_on_batch([input_a, input_b], [target_d, target_e])
+
+  def test_model_methods_with_eager_tensors_single_io(self):
+    x = keras.layers.Input(shape=(3,), name='input')
+    y = keras.layers.Dense(4, name='dense')(x)
+    model = keras.Model(x, y)
+
+    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    loss = 'mse'
+    metrics = ['mae']
+    model.compile(optimizer, loss, metrics=metrics)
+
+    inputs = keras.backend.zeros(shape=(10, 3))
+    targets = keras.backend.zeros(shape=(10, 4))
+
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0)
+    model.fit(inputs, targets, epochs=1, batch_size=3, verbose=0, shuffle=False)
+    model.fit(inputs, targets, epochs=1, batch_size=4, verbose=0,
+              validation_data=(inputs, targets))
+    model.evaluate(inputs, targets, batch_size=2, verbose=0)
+    model.predict(inputs, batch_size=2)
+    model.train_on_batch(inputs, targets)
+    model.test_on_batch(inputs, targets)
+
 
 class LossWeightingTest(test.TestCase):
 
@@ -533,14 +626,5 @@ class LossWeightingTest(test.TestCase):
 
 
 if __name__ == '__main__':
-  # Bazel sets these environment variables to very long paths.
-  # Tempfile uses them to create long paths, and in turn multiprocessing
-  # library tries to create sockets named after paths. Delete whatever bazel
-  # writes to these to avoid tests failing due to socket addresses being too
-  # long.
-  for var in ('TMPDIR', 'TMP', 'TEMP'):
-    if var in os.environ:
-      del os.environ[var]
-
   ops.enable_eager_execution()
   test.main()
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py
index fd91dbb..08fd26d 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py
@@ -1117,6 +1117,121 @@ class TestTrainingUtils(test.TestCase):
 
 class TestTrainingWithDataTensors(test.TestCase):
 
+  def test_training_and_eval_methods_on_symbolic_tensors_single_io(self):
+    with self.test_session():
+      x = keras.layers.Input(shape=(3,), name='input')
+      y = keras.layers.Dense(4, name='dense')(x)
+      model = keras.Model(x, y)
+
+      optimizer = 'rmsprop'
+      loss = 'mse'
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics)
+
+      inputs = keras.backend.zeros(shape=(10, 3))
+      targets = keras.backend.zeros(shape=(10, 4))
+
+      model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0)
+      model.evaluate(inputs, targets, steps=2, verbose=0)
+      model.predict(inputs, steps=2)
+      model.train_on_batch(inputs, targets)
+      model.test_on_batch(inputs, targets)
+      model.fit(inputs, targets,
+                epochs=1, steps_per_epoch=2, verbose=0,
+                validation_data=(inputs, targets), validation_steps=2)
+
+  def test_training_and_eval_methods_on_symbolic_tensors_multi_io(self):
+    with self.test_session():
+      a = keras.layers.Input(shape=(3,), name='input_a')
+      b = keras.layers.Input(shape=(3,), name='input_b')
+
+      dense = keras.layers.Dense(4, name='dense')
+      c = dense(a)
+      d = dense(b)
+      e = keras.layers.Dropout(0.5, name='dropout')(c)
+
+      model = keras.models.Model([a, b], [d, e])
+
+      optimizer = 'rmsprop'
+      loss = 'mse'
+      loss_weights = [1., 0.5]
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights)
+
+      input_a_tf = keras.backend.zeros(shape=(10, 3))
+      input_b_tf = keras.backend.zeros(shape=(10, 3))
+
+      output_d_tf = keras.backend.zeros(shape=(10, 4))
+      output_e_tf = keras.backend.zeros(shape=(10, 4))
+
+      model.fit(
+          [input_a_tf, input_b_tf], [output_d_tf, output_e_tf],
+          epochs=1,
+          steps_per_epoch=2,
+          verbose=0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'should specify the `steps_per_epoch`'):
+        model.fit(
+            [input_a_tf, input_b_tf], [output_d_tf, output_e_tf],
+            epochs=1,
+            batch_size=5,
+            verbose=0)
+      model.train_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf])
+
+      # Test with dictionary inputs
+      model.fit(
+          {'input_a': input_a_tf,
+           'input_b': input_b_tf},
+          {'dense': output_d_tf,
+           'dropout': output_e_tf},
+          epochs=1,
+          steps_per_epoch=2,
+          verbose=0)
+      model.fit(
+          {'input_a': input_a_tf,
+           'input_b': input_b_tf},
+          {'dense': output_d_tf,
+           'dropout': output_e_tf},
+          validation_data=({'input_a': input_a_tf,
+                            'input_b': input_b_tf},
+                           {'dense': output_d_tf,
+                            'dropout': output_e_tf}),
+          epochs=1,
+          steps_per_epoch=2,
+          validation_steps=2,
+          verbose=0)
+      model.train_on_batch(
+          {'input_a': input_a_tf,
+           'input_b': input_b_tf},
+          {'dense': output_d_tf,
+           'dropout': output_e_tf})
+
+      # Test with validation data
+      model.fit(
+          [input_a_tf, input_b_tf], [output_d_tf, output_e_tf],
+          validation_data=([input_a_tf, input_b_tf],
+                           [output_d_tf, output_e_tf]),
+          epochs=1,
+          steps_per_epoch=2,
+          validation_steps=2,
+          verbose=0)
+      # Test with validation split
+      with self.assertRaisesRegexp(ValueError,
+                                   'you cannot use `validation_split`'):
+        model.fit(
+            [input_a_tf, input_b_tf], [output_d_tf, output_e_tf],
+            epochs=2,
+            steps_per_epoch=2,
+            verbose=0,
+            validation_split=0.2,
+            validation_steps=2)
+
+      # Test evaluation / prediction methods
+      model.evaluate([input_a_tf, input_b_tf], [output_d_tf, output_e_tf],
+                     steps=2, verbose=0)
+      model.predict([input_a_tf, input_b_tf], steps=2)
+      model.test_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf])
+
   def test_model_with_input_feed_tensor(self):
     """We test building a model with a TF variable as input.
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_utils.py b/tensorflow/python/keras/_impl/keras/engine/training_utils.py
index 105638c..76537b7 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_utils.py
@@ -22,6 +22,7 @@ import copy
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import losses
@@ -64,15 +65,29 @@ def check_num_samples(ins,
     if batch_size is not None:
       raise ValueError(
           'If ' + steps_name + ' is set, the `batch_size` must be None.')
-  elif ins and hasattr(ins[0], 'shape'):
-    num_samples = ins[0].shape[0]
-  else:
+  if has_symbolic_tensors(ins) and steps is None:
+    raise ValueError('If your data is in the form of symbolic tensors, '
+                     'you should specify the `' + steps_name + '` argument '
+                     '(instead of the `batch_size` argument).')
+  if ins and hasattr(ins[0], 'shape'):
+    num_samples = int(ins[0].shape[0])
+  elif steps is None:
     raise ValueError(
         'Either the input data should have '
        'a defined shape, or ' + steps_name + ' should be specified.')
   return num_samples
 
 
+def standardize_single_array(x):
+  if x is None:
+    return None
+  elif tensor_util.is_tensor(x):
+    return x
+  elif x.ndim == 1:
+    x = np.expand_dims(x, 1)
+  return x
+
+
 def standardize_input_data(data,
                            names,
                            shapes=None,
@@ -130,9 +145,7 @@ def standardize_input_data(data,
     else:
       data = data.values if data.__class__.__name__ == 'DataFrame' else data
       data = [data]
-    data = [
-        np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data
-    ]
+    data = [standardize_single_array(x) for x in data]
 
   if len(data) != len(names):
     if data and hasattr(data[0], 'shape'):
@@ -158,7 +171,7 @@ def standardize_input_data(data,
   # Check shapes compatibility.
   if shapes:
     for i in range(len(names)):
-      if shapes[i] is not None:
+      if shapes[i] is not None and not tensor_util.is_tensor(data[i]):
         data_shape = data[i].shape
         shape = shapes[i]
         if data[i].ndim != len(shape):
@@ -245,12 +258,13 @@ def check_array_lengths(inputs, targets, weights=None):
   """
 
   def set_of_lengths(x):
-    # return a set with the variation between
+    # Returns a set with the variation between
     # different shapes, with None => 0
     if x is None:
       return {}
     else:
-      return set([y.shape[0] for y in x if y is not None])
+      return set([y.shape[0] for y in x
+                  if y is not None and not tensor_util.is_tensor(y)])
 
   set_x = set_of_lengths(inputs)
   set_y = set_of_lengths(targets)
@@ -532,3 +546,8 @@ def standardize_weights(y,
     return weights
   else:
     return None
+
+
+def has_symbolic_tensors(ls):
+  return (any(tensor_util.is_tensor(v) for v in ls)
+          and not context.executing_eagerly())
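
Note: taken together, the changes above mean symbolic tensors can be passed
straight to `fit`/`evaluate`/`predict`, with batching driven by
`steps_per_epoch`/`steps` in graph mode and by `batch_size` under eager
execution. An illustrative usage sketch (mirroring the tests above, written
against the public `tf.keras` API rather than the internal `_impl` modules):

  import tensorflow as tf

  x = tf.keras.layers.Input(shape=(3,))
  y = tf.keras.layers.Dense(4)(x)
  model = tf.keras.Model(x, y)
  model.compile('rmsprop', 'mse')

  # Symbolic tensors used directly as input and target data.
  inputs = tf.keras.backend.zeros(shape=(10, 3))
  targets = tf.keras.backend.zeros(shape=(10, 4))

  # Graph mode: specify steps rather than batch_size.
  model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0)
  model.evaluate(inputs, targets, steps=2, verbose=0)
  model.predict(inputs, steps=2)
  # Under eager execution, the same calls instead require `batch_size`
  # (see fit_loop in training_eager.py above).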