From 6d014ecbd63fec208742b327b94c39afd4953fb8 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 2 Mar 2018 15:11:13 -0800 Subject: [PATCH] ReadVariableOp in C for eager (only for the fastpath) PiperOrigin-RevId: 187676012 --- tensorflow/python/eager/benchmarks_test.py | 21 ++ tensorflow/python/eager/pywrap_tfe.h | 7 + tensorflow/python/eager/pywrap_tfe_src.cc | 460 +++++++++++++++++-------- tensorflow/python/eager/pywrap_tfe_test.py | 31 ++ tensorflow/python/ops/resource_variable_ops.py | 4 + tensorflow/python/pywrap_tfe.i | 1 + 6 files changed, 377 insertions(+), 147 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 527a919..551d564 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -275,6 +275,16 @@ class MicroBenchmarks(test.Benchmark): def _benchmark_read_variable(self, m, num_iters): self._run(m.value, num_iters) + def _benchmark_matmul_read_variable(self, m, num_iters): + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + + def _benchmark_matmul_read_variable_with_tape(self, m, num_iters): + with backprop.GradientTape() as tape: + tape.watch(m) + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + def _benchmark_read_variable_with_tape(self, m, num_iters): with backprop.GradientTape() as tape: tape.watch(m) @@ -416,6 +426,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) + def benchmark_matmul_read_variable_op_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + self._benchmark_matmul_read_variable(m, num_iters=self._num_iters_2_by_2) + + def benchmark_matmul_read_variable_op_with_tape_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + 
self._benchmark_matmul_read_variable_with_tape( + m, num_iters=self._num_iters_2_by_2) + def benchmark_read_variable_op_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index b1b4a6b..32d731d 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -51,6 +51,13 @@ void TFE_Py_Execute(TFE_Context* ctx, const char* device_name, // This function is not thread-safe. PyObject* TFE_Py_RegisterExceptionClass(PyObject* e); +// Registers e as the type of the ResourceVariable class. +// Returns Py_None if registration succeeds, else throws a TypeError and returns +// NULL. +// +// This function is not thread-safe. +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e); + // Registers e as the Exception to be raised when the conditions of // TFE_Py_FastPathExecute_C have not been met. When this exception is set, it // is a signal to the calling code that it should fall back to the safer (and diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 42d97df..27c9d05 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -38,6 +38,23 @@ using tensorflow::strings::Printf; namespace { +struct FastPathOpExecInfo { + TFE_Context* ctx; + const char* device_name; + // The op def of the main op being executed. + const tensorflow::OpDef* op_def; + + bool run_callbacks; + bool run_post_exec_callbacks; + bool run_gradient_callback; + + // The op name of the main op being executed. + PyObject* name; + // The op type name of the main op being executed. 
+ PyObject* op_name; + PyObject* callbacks; +}; + #define PARSE_VALUE(fn_name, type, check_fn, parse_fn) \ bool fn_name(const string& key, PyObject* py_value, TF_Status* status, \ type* value) { \ @@ -120,6 +137,11 @@ bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, PyObject* py_type_enum = PyObject_GetAttrString(py_value, "_type_enum"); if (py_type_enum == nullptr) { + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting a DType.dtype for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); return false; } @@ -580,6 +602,8 @@ PyObject* fallback_exception_class = nullptr; // Python function that returns a backward_function. PyObject* backward_function_getter = nullptr; +PyTypeObject* resource_variable_type = nullptr; + tensorflow::mutex _uid_mutex(tensorflow::LINKER_INITIALIZED); tensorflow::int64 _uid GUARDED_BY(_uid_mutex) = 0; @@ -628,11 +652,28 @@ PyObject* TFE_Py_RegisterExceptionClass(PyObject* e) { "TFE_Py_RegisterExceptionClass: " "Registered class should be subclass of Exception."); return nullptr; - } else { - Py_INCREF(e); - exception_class = e; - Py_RETURN_NONE; } + + Py_INCREF(e); + exception_class = e; + Py_RETURN_NONE; +} + +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e) { + if (!PyType_Check(e)) { + PyErr_SetString( + PyExc_TypeError, + "TFE_Py_RegisterResourceVariableType: Need to register a type."); + return nullptr; + } + + if (resource_variable_type != nullptr) { + Py_DECREF(resource_variable_type); + } + + Py_INCREF(e); + resource_variable_type = reinterpret_cast<PyTypeObject*>(e); + Py_RETURN_NONE; } PyObject* TFE_Py_RegisterFallbackExceptionClass(PyObject* e) { @@ -1375,8 +1416,12 @@ PyObject* GetPythonObjectFromString(const char* s) { #endif } -bool CheckEagerTensors(PyObject* seq, int start_index, - const tensorflow::OpDef& op_def) { +bool CheckResourceVariable(PyObject* item) { + return PyObject_TypeCheck(item, resource_variable_type); +} + +bool
CheckInputsOk(PyObject* seq, int start_index, + const tensorflow::OpDef& op_def) { for (int i = 0; i < op_def.input_arg_size(); i++) { PyObject* item = PyTuple_GET_ITEM(seq, i + start_index); if (!op_def.input_arg(i).number_attr().empty() || @@ -1384,9 +1429,13 @@ bool CheckEagerTensors(PyObject* seq, int start_index, // This item should be a list input. if (!PyList_Check(item)) return false; for (Py_ssize_t j = 0; j < PyList_Size(item); j++) { - if (!EagerTensor_CheckExact(PyList_GET_ITEM(item, j))) return false; + PyObject* inner_item = PyList_GET_ITEM(item, j); + if (!EagerTensor_CheckExact(inner_item) && + !CheckResourceVariable(inner_item)) { + return false; + } } - } else if (!EagerTensor_CheckExact(item)) { + } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { return false; } } @@ -1394,71 +1443,6 @@ bool CheckEagerTensors(PyObject* seq, int start_index, return true; } -// Adds input and type attr to the op, and to the list of flattened -// inputs/attrs. -bool AddInputToOp(PyObject* input, const tensorflow::OpDef::ArgDef* input_arg, - std::vector* flattened_attrs, - std::vector* flattened_inputs, TFE_Op* op, - TF_Status* status) { - TFE_TensorHandle* input_handle = EagerTensor_Handle(input); - if (input_arg != nullptr && !input_arg->type_attr().empty()) { - auto dtype = TFE_TensorHandleDataType(input_handle); - TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); - if (flattened_attrs != nullptr) { - flattened_attrs->push_back( - GetPythonObjectFromString(input_arg->type_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(dtype)); - } - } - - if (flattened_inputs != nullptr) { - flattened_inputs->push_back(input); - } - TFE_OpAddInput(op, input_handle, status); - if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { - return false; - } - return true; -} - -const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { - const char* op_name = TFE_GetPythonString(py_op_name); - if (op_name == nullptr) { - 
PyErr_SetString(PyExc_TypeError, - Printf("expected a string for op_name, got %s instead", - py_op_name->ob_type->tp_name) - .c_str()); - return nullptr; - } - - const tensorflow::OpRegistrationData* op_reg_data = nullptr; - const tensorflow::Status lookup_status = - tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); - if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { - return nullptr; - } - return &op_reg_data->op_def; -} - -const char* GetDeviceName(PyObject* py_device_name) { - if (py_device_name != Py_None) { - return TFE_GetPythonString(py_device_name); - } - return nullptr; -} - -bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { - if (!PyList_Check(list)) { - PyErr_SetString(PyExc_TypeError, - Printf("expected a list for attr %s, got %s instead", - attr_name.data(), list->ob_type->tp_name) - .data()); - - return false; - } - return true; -} - bool OpDoesntRequireOutput(const string& op_name) { static tensorflow::gtl::FlatSet* ops_that_dont_require_outputs = new tensorflow::gtl::FlatSet({ @@ -1583,7 +1567,6 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, break; } } - if (!should_record) Py_RETURN_NONE; string c_op_name = TFE_GetPythonString(op_name); @@ -1617,50 +1600,212 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, Py_RETURN_NONE; } -bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, - const tensorflow::OpDef* op_def, PyObject* args, - const std::vector& flattened_inputs, - const std::vector& flattened_attrs, - PyObject* flattened_result, PyObject* op_name, PyObject* name, - PyObject* callbacks) { - tensorflow::Safe_PyObjectPtr inputs = - tensorflow::make_safe(PyTuple_New(flattened_inputs.size())); +void MaybeWatchVariable(PyObject* input) { + DCHECK(CheckResourceVariable(input)); + DCHECK(PyObject_HasAttrString(input, "_trainable")); + + tensorflow::Safe_PyObjectPtr trainable( + PyObject_GetAttrString(input, "_trainable")); 
+ if (trainable.get() == Py_False) return; + TFE_Py_TapeSetWatchVariable(input); +} + +bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, + PyObject* input, tensorflow::Safe_PyObjectPtr* output, + TF_Status* status) { + MaybeWatchVariable(input); + + TFE_Op* op = TFE_NewOp(parent_op_exec_info.ctx, "ReadVariableOp", status); + auto cleaner = tensorflow::gtl::MakeCleanup([op] { TFE_DeleteOp(op); }); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Set dtype + DCHECK(PyObject_HasAttrString(input, "_dtype")); + tensorflow::Safe_PyObjectPtr dtype(PyObject_GetAttrString(input, "_dtype")); + int value; + if (!ParseTypeValue("_dtype", dtype.get(), status, &value)) { + return false; + } + TFE_OpSetAttrType(op, "dtype", static_cast<TF_DataType>(value)); + + TFE_OpSetDevice(op, parent_op_exec_info.device_name, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Get handle + tensorflow::Safe_PyObjectPtr handle(PyObject_GetAttrString(input, "_handle")); + if (!EagerTensor_CheckExact(handle.get())) return false; + TFE_OpAddInput(op, EagerTensor_Handle(handle.get()), status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + int num_retvals = 1; + TFE_TensorHandle* output_handle; + TFE_Execute(op, &output_handle, &num_retvals, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Always create the py object (and correctly DECREF it) from the returned + // value, else the data will leak. + output->reset(EagerTensorFromHandle(output_handle)); + + // TODO(nareshmodi): Should we run post exec callbacks here? + if (parent_op_exec_info.run_gradient_callback) { + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(1)); + PyTuple_SET_ITEM(inputs.get(), 0, handle.release()); + + tensorflow::Safe_PyObjectPtr outputs(PyTuple_New(1)); + Py_INCREF(output->get()); // tuple steals a ref; keep output alive.
+ PyTuple_SET_ITEM(outputs.get(), 0, output->get()); + + if (!RecordGradient(GetPythonObjectFromString("ReadVariableOp"), + inputs.get(), Py_None, outputs.get(), Py_None)) { + return false; + } + } + + return true; +} + +// Supports only 2 cases at the moment: +// i) input is an EagerTensor - it is passed through with a new reference. +// ii) input is a ResourceVariable - in this case, its value is read with +// ReadVariableOp and the resulting EagerTensor is returned. +bool ConvertToTensor(const FastPathOpExecInfo& op_exec_info, PyObject* input, + tensorflow::Safe_PyObjectPtr* output_handle, + TF_Status* status) { + if (CheckResourceVariable(input)) { + return ReadVariableOp(op_exec_info, input, output_handle, status); + } + + Py_INCREF(input); + output_handle->reset(input); + + return true; +} + +// Adds input and type attr to the op, and to the list of flattened +// inputs/attrs. +bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, + const tensorflow::OpDef::ArgDef* input_arg, + std::vector<tensorflow::Safe_PyObjectPtr>* flattened_attrs, + std::vector<tensorflow::Safe_PyObjectPtr>* flattened_inputs, + TFE_Op* op, TF_Status* status) { + // py_eager_tensor's ownership is transferred to flattened_inputs if it is + // required, else the object is destroyed and DECREF'd when the object goes + // out of scope in this function.
+ tensorflow::Safe_PyObjectPtr py_eager_tensor = nullptr; + + if (!ConvertToTensor(op_exec_info, input, &py_eager_tensor, status)) { + return false; + } + + TFE_TensorHandle* input_handle = EagerTensor_Handle(py_eager_tensor.get()); + + if (input_arg != nullptr && !input_arg->type_attr().empty()) { + auto dtype = TFE_TensorHandleDataType(input_handle); + TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); + if (flattened_attrs != nullptr) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(input_arg->type_attr().data())); + flattened_attrs->emplace_back(PyLong_FromLong(dtype)); + } + } + + if (flattened_inputs != nullptr) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); + } + + TFE_OpAddInput(op, input_handle, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { + return false; + } + + return true; +} + +const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { + const char* op_name = TFE_GetPythonString(py_op_name); + if (op_name == nullptr) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a string for op_name, got %s instead", + py_op_name->ob_type->tp_name) + .c_str()); + return nullptr; + } + + const tensorflow::OpRegistrationData* op_reg_data = nullptr; + const tensorflow::Status lookup_status = + tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { + return nullptr; + } + return &op_reg_data->op_def; +} + +const char* GetDeviceName(PyObject* py_device_name) { + if (py_device_name != Py_None) { + return TFE_GetPythonString(py_device_name); + } + return nullptr; +} + +bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a list for attr %s, got %s instead", + attr_name.data(), list->ob_type->tp_name) + .data()); + + return false; + } + return true; +} + +bool RunCallbacks( + const FastPathOpExecInfo& op_exec_info, PyObject* args, + const 
std::vector& flattened_inputs, + const std::vector& flattened_attrs, + PyObject* flattened_result) { + if (!op_exec_info.run_callbacks) return true; + + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(flattened_inputs.size())); for (int i = 0; i < flattened_inputs.size(); i++) { - PyObject* input = flattened_inputs[i]; + PyObject* input = flattened_inputs[i].get(); Py_INCREF(input); PyTuple_SET_ITEM(inputs.get(), i, input); } int num_non_inferred_attrs = PyTuple_GET_SIZE(args) - - op_def->input_arg_size() - + op_exec_info.op_def->input_arg_size() - kFastPathExecuteInputStartIndex; int num_attrs = flattened_attrs.size() + num_non_inferred_attrs; - tensorflow::Safe_PyObjectPtr attrs = - tensorflow::make_safe(PyTuple_New(num_attrs)); + tensorflow::Safe_PyObjectPtr attrs(PyTuple_New(num_attrs)); for (int i = 0; i < num_non_inferred_attrs; i++) { - auto* attr = PyTuple_GET_ITEM( - args, kFastPathExecuteInputStartIndex + op_def->input_arg_size() + i); + auto* attr = + PyTuple_GET_ITEM(args, kFastPathExecuteInputStartIndex + + op_exec_info.op_def->input_arg_size() + i); Py_INCREF(attr); PyTuple_SET_ITEM(attrs.get(), i, attr); } for (int i = num_non_inferred_attrs; i < num_attrs; i++) { - // Not INCREFing anything in flattened_attrs as each of those is a new - // reference, so allow the attrs tuple to steal the reference. 
- PyTuple_SET_ITEM(attrs.get(), i, - flattened_attrs.at(i - num_non_inferred_attrs)); + PyObject* attr_or_name = + flattened_attrs.at(i - num_non_inferred_attrs).get(); + Py_INCREF(attr_or_name); + PyTuple_SET_ITEM(attrs.get(), i, attr_or_name); } - if (run_gradient_callback) { - RecordGradient(op_name, inputs.get(), attrs.get(), flattened_result, name); + if (op_exec_info.run_gradient_callback) { + if (!RecordGradient(op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)) { + return false; + } } - if (run_post_exec_callbacks) { - tensorflow::Safe_PyObjectPtr callback_args = tensorflow::make_safe( - Py_BuildValue("OOOOO", op_name, inputs.get(), attrs.get(), - flattened_result, name)); - for (Py_ssize_t i = 0; i < PyList_Size(callbacks); i++) { - PyObject* callback_fn = PyList_GET_ITEM(callbacks, i); + if (op_exec_info.run_post_exec_callbacks) { + tensorflow::Safe_PyObjectPtr callback_args( + Py_BuildValue("OOOOO", op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)); + for (Py_ssize_t i = 0; i < PyList_Size(op_exec_info.callbacks); i++) { + PyObject* callback_fn = PyList_GET_ITEM(op_exec_info.callbacks, i); if (!PyCallable_Check(callback_fn)) { PyErr_SetString( PyExc_TypeError, @@ -1695,14 +1840,30 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - TFE_Context* ctx = reinterpret_cast( + FastPathOpExecInfo op_exec_info; + + op_exec_info.ctx = reinterpret_cast( PyCapsule_GetPointer(PyTuple_GET_ITEM(args, 0), nullptr)); - const char* device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); - PyObject* op_name = PyTuple_GET_ITEM(args, 2); - const tensorflow::OpDef* op_def = GetOpDef(op_name); - if (op_def == nullptr) return nullptr; - PyObject* name = PyTuple_GET_ITEM(args, 3); - PyObject* callbacks = PyTuple_GET_ITEM(args, 4); + op_exec_info.device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); + op_exec_info.op_name = PyTuple_GET_ITEM(args, 2); + 
op_exec_info.op_def = GetOpDef(op_exec_info.op_name); + if (op_exec_info.op_def == nullptr) return nullptr; + op_exec_info.name = PyTuple_GET_ITEM(args, 3); + op_exec_info.callbacks = PyTuple_GET_ITEM(args, 4); + + const tensorflow::OpDef* op_def = op_exec_info.op_def; + + // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks + // (similar to benchmark_tf_gradient_function_*). Also consider using an + // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks + // point out problems with heap allocs. + op_exec_info.run_gradient_callback = + !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); + op_exec_info.run_post_exec_callbacks = + op_exec_info.callbacks != Py_None && + PyList_Size(op_exec_info.callbacks) > 0; + op_exec_info.run_callbacks = op_exec_info.run_gradient_callback || + op_exec_info.run_post_exec_callbacks; if (args_size < kFastPathExecuteInputStartIndex + op_def->input_arg_size()) { PyErr_SetString( @@ -1715,7 +1876,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (!CheckEagerTensors(args, kFastPathExecuteInputStartIndex, *op_def)) { + if (!CheckInputsOk(args, kFastPathExecuteInputStartIndex, *op_def)) { RaiseFallbackException( "This function does not handle the case of the path where " "all inputs are not already EagerTensors."); @@ -1723,7 +1884,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } TF_Status* status = TF_NewStatus(); - TFE_Op* op = TFE_NewOp(ctx, op_def->name().c_str(), status); + TFE_Op* op = TFE_NewOp(op_exec_info.ctx, op_def->name().c_str(), status); auto cleaner = tensorflow::gtl::MakeCleanup([status, op] { TF_DeleteStatus(status); TFE_DeleteOp(op); @@ -1750,8 +1911,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { // OpRegistrationData. 
for (const auto& attr : op_def->attr()) { if (attr_name == attr.name()) { - SetOpAttrWithDefaults(ctx, op, attr, attr_name.data(), py_attr_value, - &attr_list_sizes, status); + SetOpAttrWithDefaults(op_exec_info.ctx, op, attr, attr_name.data(), + py_attr_value, &attr_list_sizes, status); if (TF_GetCode(status) != TF_OK) { RaiseFallbackException(TF_Message(status)); @@ -1763,33 +1924,28 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } } - TFE_OpSetDevice(op, device_name, status); + TFE_OpSetDevice(op, op_exec_info.device_name, status); if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { return nullptr; } - // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks - // (similar to benchmark_tf_gradient_function_*). Also consider using an - // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks - // point out problems with heap allocs. - bool run_gradient_callback = - !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); - bool run_post_exec_callbacks = - callbacks != Py_None && PyList_Size(callbacks) > 0; - bool run_callbacks = run_gradient_callback || run_post_exec_callbacks; // Flat attrs and inputs as required by the record_gradient call. The attrs // here only contain inferred attrs (non-inferred attrs are added directly // from the input args). - // All items in flattened_attrs contain new references. - // All items in flattened_inputs contain borrowed references. + // All items in flattened_attrs and flattened_inputs contain + // Safe_PyObjectPtr - any time something steals a reference to this, it must + // INCREF. // TODO(nareshmodi): figure out why PyList_New/PyList_Append don't work // directly. 
- std::unique_ptr<std::vector<PyObject*>> flattened_attrs = nullptr; - std::unique_ptr<std::vector<PyObject*>> flattened_inputs = nullptr; + std::unique_ptr<std::vector<tensorflow::Safe_PyObjectPtr>> flattened_attrs = + nullptr; + std::unique_ptr<std::vector<tensorflow::Safe_PyObjectPtr>> flattened_inputs = + nullptr; - if (run_callbacks) { - flattened_attrs.reset(new std::vector<PyObject*>); - flattened_inputs.reset(new std::vector<PyObject*>); + // TODO(nareshmodi): Encapsulate callbacks information into a struct. + if (op_exec_info.run_callbacks) { + flattened_attrs.reset(new std::vector<tensorflow::Safe_PyObjectPtr>); + flattened_inputs.reset(new std::vector<tensorflow::Safe_PyObjectPtr>); } // Add inferred attrs and inputs. @@ -1809,16 +1965,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); TFE_OpSetAttrInt(op, input_arg.number_attr().data(), len); - if (run_callbacks) { - flattened_attrs->push_back( + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( GetPythonObjectFromString(input_arg.number_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(len)); + flattened_attrs->emplace_back(PyLong_FromLong(len)); } attr_list_sizes[input_arg.number_attr()] = len; if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(PyList_GET_ITEM(input, 0), &input_arg, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, 0), &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; @@ -1826,7 +1982,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr.
- if (!AddInputToOp(PyList_GET_ITEM(input, j), nullptr /* input_arg */, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, j), + nullptr /* input_arg */, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { return nullptr; @@ -1840,12 +1997,20 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); tensorflow::gtl::InlinedVector attr_value(len); PyObject* py_attr_value = nullptr; - if (run_callbacks) { + if (op_exec_info.run_callbacks) { py_attr_value = PyTuple_New(len); } for (Py_ssize_t j = 0; j < len; j++) { PyObject* py_input = PyList_GET_ITEM(input, j); - TFE_TensorHandle* input_handle = EagerTensor_Handle(py_input); + tensorflow::Safe_PyObjectPtr py_eager_tensor; + if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, + status)) { + return nullptr; + } + + TFE_TensorHandle* input_handle = + EagerTensor_Handle(py_eager_tensor.get()); + attr_value[j] = TFE_TensorHandleDataType(input_handle); TFE_OpAddInput(op, input_handle, status); @@ -1853,22 +2018,23 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (run_callbacks) { - flattened_inputs->push_back(py_input); + if (op_exec_info.run_callbacks) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); PyTuple_SET_ITEM(py_attr_value, j, PyLong_FromLong(attr_value[j])); } } - if (run_callbacks) { - flattened_attrs->push_back(GetPythonObjectFromString(attr_name.data())); - flattened_attrs->push_back(py_attr_value); + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(attr_name.data())); + flattened_attrs->emplace_back(py_attr_value); } TFE_OpSetAttrTypeList(op, attr_name.data(), attr_value.data(), attr_value.size()); attr_list_sizes[attr_name] = len; } else { // The item is a single item. 
- if (!AddInputToOp(input, &input_arg, flattened_attrs.get(), + if (!AddInputToOp(op_exec_info, input, &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; } @@ -1892,12 +2058,14 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_BEGIN_ALLOW_THREADS; TFE_Execute(op, retvals.data(), &num_retvals, status); Py_END_ALLOW_THREADS; + if (TF_GetCode(status) != TF_OK) { // Augment the status with the op_name for easier debugging similar to // TFE_Py_Execute. TF_SetStatus(status, TF_GetCode(status), - tensorflow::strings::StrCat(TF_Message(status), " [Op:", - TFE_GetPythonString(op_name), "]") + tensorflow::strings::StrCat( + TF_Message(status), + " [Op:", TFE_GetPythonString(op_exec_info.op_name), "]") .c_str()); MaybeRaiseExceptionFromTFStatus(status, nullptr); @@ -1909,10 +2077,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyList_SET_ITEM(flat_result, i, EagerTensorFromHandle(retvals[i])); } - if (run_callbacks && - !RunCallbacks(run_gradient_callback, run_post_exec_callbacks, op_def, - args, *flattened_inputs, *flattened_attrs, flat_result, - op_name, name, callbacks)) { + if (!RunCallbacks(op_exec_info, args, *flattened_inputs, *flattened_attrs, + flat_result)) { return nullptr; } diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 418ed75..46c5601 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops class Tests(test.TestCase): @@ -55,6 +56,21 @@ class Tests(test.TestCase): @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableMatMulCorrectResponse(self): + ctx = 
context.context() + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + x = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, "transpose_a", + False, "transpose_b", False) + y = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, a_2_by_2, + "transpose_a", False, "transpose_b", False) + + self.assertAllEqual(x, y) + + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created def testFastpathExecute_TapeWrite(self): ctx = context.context() with backprop.GradientTape(persistent=True) as tape: @@ -67,6 +83,21 @@ class Tests(test.TestCase): self.assertAllEqual(dz_dy.numpy(), constant_op.constant(4.0, shape=[2, 2]).numpy()) + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableTapeWrite(self): + ctx = context.context() + with backprop.GradientTape(persistent=True) as tape: + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + tape.watch(m) + z = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, + "transpose_a", False, "transpose_b", False) + dz_dy = tape.gradient(z, [m])[0] + self.assertAllEqual(dz_dy.numpy(), + constant_op.constant(4.0, shape=[2, 2]).numpy()) + # Tests homogeneous list op @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index cbac3c6..6c5d692 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 +from tensorflow.python import pywrap_tensorflow from 
tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes @@ -932,6 +933,9 @@ class ResourceVariable(variables.Variable): "Tensor object.") +pywrap_tensorflow.TFE_Py_RegisterResourceVariableType(ResourceVariable) + + def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref) # pylint: disable=protected-access diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 7ab0db5..b481ddf 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -31,6 +31,7 @@ limitations under the License. %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_RegisterBackwardFunctionGetter; %rename("%s") TFE_Py_RegisterFallbackExceptionClass; +%rename("%s") TFE_Py_RegisterResourceVariableType; %rename("%s") TFE_Py_Execute; %rename("%s") TFE_Py_FastPathExecute; %rename("%s") TFE_Py_RecordGradient; -- 2.7.4