tensorflow::Tensor tensor;
status->status = tensorflow::TF_TensorToTensor(t, &tensor);
if (!status->status.ok()) return nullptr;
- return new TFE_TensorHandle(tensor, nullptr);
+ return new TFE_TensorHandle(tensor, nullptr, nullptr);
}
void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; }
// has device type XLA_CPU, and the other CPU.
const bool both_on_cpu = src_cpu && dst_cpu;
if (is_same_device || both_on_cpu) {
- return new TFE_TensorHandle(h->t, dst_cpu ? nullptr : dstd);
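+ // dstd == nullptr denotes the local CPU (see the TODO on TFE_TensorHandle::d).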
+ dstd = dst_cpu ? nullptr : dstd;
+ return new TFE_TensorHandle(h->t, dstd, dstd);
}
tensorflow::Tensor* src = &(h->t);
if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT &&
}
tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape());
if (src->shape().num_elements() == 0) {
- return new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd);
+ dstd = dst_cpu ? nullptr : dstd;
+ return new TFE_TensorHandle(dst, dstd, dstd);
}
tensorflow::DeviceContext* src_device_context = nullptr;
if (!src_cpu) {
});
n.WaitForNotification();
return (TF_GetCode(status) == TF_OK)
- ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd)
+ ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd,
+ dst_cpu ? nullptr : dstd)
: nullptr;
}
if (!status->status.ok()) return;
op->inputs.push_back(h->t);
op->input_devices.push_back(h->d);
+ op->input_op_devices.push_back(h->op_device);
op->attrs.NumInputs(op->inputs.size());
}
}
// We are only here if the policy is warn or silent copies, so we should
// trigger a copy.
- TFE_TensorHandle original{op->inputs[i], op->input_devices[i]};
+ TFE_TensorHandle original{op->inputs[i], op->input_devices[i],
+ op->device};
TF_Status* s = TF_NewStatus();
TFE_TensorHandle* copied_tensor = TFE_TensorHandleCopyToDevice(
&original, ctx, expected_device->name().c_str(), s);
// via `op_input_to_func_input`, adjust the actual inputs accordingly.
launch_op->inputs = op->inputs;
launch_op->input_devices = op->input_devices;
+ launch_op->input_op_devices = op->input_op_devices;
if (!op_input_to_func_input.empty()) {
DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size());
if (!op->input_devices.empty()) {
op = xla_launch_op.get();
}
#endif // TENSORFLOW_EAGER_USE_XLA
-
TFE_Context* ctx = op->ctx;
tensorflow::Device* device = op->device;
+ // Ensure all resource-touching ops run on the device where the resource
+ // resides, regardless of any other device that has been specified. This
+ // matches the graph-mode behavior.
+ for (int i = 0; i < op->inputs.size(); ++i) {
+ if (op->inputs[i].dtype() == tensorflow::DT_RESOURCE &&
+ op->input_op_devices[i] != device) {
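+ // A null op_device means the input was produced on the local CPU
+ // (ctx->devices()[0]).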
+ tensorflow::Device* d = op->input_op_devices[i] == nullptr
+ ? ctx->devices()[0]
+ : op->input_op_devices[i];
+ VLOG(1) << "Changing device of operation " << op->name << " to "
+ << d->name() << " because input #" << i
+ << " is a resource in this device.";
+ device = d;
+ op->device = d;
+ }
+ }
if (!ctx->soft_placement && device == nullptr) {
// TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU
device = ctx->devices()[0];
(*output_memory_types)[i] == tensorflow::HOST_MEMORY) {
d = nullptr;
}
- retvals[i] = new TFE_TensorHandle(outputs[i], d);
+ retvals[i] = new TFE_TensorHandle(outputs[i], d, device);
}
}
} // extern "C"
TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) {
- return new TFE_TensorHandle(t, nullptr);
+ return new TFE_TensorHandle(t, nullptr, nullptr);
}
const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
};
struct TFE_TensorHandle {
- TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d)
- : t(t), d(d) {}
+ TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d,
+ tensorflow::Device* op_device)
+ : t(t), d(d), op_device(op_device) {}
tensorflow::Tensor t;
// TODO(ashankar): d == nullptr iff local CPU
// TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a
// TFE_TensorHandle does not outlive the TFE_Context from which it came?
tensorflow::Device* d;
+
+ // Device on which the op that produced this tensor was executed. Equal to d
+ // for constant tensors.
+ tensorflow::Device* op_device;
};
struct TFE_Op {
const tensorflow::AttrTypeMap* attr_types;
std::vector<tensorflow::Tensor> inputs;
std::vector<tensorflow::Device*> input_devices;
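+ // Devices on which the ops producing the inputs ran; parallels input_devices.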
+ std::vector<tensorflow::Device*> input_op_devices;
tensorflow::Device* device;
bool use_xla = false;
};
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_resource_variable_ops
from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import resource_variable_ops
def execute(op_name, num_outputs, inputs, attrs=None):
attrs=('T', x.dtype.as_datatype_enum))[0].cpu().numpy()
self.assertEqual(3, result)
+ def testResourceTensorPlacement(self):
+ if not context.context().num_gpus():
+ self.skipTest('No GPUs found')
+
+ with context.device('gpu:0'):
+ v = resource_variable_ops.ResourceVariable(1.0)
+ with context.device('cpu:0'):
+ # Check that even though we specified the cpu device, the read op runs on
+ # the device where the resource handle lives.
+ self.assertAllEqual(
+ gen_resource_variable_ops.read_variable_op(v.handle, v.dtype), 1.0)
+
def testCopyBetweenDevices(self):
if not context.context().num_gpus():
self.skipTest('No GPUs found')