From 0fab6e888c5f90de3e878566123c1906261ce27e Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Fri, 2 Feb 2018 19:10:39 -0800 Subject: [PATCH] Extended TFE_OpSetDevice() with the ability to set an op device from non-GPU back to GPU. Added unit testing, and also refined unit test logic for checking the presence of a GPU device. The latter is needed when we add XLA device support. PiperOrigin-RevId: 184366172 --- tensorflow/c/eager/c_api.cc | 26 +++++++----- tensorflow/c/eager/c_api.h | 3 ++ tensorflow/c/eager/c_api_test.cc | 92 ++++++++++++++++++++++++++++------------ 3 files changed, 84 insertions(+), 37 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index d5b9bff..d65b592 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -253,15 +253,6 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, void TFE_DeleteOp(TFE_Op* op) { delete op; } -static void TFE_OpSetDeviceHelper(TFE_Op* op, tensorflow::Device* device, - TF_Status* status) { - // Questionable heuristic: Place the op on the same device as the first input - // placed outside of host memory? - if (IsCPU(op->device) && !IsCPU(device)) { - op->device = device; - } -} - void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { @@ -269,11 +260,24 @@ void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { op->ctx->session->device_mgr->LookupDevice(device_name, &d); if (!status->status.ok()) return; } - TFE_OpSetDeviceHelper(op, d, status); + op->device = d; +} + +const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { + tensorflow::Device* device = + (op->device == nullptr) ? op->ctx->devices()[0] : op->device; + return device->name().c_str(); } void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { - TFE_OpSetDeviceHelper(op, h->d, status); + // Questionable heuristic ... 
+ // + // Motivation: After an 'op' is placed on GPU because some of its earlier + // inputs are on GPU, we want to keep the 'op' there, even if some later + // inputs of it are not on GPU. + if (IsCPU(op->device) && !IsCPU(h->d)) { + op->device = h->d; + } if (!status->status.ok()) return; op->inputs.push_back(h->t); op->input_devices.push_back(h->d); diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 387de07..6a2aff1 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -154,6 +154,9 @@ TF_CAPI_EXPORT extern void TFE_DeleteOp(TFE_Op* op); TF_CAPI_EXPORT extern void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status); +// The returned string remains valid throughout the lifetime of 'op'. +TF_CAPI_EXPORT extern const char* TFE_OpGetDevice(TFE_Op* op, + TF_Status* status); TF_CAPI_EXPORT extern void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status); diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 18e7a64..b0409af 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -60,6 +60,31 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { return op; } +// If there is a GPU device, returns true and sets 'gpu_device_name' +// accordingly. 
+bool GetGPUDeviceName(TFE_Context* ctx, string* gpu_device_name) { + std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status( + TF_NewStatus(), TF_DeleteStatus); + TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get()); + CHECK_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + const int num_devices = TF_DeviceListCount(devices); + for (int i = 0; i < num_devices; ++i) { + const string device_type(TF_DeviceListType(devices, i, status.get())); + CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); + const string device_name(TF_DeviceListName(devices, i, status.get())); + CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); + if (device_type == "GPU") { + *gpu_device_name = device_name; + LOG(INFO) << "Found GPU device " << device_name; + TF_DeleteDeviceList(devices); + return true; + } + } + TF_DeleteDeviceList(devices); + return false; +} + void BM_InitOp(int iters) { tensorflow::testing::StopTiming(); TF_Status* status = TF_NewStatus(); @@ -288,22 +313,15 @@ TEST(CAPI, TensorHandleSilentCopy) { TF_Tensor* t = TFE_TensorHandleResolve(hcpu, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - const int num_devices = TF_DeviceListCount(devices); - // Disable the test if no GPU is present. 
- if (num_devices > 1) { - const int device_to_use = 1; - const string name(TF_DeviceListName(devices, device_to_use, status.get())); - ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - - TFE_TensorHandle* hgpu = - TFE_TensorHandleCopyToDevice(hcpu, ctx, name.c_str(), status.get()); + string gpu_device_name; + if (GetGPUDeviceName(ctx, &gpu_device_name)) { + TFE_TensorHandle* hgpu = TFE_TensorHandleCopyToDevice( + hcpu, ctx, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); TFE_Op* matmul = MatMulOp(ctx, hcpu, hgpu); - TFE_OpSetDevice(matmul, name.c_str(), status.get()); + TFE_OpSetDevice(matmul, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); TFE_TensorHandle* retvals[1]; int num_retvals = 1; @@ -314,7 +332,6 @@ TEST(CAPI, TensorHandleSilentCopy) { TFE_DeleteTensorHandle(hgpu); } - TF_DeleteDeviceList(devices); TF_DeleteTensor(t); TFE_DeleteTensorHandle(hcpu); TFE_DeleteContext(ctx, status.get()); @@ -337,22 +354,15 @@ TEST(CAPI, TensorHandleSilentCopyLocal) { TF_Tensor* t = TFE_TensorHandleResolve(hcpu, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - const int num_devices = TF_DeviceListCount(devices); - // Disable the test if no GPU is present. 
- if (num_devices > 1) { - const int device_to_use = 1; - const string name(TF_DeviceListName(devices, device_to_use, status.get())); - ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - - TFE_TensorHandle* hgpu = - TFE_TensorHandleCopyToDevice(hcpu, ctx, name.c_str(), status.get()); + string gpu_device_name; + if (GetGPUDeviceName(ctx, &gpu_device_name)) { + TFE_TensorHandle* hgpu = TFE_TensorHandleCopyToDevice( + hcpu, ctx, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); TFE_Op* matmul = MatMulOp(ctx, hcpu, hgpu); - TFE_OpSetDevice(matmul, name.c_str(), status.get()); + TFE_OpSetDevice(matmul, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); TFE_TensorHandle* retvals[1]; int num_retvals = 1; @@ -363,13 +373,43 @@ TEST(CAPI, TensorHandleSilentCopyLocal) { TFE_DeleteTensorHandle(hgpu); } - TF_DeleteDeviceList(devices); TF_DeleteTensor(t); TFE_DeleteTensorHandle(hcpu); TFE_DeleteContext(ctx, status.get()); EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); } +TEST(CAPI, SetAndGetOpDevices) { + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_Context* ctx = TFE_NewContext(opts, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_TensorHandle* m = TestMatrixTensorHandle(); + TFE_Op* matmul = MatMulOp(ctx, m, m); + + // Disable the test if no GPU is present. 
+ string gpu_device_name; + if (GetGPUDeviceName(ctx, &gpu_device_name)) { + TFE_OpSetDevice(matmul, "GPU:0", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + const char* device_name = TFE_OpGetDevice(matmul, status); + ASSERT_TRUE(strstr(device_name, "GPU:0") != nullptr); + + TFE_OpSetDevice(matmul, "CPU:0", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + device_name = TFE_OpGetDevice(matmul, status); + ASSERT_TRUE(strstr(device_name, "CPU:0") != nullptr); + } + + TFE_DeleteOp(matmul); + TFE_DeleteTensorHandle(m); + TFE_DeleteContext(ctx, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteStatus(status); +} + TEST(CAPI, Execute) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); -- 2.7.4