From 2e707494c4b1058e1186c67b1030f635bdf52dac Mon Sep 17 00:00:00 2001
From: Guangda Lai
Date: Thu, 22 Feb 2018 12:24:22 -0800
Subject: [PATCH] Fix BaseGPUDevice, let it report the actual memory limit of
 the allocator.

Also added a helper method to reset ProcessState.

PiperOrigin-RevId: 186655996
---
 tensorflow/core/common_runtime/gpu/gpu_device.cc | 29 +++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 15ff15f..8357cc5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -1013,21 +1013,34 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
   GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
   CudaGpuId cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id);
   int numa_node = dev_locality.numa_node();
-  Bytes allocated_bytes = static_cast<Bytes>(memory_limit);
 
   gpu::StreamExecutor* se =
       GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
   const gpu::DeviceDescription& desc = se->GetDeviceDescription();
-  LOG(INFO) << "Creating TensorFlow device (" << device_name << " with "
-            << (memory_limit >> 20) << " MB memory) -> physical GPU ("
-            << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
 
   ProcessState* process_state = ProcessState::singleton();
+  Allocator* gpu_allocator = process_state->GetGPUAllocator(
+      options.config.gpu_options(), tf_gpu_id, memory_limit);
+  if (gpu_allocator == nullptr) {
+    return errors::Internal("Failed to get memory allocator for TF GPU ",
+                            tf_gpu_id.value(), " with ", memory_limit,
+                            " bytes of memory.");
+  }
+  AllocatorStats stats;
+  gpu_allocator->GetStats(&stats);
+  // 'memory_limit' is the required memory size, but if the allocator with given
+  // tf_gpu_id was created before, we'll use it instead of creating a new one
+  // (as TF gpu device is a shared resource), in which case the actual memory
+  // limit represented by 'stats.bytes_limit' used by that allocator may be
+  // different (which should be an error).
+  //
+  // TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit.
   BaseGPUDevice* gpu_device = CreateGPUDevice(
-      options, device_name, allocated_bytes, dev_locality, tf_gpu_id,
-      GetShortDeviceDescription(cuda_gpu_id, desc),
-      process_state->GetGPUAllocator(options.config.gpu_options(), tf_gpu_id,
-                                     memory_limit),
+      options, device_name, static_cast<Bytes>(stats.bytes_limit), dev_locality,
+      tf_gpu_id, GetShortDeviceDescription(cuda_gpu_id, desc), gpu_allocator,
       process_state->GetCPUAllocator(numa_node));
+  LOG(INFO) << "Created TensorFlow device (" << device_name << " with "
+            << (stats.bytes_limit >> 20) << " MB memory) -> physical GPU ("
+            << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
   TF_RETURN_IF_ERROR(gpu_device->Init(options));
   devices->push_back(gpu_device);
-- 
2.7.4
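
Note on the pattern this patch implements: GetGPUAllocator may return an
allocator that was created by an earlier caller (the TF GPU device is a shared
resource), in which case the memory_limit argument is ignored and the allocator
keeps its original limit. The device must therefore read the effective limit
back via GetStats() instead of reporting the requested one. Below is a minimal
standalone sketch of that pattern; the Allocator, AllocatorStats, and
GetGPUAllocator definitions here are simplified stand-ins for TensorFlow's,
not the real interfaces.

#include <cstdint>
#include <iostream>
#include <map>

// Simplified stand-ins for TensorFlow's AllocatorStats/Allocator.
struct AllocatorStats {
  int64_t bytes_limit = 0;
};

class Allocator {
 public:
  explicit Allocator(int64_t bytes_limit) { stats_.bytes_limit = bytes_limit; }
  void GetStats(AllocatorStats* stats) const { *stats = stats_; }

 private:
  AllocatorStats stats_;
};

// Mimics ProcessState::GetGPUAllocator: the first request for a given device
// id creates the allocator; later requests return the cached instance and
// silently ignore the new memory_limit. That is why the caller has to read
// the limit back from the allocator rather than trust its own argument.
Allocator* GetGPUAllocator(int tf_gpu_id, int64_t memory_limit) {
  static std::map<int, Allocator> allocators;
  auto it = allocators.find(tf_gpu_id);
  if (it == allocators.end()) {
    it = allocators.emplace(tf_gpu_id, Allocator(memory_limit)).first;
  }
  return &it->second;
}

int main() {
  // First caller creates the allocator for GPU 0 with a 4 GiB limit.
  GetGPUAllocator(/*tf_gpu_id=*/0, int64_t{4} << 30);

  // A second caller requests 2 GiB but receives the shared 4 GiB allocator.
  Allocator* gpu_allocator = GetGPUAllocator(/*tf_gpu_id=*/0, int64_t{2} << 30);
  AllocatorStats stats;
  gpu_allocator->GetStats(&stats);

  // Report the allocator's actual limit (4096 MB), not the requested 2048 MB,
  // matching the corrected LOG(INFO) in the patch.
  std::cout << "memory limit: " << (stats.bytes_limit >> 20) << " MB\n";
}

As the TODO in the diff notes, a mismatch between the requested memory_limit
and stats.bytes_limit is still accepted silently at this point; the patch only
makes the logged and reported number truthful.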