From: Guangda Lai Date: Fri, 9 Feb 2018 20:20:38 +0000 (-0800) Subject: Split gpu_id.h and GpuIdManager out from build target //tensorflow/core:gpu_runtime... X-Git-Tag: upstream/v1.7.0~31^2~829 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=df982b8dea49eba273e33e4283c3b14eab171b04;p=platform%2Fupstream%2Ftensorflow.git Split gpu_id.h and GpuIdManager out from build target //tensorflow/core:gpu_runtime, to reduce the size of dependencies, so when other lightweight libraries like grappler utils need the TfToCudaGpuId translation function they don't need to depend on things like stream executor and cuda libraries. PiperOrigin-RevId: 185175757 --- diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a7f8533..d1fb9f4 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2255,12 +2255,23 @@ tf_cuda_library( ] + tf_additional_device_tracer_deps(), ) +cc_library( + name = "gpu_id", + srcs = ["common_runtime/gpu/gpu_id_manager.cc"], + hdrs = [ + "common_runtime/gpu/gpu_id.h", + "common_runtime/gpu/gpu_id_manager.h", + ], + deps = [ + ":lib", + ], +) + GPU_RUNTIME_HEADERS = [ "common_runtime/gpu/gpu_bfc_allocator.h", "common_runtime/gpu/gpu_cudamalloc_allocator.h", "common_runtime/gpu/gpu_debug_allocator.h", "common_runtime/gpu/gpu_device.h", - "common_runtime/gpu/gpu_id.h", "common_runtime/gpu/gpu_id_utils.h", "common_runtime/gpu/gpu_init.h", "common_runtime/gpu/gpu_managed_allocator.h", @@ -2279,7 +2290,6 @@ tf_cuda_library( "common_runtime/gpu/gpu_debug_allocator.cc", "common_runtime/gpu/gpu_device.cc", "common_runtime/gpu/gpu_device_factory.cc", - "common_runtime/gpu/gpu_id_utils.cc", "common_runtime/gpu/gpu_managed_allocator.cc", "common_runtime/gpu/gpu_stream_util.cc", "common_runtime/gpu/gpu_util.cc", @@ -2294,6 +2304,7 @@ tf_cuda_library( ":core_cpu_lib", ":framework", ":framework_internal", + ":gpu_id", ":gpu_init_impl", ":gpu_lib", ":graph", @@ -2883,6 +2894,7 @@ tf_cc_tests_gpu( linkstatic = tf_kernel_tests_linkstatic(), 
deps = [ ":gpu_headers_lib", + ":gpu_id", ":gpu_runtime", ":test", ], @@ -2894,7 +2906,7 @@ tf_cc_tests_gpu( srcs = glob(["user_ops/**/*_test.cc"]) + [ "common_runtime/gpu/gpu_bfc_allocator_test.cc", "common_runtime/gpu/gpu_device_test.cc", - "common_runtime/gpu/gpu_id_utils_test.cc", + "common_runtime/gpu/gpu_id_manager_test.cc", "common_runtime/gpu/gpu_event_mgr_test.cc", "common_runtime/gpu/pool_allocator_test.cc", ], @@ -2906,6 +2918,7 @@ tf_cc_tests_gpu( ":direct_session", ":framework", ":framework_internal", + ":gpu_id", ":gpu_runtime", ":lib", ":lib_internal", @@ -3301,6 +3314,7 @@ tf_cc_test_gpu( ":direct_session", ":framework", ":framework_internal", + ":gpu_id", ":gpu_runtime", ":lib", ":lib_internal", diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index a9485a8..0fb908b 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -33,6 +33,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h" @@ -99,7 +100,7 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface { reinterpret_cast(scratch + Eigen::kCudaScratchSize); stream_ = cuda_stream; allocator_ = alloc; - const int cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id).value(); + const int cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id).value(); device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id]; } @@ -311,7 +312,7 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { gpu_device_info_->stream = streams_[0]->compute; gpu_device_info_->default_context = device_contexts_[0]; gpu_device_info_->event_mgr = em_.get(); - gpu_device_info_->gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value(); + gpu_device_info_->gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id_).value(); set_tensorflow_gpu_device_info(gpu_device_info_); // Whether and how the GPU device uses its own threadpool. 
@@ -955,7 +956,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, while (next_tf_gpu_id < memory_limit_bytes.size()) { TfGpuId tf_gpu_id(next_tf_gpu_id); ++next_tf_gpu_id; - GpuIdUtil::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id); + GpuIdManager::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id); } } const int num_tf_gpus = next_tf_gpu_id; @@ -1006,7 +1007,7 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, const string device_name = strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value()); GpuIdUtil::CheckValidTfGpuId(tf_gpu_id); - CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id); + CudaGpuId cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id); int numa_node = dev_locality.numa_node(); Bytes allocated_bytes = static_cast(memory_limit); @@ -1078,7 +1079,7 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( all_tf_gpu_ids.push_back(TfGpuId(i)); } for (TfGpuId tf_gpu_id : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id); + CudaGpuId cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id); // Get GPU bus_id from its reported NUMA affinity. Because GPUs are // virtualized in some environments, we can't just use the GPU id. // NUMA locales are indexed from 0, buses are indexed from 1. @@ -1106,7 +1107,7 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( LocalLinks* links = dev_locality.mutable_links(); for (const InterconnectMap& imap : interconnects) { for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_dst = GpuIdUtil::TfToCudaGpuId(tf_gpu_dst); + CudaGpuId cuda_gpu_dst = GpuIdManager::TfToCudaGpuId(tf_gpu_dst); if (imap.directed_links.find({cuda_gpu_id, cuda_gpu_dst}) != imap.directed_links.end()) { InterconnectLink* ilink = links->add_link(); @@ -1121,7 +1122,7 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( // add high strength links to the others. 
for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { if (tf_gpu_id == tf_gpu_dst) continue; - CudaGpuId cuda_gpu_dst = GpuIdUtil::TfToCudaGpuId(tf_gpu_dst); + CudaGpuId cuda_gpu_dst = GpuIdManager::TfToCudaGpuId(tf_gpu_dst); if (cuda_gpu_id == cuda_gpu_dst) { InterconnectLink* ilink = links->add_link(); ilink->set_device_id(tf_gpu_dst.value()); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 82ce3a2..c88daa8 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" @@ -88,7 +89,7 @@ class BaseGPUDevice : public LocalDevice { // Returns the CUDA GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. - int gpu_id() const { return GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value(); } + int gpu_id() const { return GpuIdManager::TfToCudaGpuId(tf_gpu_id_).value(); } // The executor that provides control for the device; e.g., for CUDA this // corresponds to the cuda context. 
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc similarity index 79% rename from tensorflow/core/common_runtime/gpu/gpu_id_utils.cc rename to tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index 92cd194..207afdc 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include @@ -24,10 +24,10 @@ limitations under the License. namespace tensorflow { namespace { // Manages the map between TfGpuId and CUDA GPU id. -class GpuIdManager { +class TfToCudaGpuIdMap { public: - static GpuIdManager* singleton() { - static auto* manager = new GpuIdManager; + static TfToCudaGpuIdMap* singleton() { + static auto* manager = new TfToCudaGpuIdMap; return manager; } @@ -62,13 +62,13 @@ class GpuIdManager { }; } // namespace -void GpuIdUtil::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, - CudaGpuId cuda_gpu_id) { - GpuIdManager::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); +void GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, + CudaGpuId cuda_gpu_id) { + TfToCudaGpuIdMap::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); } -CudaGpuId GpuIdUtil::TfToCudaGpuId(TfGpuId tf_gpu_id) { - return CudaGpuId(GpuIdManager::singleton()->FindOrDie(tf_gpu_id)); +CudaGpuId GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id) { + return CudaGpuId(TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id)); } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h new file mode 100644 index 0000000..33925d8 --- /dev/null +++ 
b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -0,0 +1,33 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ + +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" + +namespace tensorflow { + +// Class that manages the translation between Tensorflow GPU ids and CUDA GPU +// ids. +class GpuIdManager { + public: + static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc similarity index 67% rename from tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc rename to tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc index bebe00a..bdbd8d0 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/platform/test.h" @@ -21,33 +21,33 @@ limitations under the License. namespace tensorflow { namespace test { -TEST(GpuIdTest, Basics) { +TEST(GpuIdManagerTest, Basics) { TfGpuId key_0(0); CudaGpuId value_0(0); - GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0); - EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0)); + GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0); + EXPECT_EQ(value_0, GpuIdManager::TfToCudaGpuId(key_0)); // Multiple calls to map the same value is ok. - GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0); - EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0)); + GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0); + EXPECT_EQ(value_0, GpuIdManager::TfToCudaGpuId(key_0)); // Map a different TfGpuId to a different value. TfGpuId key_1(3); CudaGpuId value_1(2); - GpuIdUtil::InsertTfCudaGpuIdPair(key_1, value_1); - EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_1)); + GpuIdManager::InsertTfCudaGpuIdPair(key_1, value_1); + EXPECT_EQ(value_1, GpuIdManager::TfToCudaGpuId(key_1)); // Mapping a different TfGpuId to the same value is ok. TfGpuId key_2(10); - GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_1); - EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_2)); + GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_1); + EXPECT_EQ(value_1, GpuIdManager::TfToCudaGpuId(key_2)); // Mapping the same TfGpuId to a different value will crash the program. - ASSERT_DEATH(GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_0), + ASSERT_DEATH(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_0), "Mapping the same TfGpuId to a different CUDA GPU id"); // Getting an nonexistent mapping will crash the program. 
- ASSERT_DEATH(GpuIdUtil::TfToCudaGpuId(TfGpuId(100)), + ASSERT_DEATH(GpuIdManager::TfToCudaGpuId(TfGpuId(100)), "Could not find the mapping for TfGpuId"); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h index 6d196b1..2e90687 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_ #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/lib/gtl/int_type.h" #include "tensorflow/core/platform/stream_executor.h" @@ -27,9 +28,6 @@ namespace gpu = ::perftools::gputools; // Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids. class GpuIdUtil { public: - static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); - static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); - // Convenient methods for getting the associated executor given a TfGpuId or // CudaGpuId. static gpu::port::StatusOr ExecutorForCudaGpuId( @@ -42,12 +40,12 @@ class GpuIdUtil { } static gpu::port::StatusOr ExecutorForTfGpuId( TfGpuId tf_gpu_id) { - return ExecutorForCudaGpuId(GpuIdUtil::TfToCudaGpuId(tf_gpu_id)); + return ExecutorForCudaGpuId(GpuIdManager::TfToCudaGpuId(tf_gpu_id)); } // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate. static void CheckValidTfGpuId(TfGpuId tf_gpu_id) { - const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id); + const CudaGpuId cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id); const int visible_device_count = GPUMachineManager()->VisibleDeviceCount(); CHECK_LT(cuda_gpu_id.value(), visible_device_count) << "cuda_gpu_id is outside discovered device range." 
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc index b195de7..61013bd 100644 --- a/tensorflow/core/common_runtime/gpu/process_state.cc +++ b/tensorflow/core/common_runtime/gpu/process_state.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/common_runtime/gpu/pool_allocator.h" @@ -124,7 +125,7 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, return nullptr; } - const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id); + const CudaGpuId cuda_gpu_id = GpuIdManager::TfToCudaGpuId(tf_gpu_id); gpu_allocator = new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));