From 6583044d980686c04a20085098b335c98618d106 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Feb 2018 14:14:42 -0800 Subject: [PATCH] Make CPUAllocator VisitableAllocator, for better RDMA networking. PiperOrigin-RevId: 186518037 --- tensorflow/core/BUILD | 3 +- tensorflow/core/common_runtime/bfc_allocator.h | 2 +- .../common_runtime/gpu/gpu_cudamalloc_allocator.h | 2 +- .../core/common_runtime/gpu/gpu_debug_allocator.h | 2 +- .../core/common_runtime/gpu/pool_allocator.h | 2 +- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 2 +- tensorflow/core/framework/allocator.cc | 62 ++++++++++++++++++++-- .../visitable_allocator.h | 6 +-- 8 files changed, 67 insertions(+), 14 deletions(-) rename tensorflow/core/{common_runtime => framework}/visitable_allocator.h (94%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 04307db..1893967 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -480,6 +480,7 @@ tf_cuda_library( "framework/type_index.h", "framework/type_traits.h", "framework/types.h", + "framework/visitable_allocator.h", "public/version.h", "util/activation_mode.h", "util/bcast.h", @@ -1812,6 +1813,7 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", "framework/variant.h", + "framework/visitable_allocator.h", "platform/variant_coding.h", "util/command_line_flags.h", "util/env_var.h", @@ -2107,7 +2109,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/stats_publisher_interface.h", "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", - "common_runtime/visitable_allocator.h", "graph/gradients.h", "graph/quantize_training.h", ] + if_mkl(["graph/mkl_graph_util.h"]) diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index b8e7735..e34945d 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -23,7 +23,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/allocator_retry.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 2086973..0a58634 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index adce3a8..0db08dc 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.h b/tensorflow/core/common_runtime/gpu/pool_allocator.h index 91ce830..38d669e 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator.h +++ b/tensorflow/core/common_runtime/gpu/pool_allocator.h @@ -24,7 +24,7 @@ limitations under the License. #include #include #include -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 2a67c03..77eeb56 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -25,7 +25,7 @@ limitations under the License. #include #include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 94bf34a..a382b8b 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/visitable_allocator.h" #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/framework/log_memory.h" @@ -68,15 +68,19 @@ void EnableCPUAllocatorFullStats(bool enable) { cpu_allocator_collect_full_stats = enable; } -class CPUAllocator : public Allocator { +class CPUAllocator : public VisitableAllocator { public: - CPUAllocator() {} + CPUAllocator() : allocation_begun_(false) {} ~CPUAllocator() override {} string Name() override { return "cpu"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override { + if (!allocation_begun_) { + allocation_begun_ = true; + } + void* p = port::AlignedMalloc(num_bytes, alignment); if (cpu_allocator_collect_stats) { const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p); @@ -88,16 +92,38 @@ class CPUAllocator : public Allocator { stats_.max_alloc_size = std::max(stats_.max_alloc_size, alloc_size); } + + // visit each Visitor in alloc_visitors_ + if (p != nullptr) { + for (const Visitor& v : alloc_visitors_) { + v(p, num_bytes); + } + } + return p; } void DeallocateRaw(void* ptr) override { + std::size_t alloc_size; + bool init_alloc_size = false; if (cpu_allocator_collect_stats) { - const std::size_t alloc_size = - port::MallocExtension_GetAllocatedSize(ptr); + alloc_size = port::MallocExtension_GetAllocatedSize(ptr); + init_alloc_size = true; mutex_lock l(mu_); stats_.bytes_in_use -= alloc_size; } + + // visit each Visitor in free_visitors_ + if (ptr != nullptr) { + if (!init_alloc_size) { + alloc_size = port::MallocExtension_GetAllocatedSize(ptr); + init_alloc_size = true; + } + for (const Visitor& v : free_visitors_) { + v(ptr, alloc_size); + } + } + port::AlignedFree(ptr); } @@ -117,10 +143,36 @@ class CPUAllocator : public Allocator { return port::MallocExtension_GetAllocatedSize(ptr); } + // REQUIRES: can only add visitors before the first Allocate call + + void AddAllocVisitor(Visitor visitor) override { + mutex_lock lock(visitor_mutex_); + CHECK(!allocation_begun_) + << "AddAllocVisitor may not be called after allocation has begun."; + alloc_visitors_.push_back(visitor); + } + + void AddFreeVisitor(Visitor visitor) override { + mutex_lock lock(visitor_mutex_); + CHECK(!allocation_begun_) + << "AddFreeVisitor may not be called after allocation has begun."; + free_visitors_.push_back(visitor); + } + private: mutex mu_; AllocatorStats stats_ GUARDED_BY(mu_); + // visitor_mutex_ protects write access to alloc_visitors_ and free_visitors_. + // While write access is mutually exclusive, reads may happen concurrently. + // This is okay because we may only append to alloc_visitors_ and + // free_visitors_ before first allocation, and subsequently we only read these + // vectors. + mutex visitor_mutex_; + std::vector alloc_visitors_; + std::vector free_visitors_; + std::atomic allocation_begun_; + TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator); }; diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/framework/visitable_allocator.h similarity index 94% rename from tensorflow/core/common_runtime/visitable_allocator.h rename to tensorflow/core/framework/visitable_allocator.h index 8edf922..ed41b05 100644 --- a/tensorflow/core/common_runtime/visitable_allocator.h +++ b/tensorflow/core/framework/visitable_allocator.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ -#define TENSORFLOW_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ +#ifndef TENSORFLOW_CORE_FRAMEWORK_VISITABLE_ALLOCATOR_H_ +#define TENSORFLOW_CORE_FRAMEWORK_VISITABLE_ALLOCATOR_H_ #include #include "tensorflow/core/framework/allocator.h" @@ -76,4 +76,4 @@ class TrackingVisitableAllocator : public TrackingAllocator, VisitableAllocator* allocator_; }; } // namespace tensorflow -#endif // TENSORFLOW_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ +#endif // TENSORFLOW_CORE_FRAMEWORK_VISITABLE_ALLOCATOR_H_ -- 2.7.4