From 384ab54a95d7dc1d242fd6adcd354e68864f4c67 Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Tue, 22 May 2018 18:23:14 +0000 Subject: [PATCH] Fix BFCAllocator::Extend alignment issues The BFCAllocator::Extend method used an incorrect harcoded alignment of 32 in two different places. This led to alignment check assertions when Extend was called, as all memory is now expected to be 64 byte aligned. Signed-off-by: Mark Ryan --- tensorflow/core/common_runtime/bfc_allocator.cc | 8 ++++---- tensorflow/core/common_runtime/bfc_allocator.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 8f2a419..9cda178 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) { return &(chunks_[h]); } -bool BFCAllocator::Extend(size_t rounded_bytes) { +bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { size_t available_bytes = memory_limit_ - total_region_allocated_bytes_; // Rounds available_bytes down to the nearest multiple of kMinAllocationSize. available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize; @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { // Try allocating. size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(32, bytes); + void* mem_addr = suballocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(32, bytes); + mem_addr = suballocator_->Alloc(alignment, bytes); } } @@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } // Try to extend - if (Extend(rounded_bytes)) { + if (Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes); if (ptr != nullptr) { return ptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index ba5a3ee..52aedb1 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -305,7 +305,8 @@ class BFCAllocator : public VisitableAllocator { // Try to add a new memory region that can satisfy an allocation of // 'rounded_bytes' bytes. Returns true on success and false on // failure. - bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + bool Extend(size_t alignment, size_t rounded_bytes) + EXCLUSIVE_LOCKS_REQUIRED(lock_); // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. -- 2.7.4