From 3c2e7fac1c13660bc0da1bb7e0cbc127543cd22b Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Fri, 18 May 2018 12:24:20 -0700 Subject: [PATCH] [TF:XLA] Delete the reverseDFS scheduler. With recent improvements to the List scheduler, reverseDFS is no longer needed. PiperOrigin-RevId: 197183727 --- tensorflow/compiler/xla/service/hlo_scheduling.cc | 62 +++++------------------ tensorflow/compiler/xla/service/hlo_scheduling.h | 9 ---- 2 files changed, 14 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 0254581..51c29d4 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -426,10 +426,12 @@ StatusOr MinimumMemoryForComputation( return result.heap_size; } -StatusOr> DFSMemorySchedulerImpl( +StatusOr> DFSMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, - const LogicalBuffer::SizeFunction& size_function, bool reverse_heuristics) { + const LogicalBuffer::SizeFunction& size_function, + const tensorflow::gtl::FlatMap& + memory_by_computation) { // This ordering is based on DFS post-order, with a heuristic to decide which // operand to visit first. The heuristic is based on 'extra_users', which is // simply users-1 for each instruction. By subtracting 1, we're saying that @@ -469,16 +471,15 @@ StatusOr> DFSMemorySchedulerImpl( return Status::OK(); }); TF_RETURN_IF_ERROR(computation.AcceptWithOperandOrder( - &visitor, [&extra_users, &total_sizes, reverse_heuristics]( - const HloInstruction* a, const HloInstruction* b) { - auto lhs = std::tuple(extra_users[a], - total_sizes[a], b->name()); - auto rhs = std::tuple(extra_users[b], - total_sizes[b], a->name()); - - // Reverse heuristics. This helps some cases as a different starting - // point of gradient descent, see b/78906799 for more context. - return reverse_heuristics ? rhs > lhs : lhs > rhs; + &visitor, [&extra_users, &total_sizes](const HloInstruction* a, + const HloInstruction* b) { + if (extra_users[a] != extra_users[b]) { + return extra_users[a] > extra_users[b]; + } + if (total_sizes[a] != total_sizes[b]) { + return total_sizes[a] > total_sizes[b]; + } + return a->name() < b->name(); })); CHECK_EQ(sequence.size(), computation.instruction_count()); return sequence; @@ -505,26 +506,6 @@ StatusOr> PostOrderMemoryScheduler( post_order.end()}; } -StatusOr> DFSMemoryScheduler( - const HloComputation& computation, - const TuplePointsToAnalysis& points_to_analysis, - const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& - memory_by_computation) { - return DFSMemorySchedulerImpl(computation, points_to_analysis, size_function, - /*reverse_heuristics=*/false); -} - -StatusOr> DFSMemorySchedulerReverse( - const HloComputation& computation, - const TuplePointsToAnalysis& points_to_analysis, - const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& - memory_by_computation) { - return DFSMemorySchedulerImpl(computation, points_to_analysis, size_function, - /*reverse_heuristics=*/true); -} - StatusOr> DefaultMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, @@ -568,18 +549,7 @@ StatusOr> DefaultMemoryScheduler( VLOG(2) << "Min-memory post order sequence: " << HumanReadableNumBytes(post_order_memory); - TF_ASSIGN_OR_RETURN( - std::vector reverse_dfs, - DFSMemorySchedulerReverse(computation, points_to_analysis, size_function, - memory_by_computation)); - TF_ASSIGN_OR_RETURN( - const int64 reverse_dfs_memory, - MinimumMemoryForComputation(computation, reverse_dfs, points_to_analysis, - size_function)); - VLOG(2) << "Min-memory reverse_dfs sequence: " - << HumanReadableNumBytes(reverse_dfs_memory); - auto min_memory = std::min( - {dfs_memory, post_order_memory, reverse_dfs_memory, list_memory}); + auto min_memory = std::min({dfs_memory, post_order_memory, list_memory}); if (min_memory == list_memory) { VLOG(2) << "Chose min-memory list sequence: " @@ -589,10 +559,6 @@ StatusOr> DefaultMemoryScheduler( VLOG(2) << "Chose min-memory dfs sequence: " << HumanReadableNumBytes(dfs_memory); return dfs_sequence; - } else if (min_memory == reverse_dfs_memory) { - VLOG(2) << "Chose min-memory reverse_dfs memory: " - << HumanReadableNumBytes(reverse_dfs_memory); - return reverse_dfs; } else { VLOG(2) << "Chose min-memory post_order sequence: " << HumanReadableNumBytes(post_order_memory); diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.h b/tensorflow/compiler/xla/service/hlo_scheduling.h index 0e5ac20..49b927e 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.h +++ b/tensorflow/compiler/xla/service/hlo_scheduling.h @@ -76,15 +76,6 @@ StatusOr> PostOrderMemoryScheduler( const tensorflow::gtl::FlatMap& memory_by_computation); -// DFS-order scheduler with reversed heuristics. This helps some cases (see -// b/78906799). -StatusOr> DFSMemorySchedulerReverse( - const HloComputation& computation, - const TuplePointsToAnalysis& points_to_analysis, - const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& - memory_by_computation); - // The default scheduling algorithm. Runs both the list scheduler // and the DFS scheduler, and chooses whichever returns a lower min-memory, // not accounting for fragmentation. -- 2.7.4