From 56502dc77e7ead9c9a4f63bf3405a937307a6f37 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis
Date: Tue, 22 May 2018 15:01:15 -0700
Subject: [PATCH] [TF:XLA] Add clarification to the DFS scheduler.

PiperOrigin-RevId: 197629355
---
 tensorflow/compiler/xla/service/hlo_scheduling.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc
index 6397e46..29c3377 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.cc
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc
@@ -457,6 +457,13 @@ StatusOr<std::vector<const HloInstruction*>> DFSMemoryScheduler(
       extra_users[hlo] += extra_users[operand];
       total_sizes[hlo] += total_sizes[operand];
     }
+    // total_sizes[hlo] transitively includes the sizes of all nodes that
+    // lead to it. But computation is a DAG, so we are double-counting nodes,
+    // which can lead to overflows for large programs.
+    // cumulative_total_size caps the size to prevent overflows.
+    // NOTE(dimvar): this is quite ugly and should be changed. It's unclear
+    // why we care about transitive sizes; when scheduling a node, its input
+    // and output buffers should be all that matters, not its "history".
     total_sizes[hlo] = std::min(total_sizes[hlo], cumulative_total_size);
   }
   CHECK_EQ(extra_users.size(), computation.instruction_count());
-- 
2.7.4