From 15879526893886852b64d60b72c40bc6daeda22e Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 8 May 2018 17:29:01 -0700 Subject: [PATCH] [XLA:GPU] Disable multi-streaming by default. Run all GPU work on one stream by default. We've found experimentally that multi-streaming creates significant additional memory pressure on some models, and we don't have any good benchmarks where multi-streaming helps on which to tune the stream-assignment heuristics. So just disable it for now. PiperOrigin-RevId: 195903229 --- tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc | 6 ++++++ tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc | 9 +++++++++ tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc index bc84057..f42fb92 100644 --- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc +++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc @@ -47,6 +47,12 @@ void SetDebugOptionsDefaults(DebugOptions* flags) { // Set cudnn batchnorm off by default; it does not provide a performance win // on average. flags->set_xla_gpu_use_cudnn_batchnorm(false); + + // Run all GPU work on one stream by default. Using multiple streams + // increases memory usage and we lack strong motivating benchmarks for tuning + // the heuristics needed to decide when to run on multiple streams. See + // b/77879207. 
+ flags->set_xla_gpu_disable_multi_streaming(true); } // Allocates flag_values and flag_objects; this function must not be called more diff --git a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc index 6436abc..e230d53 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc @@ -42,6 +42,15 @@ class HloScheduleTest : public HloTestBase { .ConsumeValueOrDie(); } + std::unique_ptr<HloModule> CreateNewModule() { + HloModuleConfig config; + auto debug_options = GetDebugOptionsForTest(); + debug_options.set_xla_gpu_disable_multi_streaming(false); + config.set_debug_options(debug_options); + return MakeUnique<HloModule>("test_module", VersionedComputationHandle(), + config); + } + HloVec RemoveHlo(const HloVec& input, const std::unordered_set<const HloInstruction*>& remove) { HloVec result(input); diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc index b42767d..696fa7e 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc @@ -28,6 +28,15 @@ namespace gpu { class StreamAssignmentTest : public HloTestBase { protected: + std::unique_ptr<HloModule> CreateNewModule() { + HloModuleConfig config; + auto debug_options = GetDebugOptionsForTest(); + debug_options.set_xla_gpu_disable_multi_streaming(false); + config.set_debug_options(debug_options); + return MakeUnique<HloModule>("test_module", VersionedComputationHandle(), + config); + } + // Pre-canned shapes. Shape f32_2x2_ = ShapeUtil::MakeShape(F32, {2, 2}); }; -- 2.7.4