From 67b6696f9620734369ae99e7895fa6570d7faca6 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Tue, 22 May 2018 12:34:51 -0700
Subject: [PATCH] [XLA:GPU] Emit fused reduces from batchnorm expander

This is an intermediate step until we have working multi-output fusion.
Once we have it, this change should be reverted as it might interfere
with fusion.

PiperOrigin-RevId: 197605814
---
 tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index d50153d..1445684 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -157,11 +157,13 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     if (hlo_module->config().debug_options().xla_gpu_use_cudnn_batchnorm()) {
       pass.AddPass<CudnnBatchNormRewriter>();
     }
+    // TODO(kramerb): Remove use_fusion once instruction fusion can create
+    // multi-output fusions from the unfused expander output.
     pass.AddPass<BatchNormExpander>(
         /*rewrite_training_op=*/true,
         /*rewrite_inference_op=*/true,
         /*rewrite_grad_op=*/true,
-        /*use_fusion=*/false);
+        /*use_fusion=*/true);
 
     // Rewrite gather ops into smaller ones.
     pass.AddPass<GatherExpander>();
-- 
2.7.4
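
For context, a minimal standalone sketch of the call site this patch produces,
written against the HloPassPipeline API at this revision. The wrapper function
and its parameter name below are hypothetical, for illustration only; the
BatchNormExpander arguments mirror the hunk above.

    // Sketch only: AddBatchNormPasses is a hypothetical helper, not part of
    // the patched file. The BatchNormExpander flags match the diff above.
    #include "tensorflow/compiler/xla/service/batchnorm_expander.h"
    #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"

    namespace xla {

    void AddBatchNormPasses(HloPassPipeline* pass) {
      // With /*use_fusion=*/true the expander emits the reduces it creates
      // for mean and variance inside a fusion instruction ("fused reduces"),
      // an interim stand-in until instruction fusion can build multi-output
      // fusions from the unfused expander output (see the TODO above).
      pass->AddPass<BatchNormExpander>(
          /*rewrite_training_op=*/true,
          /*rewrite_inference_op=*/true,
          /*rewrite_grad_op=*/true,
          /*use_fusion=*/true);
    }

    }  // namespace xla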