[XLA:GPU] Remove unused Thunk::ShouldBlockFutureThunks function.
author: Justin Lebar <jlebar@google.com>
Fri, 11 May 2018 02:28:35 +0000 (19:28 -0700)
committer: TensorFlower Gardener <gardener@tensorflow.org>
Fri, 11 May 2018 02:31:17 +0000 (19:31 -0700)
PiperOrigin-RevId: 196206896

tensorflow/compiler/xla/service/gpu/gpu_executable.cc
tensorflow/compiler/xla/service/gpu/thunk.h

index 04b4f7a..e09bee0 100644 (file)
@@ -164,9 +164,6 @@ Status GpuExecutable::ExecuteThunks(
                                 sub_streams, hlo_module_->entry_computation());
   uint64 start_micros = tensorflow::Env::Default()->NowMicros();
 
-  // The next event enqueued on stream N must not run until the thunk at
-  // last_blocking_thunk_for_stream[N] completes.
-  std::map<int32, const Thunk*> last_blocking_thunk_for_stream;
   std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event;
   for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
     TF_RETURN_IF_ERROR(thunk->Initialize(*this));
@@ -179,18 +176,10 @@ Status GpuExecutable::ExecuteThunks(
       stream->ThenWaitFor(FindOrDie(thunk_to_finish_event, dependency).get());
     }
 
-    if (last_blocking_thunk_for_stream.count(stream_no)) {
-      stream->ThenWaitFor(FindOrDie(thunk_to_finish_event,
-                                    last_blocking_thunk_for_stream[stream_no])
-                              .get());
-      last_blocking_thunk_for_stream.erase(stream_no);
-    }
-
     // If this thunk requests it, wait for all currently-executing thunks to
     // finish.  This is useful e.g. if the thunk is about to perform autotuning.
     if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
       TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
-      last_blocking_thunk_for_stream.clear();
     }
 
     profiler.StartOperation();
@@ -198,22 +187,11 @@ Status GpuExecutable::ExecuteThunks(
             << thunk->hlo_instruction()->ToString() << " on stream "
             << stream_no;
     TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream));
-    if (thunk_schedule_->Depended(thunk) || thunk->ShouldBlockFutureThunks()) {
+    if (thunk_schedule_->Depended(thunk)) {
       auto finish_event = MakeUnique<se::Event>(main_stream->parent());
       finish_event->Init();
       stream->ThenRecordEvent(finish_event.get());
       thunk_to_finish_event[thunk] = std::move(finish_event);
-
-      if (thunk->ShouldBlockFutureThunks()) {
-        // Set last_blocking_thunk_for_stream on all streams other than this one
-        // so that all other streams will wait for this thunk to complete before
-        // executing any events that occur later in the total order.
-        for (int32 i = 0; i < sub_streams.size() + 1; ++i) {
-          if (i != stream_no) {
-            last_blocking_thunk_for_stream[i] = thunk;
-          }
-        }
-      }
     }
     profiler.FinishOperation(thunk->hlo_instruction());
   }
index a0c785e..57d9212 100644 (file)
@@ -89,16 +89,6 @@ class Thunk {
     return false;
   }
 
-  // Indicates whether thunks scheduled after this one should wait for this one
-  // to complete before running. For example, a convolution thunk creates a
-  // scratch allocator, then kicks off a convolution in cudnn via the stream
-  // executor. When the stream executor call returns, the scratch allocator goes
-  // out of scope, and the scratch memory is deallocated. In this case, the
-  // convolution thunk needs to return true so that future thunks wait for the
-  // convolution thunk to avoid reusing the deallocated memory until the
-  // convolution thunk is done with it.
-  virtual bool ShouldBlockFutureThunks() { return false; }
-
   // Execute the kernel for the thunk on the given stream. This method must be
   // called after Initialize and can be called multiple times over Thunk's
   // lifetime. Stream argument must be non-null.