Don't spin in a loop when we're not waiting on any GPU events.

author Justin Lebar <jlebar@google.com>

Tue, 20 Mar 2018 10:48:38 +0000 (03:48 -0700)

committer TensorFlower Gardener <gardener@tensorflow.org>

Tue, 20 Mar 2018 10:53:07 +0000 (03:53 -0700)
author Justin Lebar <jlebar@google.com>
Tue, 20 Mar 2018 10:48:38 +0000 (03:48 -0700)
committer TensorFlower Gardener <gardener@tensorflow.org>
Tue, 20 Mar 2018 10:53:07 +0000 (03:53 -0700)
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc

index 2452efc..af6a59a 100644 (file)
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
@@ -30,10 +30,6 @@ EventMgr::EventMgr(gpu::StreamExecutor* se, const GPUOptions& gpu_options)
        polling_active_delay_usecs_(gpu_options.polling_active_delay_usecs()
                                        ? gpu_options.polling_active_delay_usecs()
                                        : 10),
-      polling_inactive_delay_msecs_(
-          gpu_options.polling_inactive_delay_msecs()
-              ? gpu_options.polling_inactive_delay_msecs()
-              : 1),
        accumulated_stream_(nullptr),
        accumulated_tensors_(new TensorReferenceVector),
        accumulated_tensor_bytes_(0),
@@ -78,16 +74,22 @@ EventMgr::~EventMgr() {
  
  void EventMgr::StartPollingLoop() {
    CHECK(polling_stopped_ == nullptr);
-  stop_polling_.reset(new Notification);
+  {
+    mutex_lock l(mu_);
+    stop_polling_ = false;
+  }
    polling_stopped_.reset(new Notification);
    threadpool_.Schedule([this]() { PollLoop(); });
  }
  
  void EventMgr::StopPollingLoop() {
-  if (stop_polling_) {
-    stop_polling_->Notify();
+  if (polling_stopped_) {
+    {
+      mutex_lock l(mu_);
+      stop_polling_ = true;
+      events_pending_.notify_all();
+    }
      polling_stopped_->WaitForNotification();
-    stop_polling_.reset(nullptr);
      polling_stopped_.reset(nullptr);
    }
  }
@@ -121,28 +123,31 @@ void EventMgr::FlushAccumulatedTensors() {
    accumulated_stream_ = nullptr;
  }
  
-// A polling loop to detect completion of GPU events.  There's a
-// tradeoff between achieving low latency detection, which argues for
-// little delay between calls, and minimizing CPU use and lock
-// contention, which argue for longer delay.  The current strategy is
-// to poll frequently when the queue is non-empty, and infrequently
-// otherwise.
+// A polling loop to detect completion of GPU events.
+//
+// While one or more events is outstanding, poll for completed events.  When no
+// events are outstanding, we sleep until one is enqueued.
  void EventMgr::PollLoop() {
-  bool queue_empty = false;
-  while (!stop_polling_->HasBeenNotified()) {
-    if (queue_empty) {
-      mutex_lock l(mu_);
-      WaitForMilliseconds(&l, &events_pending_, polling_inactive_delay_msecs_);
-    } else {
-      Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_);
-    }
-    ToFreeVector to_free;
+  ToFreeVector to_free;
+  while (true) {
+    bool events_still_pending;
      {
        mutex_lock l(mu_);
+      if (stop_polling_) {
+        break;
+      }
+      if (used_events_.empty()) {
+        events_pending_.wait(l);
+      }
        PollEvents(true, &to_free);
-      queue_empty = used_events_.empty();
+      events_still_pending = !used_events_.empty();
      }
      FreeMemory(to_free);
+    to_free.clear();
+
+    if (events_still_pending) {
+      Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_);
+    }
    }
    polling_stopped_->Notify();
  }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h

index 9692b24..d23898e 100644 (file)
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
@@ -94,7 +94,6 @@ class EventMgr {
    perftools::gputools::StreamExecutor* const exec_;
    const int64 deferred_bytes_threshold_;
    const int32 polling_active_delay_usecs_;
-  const int32 polling_inactive_delay_msecs_;
    mutex mu_;
    condition_variable events_pending_ GUARDED_BY(mu_);
  
@@ -180,7 +179,7 @@ class EventMgr {
    // A FIFO queue of InUse events and associated tensors.
    std::deque<InUse> used_events_ GUARDED_BY(mu_);
  
-  std::unique_ptr<Notification> stop_polling_;
+  bool stop_polling_ GUARDED_BY(mu_);
    std::unique_ptr<Notification> polling_stopped_;
  
    // The main PollLoop for the event manager runs in this threadpool.
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto

index abbbe39..a3557e4 100644 (file)
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -67,9 +67,7 @@ message GPUOptions {
    // set or set to 0, gets set to a non-zero default.
    int32 polling_active_delay_usecs = 6;
  
-  // In the event polling loop sleep this many millisconds between
-  // PollEvents calls, when the queue is empty.  If value is not
-  // set or set to 0, gets set to a non-zero default.
+  // This field is deprecated and ignored.
    int32 polling_inactive_delay_msecs = 7;
  
    // Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow,
author	Justin Lebar <jlebar@google.com>
	Tue, 20 Mar 2018 10:48:38 +0000 (03:48 -0700)
committer	TensorFlower Gardener <gardener@tensorflow.org>
	Tue, 20 Mar 2018 10:53:07 +0000 (03:53 -0700)
tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc		patch | blob | history
tensorflow/core/common_runtime/gpu/gpu_event_mgr.h		patch | blob | history
tensorflow/core/protobuf/config.proto		patch | blob | history