polling_active_delay_usecs_(gpu_options.polling_active_delay_usecs()
                                ? gpu_options.polling_active_delay_usecs()
                                : 10),
accumulated_stream_(nullptr),
accumulated_tensors_(new TensorReferenceVector),
accumulated_tensor_bytes_(0),
void EventMgr::StartPollingLoop() {
CHECK(polling_stopped_ == nullptr);
- stop_polling_.reset(new Notification);
+ {
+ mutex_lock l(mu_);
+ stop_polling_ = false;
+ }
polling_stopped_.reset(new Notification);
threadpool_.Schedule([this]() { PollLoop(); });
}
void EventMgr::StopPollingLoop() {
- if (stop_polling_) {
- stop_polling_->Notify();
+ if (polling_stopped_) {
+ {
+ mutex_lock l(mu_);
+ stop_polling_ = true;
+ events_pending_.notify_all();
+ }
polling_stopped_->WaitForNotification();
- stop_polling_.reset(nullptr);
polling_stopped_.reset(nullptr);
}
}
accumulated_stream_ = nullptr;
}
-// A polling loop to detect completion of GPU events. There's a
-// tradeoff between achieving low latency detection, which argues for
-// little delay between calls, and minimizing CPU use and lock
-// contention, which argue for longer delay. The current strategy is
-// to poll frequently when the queue is non-empty, and infrequently
-// otherwise.
+// A polling loop to detect completion of GPU events.
+//
+// While one or more events is outstanding, poll for completed events. When no
+// events are outstanding, we sleep until one is enqueued.
void EventMgr::PollLoop() {
- bool queue_empty = false;
- while (!stop_polling_->HasBeenNotified()) {
- if (queue_empty) {
- mutex_lock l(mu_);
- WaitForMilliseconds(&l, &events_pending_, polling_inactive_delay_msecs_);
- } else {
- Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_);
- }
- ToFreeVector to_free;
+ ToFreeVector to_free;
+ while (true) {
+ bool events_still_pending;
{
mutex_lock l(mu_);
+ if (stop_polling_) {
+ break;
+ }
+ if (used_events_.empty()) {
+ events_pending_.wait(l);
+ }
PollEvents(true, &to_free);
- queue_empty = used_events_.empty();
+ events_still_pending = !used_events_.empty();
}
FreeMemory(to_free);
+ to_free.clear();
+
+ if (events_still_pending) {
+ Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_);
+ }
}
polling_stopped_->Notify();
}
perftools::gputools::StreamExecutor* const exec_;
const int64 deferred_bytes_threshold_;
const int32 polling_active_delay_usecs_;
- const int32 polling_inactive_delay_msecs_;
mutex mu_;
condition_variable events_pending_ GUARDED_BY(mu_);
// A FIFO queue of InUse events and associated tensors.
std::deque<InUse> used_events_ GUARDED_BY(mu_);
- std::unique_ptr<Notification> stop_polling_;
+ bool stop_polling_ GUARDED_BY(mu_);
std::unique_ptr<Notification> polling_stopped_;
// The main PollLoop for the event manager runs in this threadpool.
// set or set to 0, gets set to a non-zero default.
int32 polling_active_delay_usecs = 6;
// This field is deprecated and ignored.
int32 polling_inactive_delay_msecs = 7;
// Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow,