[ThreadPool] Solve thread transitions issue (#4344)
author Zhao Wu <wuzhaozju@gmail.com>
Wed, 20 Nov 2019 20:43:20 +0000 (04:43 +0800)
committer Wuwei Lin <wuwei@apache.org>
Wed, 20 Nov 2019 20:43:20 +0000 (15:43 -0500)
* [ThreadPool] Solve thread transitions issue

* Use pthread_atfork so that the master thread's CPU affinity is not inherited by child processes (a minimal sketch of the mechanism follows the change list).

* Code Format

* comment of exclude_worker0_

* set full cpu affinity

* Redundant blank line

* CPPLint

* CPPLint namespace

* CPPLint

* Fix the wrong logic of binding the master thread.
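
The mechanism behind the fix is pthread_atfork(3): a handler passed as the third argument runs in the child process immediately after fork(), so it can widen the narrow affinity mask the child inherited from a core-pinned master thread. A minimal standalone sketch of that idea, assuming a Linux/glibc target (the handler name and the main() driver are illustrative, not TVM code):

#include <pthread.h>
#include <sched.h>
#include <unistd.h>
#include <thread>

// Child-side fork handler: reset the calling thread's CPU affinity to all
// cores, so a process forked from a core-pinned master thread is not stuck
// on that single core.
static void ResetAffinityInChild() {
  cpu_set_t cpuset;
  CPU_ZERO(&cpuset);
  for (unsigned i = 0; i < std::thread::hardware_concurrency(); ++i) {
    CPU_SET(i, &cpuset);
  }
  sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);  // pid 0 == calling thread
}

int main() {
  // Register once; the third handler runs in every child created by fork().
  pthread_atfork(nullptr, nullptr, ResetAffinityInChild);
  if (fork() == 0) {
    // Child: affinity has already been widened by ResetAffinityInChild().
    _exit(0);
  }
  return 0;
}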

src/runtime/thread_pool.cc
src/runtime/threading_backend.cc

src/runtime/thread_pool.cc
index e9e6d03..ee10fa6 100644 (file)
@@ -283,6 +283,10 @@ class ThreadPool {
       // The SpscTaskQueue only hosts ONE item at a time
       queues_.emplace_back(std::unique_ptr<SpscTaskQueue>(new SpscTaskQueue()));
     }
+    const char* exclude_worker0 = getenv("TVM_EXCLUDE_WORKER0");
+    if (exclude_worker0 && atoi(exclude_worker0) == 0) {
+      exclude_worker0_ = false;
+    }
     threads_ = std::unique_ptr<tvm::runtime::threading::ThreadGroup>(
         new tvm::runtime::threading::ThreadGroup(
           num_workers_, [this](int worker_id) { this->RunWorker(worker_id); },
@@ -369,7 +373,7 @@ class ThreadPool {
   int num_workers_;
   // number of workers used (can be restricted with affinity pref)
   int num_workers_used_;
-  // if excluding worker 0 and using master to run task 0
+  // whether to exclude worker 0 and use the master thread to run task 0
 #ifndef _LIBCPP_SGX_CONFIG
   bool exclude_worker0_{true};
 #else
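
For context: when exclude_worker0_ is true, worker thread 0 is never launched and the calling (master) thread runs task 0 itself; the new TVM_EXCLUDE_WORKER0=0 override turns that behavior off. A simplified sketch of how such a flag gates the dispatch, under the assumption that the Launch() helper below is illustrative and not TVM's actual launch path:

#include <cstdlib>
#include <functional>
#include <thread>
#include <vector>

// Illustrative only: run `num_tasks` tasks, optionally executing task 0 on
// the calling (master) thread instead of a dedicated worker thread.
void Launch(int num_tasks, const std::function<void(int)>& task) {
  bool exclude_worker0 = true;
  const char* env = std::getenv("TVM_EXCLUDE_WORKER0");
  if (env && std::atoi(env) == 0) exclude_worker0 = false;

  std::vector<std::thread> workers;
  for (int i = exclude_worker0 ? 1 : 0; i < num_tasks; ++i) {
    workers.emplace_back(task, i);
  }
  if (exclude_worker0) task(0);  // master thread runs task 0 in place
  for (auto& t : workers) t.join();
}
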
src/runtime/threading_backend.cc
index ae98dfb..9c8d688 100644 (file)
@@ -133,25 +133,44 @@ class ThreadGroup::Impl {
           sizeof(cpu_set_t), &cpuset);
 #endif
     }
-    if (exclude_worker0) {  // bind the master thread to core 0
-      cpu_set_t cpuset;
-      CPU_ZERO(&cpuset);
-      if (reverse) {
-        CPU_SET(sorted_order_[sorted_order_.size() - 1], &cpuset);
-      } else {
-        CPU_SET(sorted_order_[0], &cpuset);
-      }
+    if (exclude_worker0) {  // the master thread runs task 0
 #if defined(__ANDROID__)
-      sched_setaffinity(pthread_self(),
-        sizeof(cpu_set_t), &cpuset);
+      SetFullCpuAffinity();
 #else
-      pthread_setaffinity_np(pthread_self(),
-        sizeof(cpu_set_t), &cpuset);
+      // If TVM_BIND_MASTER_THREAD is set to 1, bind the master thread
+      // to core 0.
+      const char* bind_master_thread = getenv("TVM_BIND_MASTER_THREAD");
+      if (bind_master_thread && atoi(bind_master_thread) == 1) {
+        cpu_set_t cpuset;
+        CPU_ZERO(&cpuset);
+        if (reverse) {
+          CPU_SET(sorted_order_[sorted_order_.size() - 1], &cpuset);
+        } else {
+          CPU_SET(sorted_order_[0], &cpuset);
+        }
+        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+      }
+      pthread_atfork(nullptr, nullptr, ThreadGroup::Impl::SetFullCpuAffinity);
 #endif
     }
 #endif
   }
 
+  static void SetFullCpuAffinity() {
+#if defined(__linux__) || defined(__ANDROID__)
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+    for (unsigned i = 0; i < std::thread::hardware_concurrency(); i++) {
+      CPU_SET(i, &cpuset);
+    }
+#if defined(__ANDROID__)
+    sched_setaffinity(pthread_self(), sizeof(cpu_set_t), &cpuset);
+#else
+    pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+#endif
+#endif
+  }
+
   void InitSortedOrder() {
     unsigned int threads = std::thread::hardware_concurrency();
     std::vector<std::pair <unsigned int, int64_t> > max_freqs;
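
With this patch, the master thread keeps a full CPU mask by default and is only pinned to a single core when TVM_BIND_MASTER_THREAD=1 is set. A small Linux-only check of the calling thread's mask, useful for confirming that behavior (illustrative, not part of the patch):

#include <pthread.h>
#include <sched.h>
#include <cstdio>

// Print how many CPUs the calling thread may run on. After this patch the
// master thread should report the full core count by default, and 1 when
// TVM_BIND_MASTER_THREAD=1 is set before the thread pool is created.
void PrintAffinityWidth() {
  cpu_set_t cpuset;
  CPU_ZERO(&cpuset);
  if (pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) == 0) {
    std::printf("runnable CPUs: %d\n", CPU_COUNT(&cpuset));
  }
}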