From ab5de487813b4849dfb5415ee60595654dff06be Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 26 Apr 2018 15:33:38 -0700
Subject: [PATCH] Remove the inter-op thread pool

Forgot about this in cl/194299356.  However, when I checked cl/194299356, I
found that we actually (incorrectly?) used the *intra* op thread pool in the
parallel CPU executable?  Does that mean the inter op thread pool was always
unused?

PiperOrigin-RevId: 194464734
---
 tensorflow/compiler/xla/executable_run_options.cc           | 11 -----------
 tensorflow/compiler/xla/executable_run_options.h            |  7 -------
 tensorflow/compiler/xla/python/local_computation_builder.cc |  3 ---
 tensorflow/compiler/xla/service/backend.cc                  |  7 -------
 tensorflow/compiler/xla/service/backend.h                   |  7 -------
 tensorflow/compiler/xla/service/hlo_runner.cc               |  6 +++---
 tensorflow/compiler/xla/service/service.cc                  |  8 +++-----
 tensorflow/compiler/xla/tests/local_client_test_base.cc     |  2 --
 8 files changed, 6 insertions(+), 45 deletions(-)
diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc
index 99b8f05..a472747 100644
--- a/tensorflow/compiler/xla/executable_run_options.cc
+++ b/tensorflow/compiler/xla/executable_run_options.cc
@@ -45,17 +45,6 @@ stream_executor::Stream* ExecutableRunOptions::stream() const {
   return stream_;
 }
 
-ExecutableRunOptions& ExecutableRunOptions::set_inter_op_thread_pool(
-    tensorflow::thread::ThreadPool* inter_op_thread_pool) {
-  inter_op_thread_pool_ = inter_op_thread_pool;
-  return *this;
-}
-
-tensorflow::thread::ThreadPool* ExecutableRunOptions::inter_op_thread_pool()
-    const {
-  return inter_op_thread_pool_;
-}
-
 ExecutableRunOptions& ExecutableRunOptions::set_intra_op_thread_pool(
     const Eigen::ThreadPoolDevice* intra_op_thread_pool) {
   intra_op_thread_pool_ = intra_op_thread_pool;
diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h
index a306ae1..416131b 100644
--- a/tensorflow/compiler/xla/executable_run_options.h
+++ b/tensorflow/compiler/xla/executable_run_options.h
@@ -65,12 +65,6 @@ class ExecutableRunOptions {
   ExecutableRunOptions& set_stream(stream_executor::Stream* stream);
   stream_executor::Stream* stream() const;
 
-  // Sets the thread pool on which to run parallel CPU backend
-  // computations. Does not take ownership.
-  ExecutableRunOptions& set_inter_op_thread_pool(
-      tensorflow::thread::ThreadPool* inter_op_thread_pool);
-  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;
-
   // Sets the thread pool device on which to run Eigen subcomputations.
   // Does not take ownership.
   ExecutableRunOptions& set_intra_op_thread_pool(
@@ -93,7 +87,6 @@ class ExecutableRunOptions {
   int device_ordinal_ = -1;
   DeviceAssignment* device_assignment_ = nullptr;
   stream_executor::Stream* stream_ = nullptr;
-  tensorflow::thread::ThreadPool* inter_op_thread_pool_ = nullptr;
   const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
   ExecutionProfile* execution_profile_ = nullptr;
   int rng_seed_ = 0;
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 24e17ab..7102f46 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -197,8 +197,6 @@ StatusOr<std::unique_ptr<Literal>> CompiledLocalComputation::Execute(
         ExecutableRunOptions options;
         options.set_device_ordinal(device_ordinal);
         options.set_allocator(client->backend().memory_allocator());
-        options.set_inter_op_thread_pool(
-            client->backend().inter_op_thread_pool());
         options.set_intra_op_thread_pool(
             client->backend().eigen_intra_op_thread_pool_device());
         options.set_device_assignment(&device_assignment);
@@ -242,7 +240,6 @@ LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers(
   // Execute
   ExecutableRunOptions options;
   options.set_allocator(client->backend().memory_allocator());
-  options.set_inter_op_thread_pool(client->backend().inter_op_thread_pool());
   options.set_intra_op_thread_pool(
       client->backend().eigen_intra_op_thread_pool_device());
   ScopedShapedBuffer result_buffer =
diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index b1d616e..349b324 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -138,9 +138,6 @@ Backend::Backend(
       << "Service found no devices for backend " << platform_->Name() << '.';
 
   if (platform->id() == se::host::kHostPlatformId) {
-    inter_op_thread_pool_.reset(new tensorflow::thread::ThreadPool(
-        tensorflow::Env::Default(), "xla_inter_op",
-        tensorflow::port::NumSchedulableCPUs()));
     const int num_threads = intra_op_parallelism_threads > 0
                                 ? intra_op_parallelism_threads
                                 : tensorflow::port::NumSchedulableCPUs();
@@ -155,10 +152,6 @@ int Backend::default_device_ordinal() const {
   return default_stream_executor()->device_ordinal();
 }
 
-tensorflow::thread::ThreadPool* Backend::inter_op_thread_pool() const {
-  return inter_op_thread_pool_.get();
-}
-
 const Eigen::ThreadPoolDevice* Backend::eigen_intra_op_thread_pool_device()
     const {
   if (intra_op_thread_pool_wrapper_ == nullptr) {
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index d32a0a4..6546602 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -140,10 +140,6 @@ class Backend {
   // be equivalent to an executable compiled for the other.
   StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);
 
-  // For the host platform, returns the threadpool to use when scheduling
-  // parallel operators. For other platforms, returns NULL.
-  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;
-
   // For the host platform, returns the configured eigen threadpool device to be
   // used for scheduling work. For other platforms, returns NULL.
   const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
@@ -178,9 +174,6 @@ class Backend {
   // The default memory allocator to use.
   std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;
 
-  // For the CPU backend, a threadpool for scheduling parallel operators.
-  std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_;
-
   // For the CPU backend, an Eigen threadpool device for use by Eigen code.
   std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
 };
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 81c43db..48da1a5 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -278,14 +278,14 @@ ServiceExecutableRunOptions HloRunner::GetServiceRunOptionsForDevice(
   run_options.set_device_ordinal(device);
   run_options.set_stream(stream);
   run_options.set_allocator(backend().memory_allocator());
-  run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool());
   run_options.set_intra_op_thread_pool(
       backend().eigen_intra_op_thread_pool_device());
   if (device_assignment != nullptr) {
     run_options.set_device_assignment(device_assignment);
   }
-  return ServiceExecutableRunOptions(run_options, backend().StreamBorrower(),
-                                     backend().inter_op_thread_pool());
+  return ServiceExecutableRunOptions(
+      run_options, backend().StreamBorrower(),
+      /*xla_intra_op_thread_pool=*/backend().eigen_intra_op_thread_pool());
 }
 
 Backend& HloRunner::backend() {
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 086bd61..6e0d07a 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -574,7 +574,6 @@ Service::ExecuteParallelAndRegisterResult(
       ExecutableRunOptions options;
       options.set_stream(streams.back().get());
       options.set_allocator(backend->memory_allocator());
-      options.set_inter_op_thread_pool(backend->inter_op_thread_pool());
       options.set_intra_op_thread_pool(
           backend->eigen_intra_op_thread_pool_device());
       options.set_device_assignment(&device_assignment);
@@ -688,12 +687,12 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
     options.set_stream(stream.get());
     options.set_device_ordinal(stream->parent()->device_ordinal());
     options.set_allocator(backend->memory_allocator());
-    options.set_inter_op_thread_pool(backend->inter_op_thread_pool());
     options.set_intra_op_thread_pool(
         backend->eigen_intra_op_thread_pool_device());
     options.set_device_assignment(&device_assignment);
-    run_options.emplace_back(options, backend->StreamBorrower(),
-                             backend->inter_op_thread_pool());
+    run_options.emplace_back(
+        options, backend->StreamBorrower(),
+        /*xla_intra_op_thread_pool=*/backend->eigen_intra_op_thread_pool());
   }
 
   if (options_.number_of_replicas() == 1) {
@@ -1240,7 +1239,6 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
     ExecutableRunOptions options;
     options.set_stream(stream.get());
     options.set_allocator(execute_backend_->memory_allocator());
-    options.set_inter_op_thread_pool(execute_backend_->inter_op_thread_pool());
     options.set_intra_op_thread_pool(
         execute_backend_->eigen_intra_op_thread_pool_device());
 
diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc
index ca8e4cd..e859b30 100644
--- a/tensorflow/compiler/xla/tests/local_client_test_base.cc
+++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc
@@ -149,8 +149,6 @@ ExecutableBuildOptions LocalClientTestBase::DefaultExecutableBuildOptions()
 
 ExecutableRunOptions LocalClientTestBase::DefaultExecutableRunOptions() const {
   ExecutableRunOptions run_options;
-  run_options.set_inter_op_thread_pool(
-      local_client_->backend().inter_op_thread_pool());
   run_options.set_intra_op_thread_pool(thread_pool_wrapper_->device.get());
   run_options.set_allocator(GetOrCreateAllocator(local_client_->platform()));
   return run_options;
-- 
2.7.4