From ab5de487813b4849dfb5415ee60595654dff06be Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 26 Apr 2018 15:33:38 -0700 Subject: [PATCH] Remove the inter-op thread pool Forgot about this in cl/194299356. However, when I checked cl/194299356, I found that we actually (incorrectly?) used the *intra*-op thread pool in the parallel CPU executable. Does that mean the inter-op thread pool was always unused? PiperOrigin-RevId: 194464734 --- tensorflow/compiler/xla/executable_run_options.cc | 11 ----------- tensorflow/compiler/xla/executable_run_options.h | 7 ------- tensorflow/compiler/xla/python/local_computation_builder.cc | 3 --- tensorflow/compiler/xla/service/backend.cc | 7 ------- tensorflow/compiler/xla/service/backend.h | 7 ------- tensorflow/compiler/xla/service/hlo_runner.cc | 6 +++--- tensorflow/compiler/xla/service/service.cc | 8 +++----- tensorflow/compiler/xla/tests/local_client_test_base.cc | 2 -- 8 files changed, 6 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc index 99b8f05..a472747 100644 --- a/tensorflow/compiler/xla/executable_run_options.cc +++ b/tensorflow/compiler/xla/executable_run_options.cc @@ -45,17 +45,6 @@ stream_executor::Stream* ExecutableRunOptions::stream() const { return stream_; } -ExecutableRunOptions& ExecutableRunOptions::set_inter_op_thread_pool( - tensorflow::thread::ThreadPool* inter_op_thread_pool) { - inter_op_thread_pool_ = inter_op_thread_pool; - return *this; -} - -tensorflow::thread::ThreadPool* ExecutableRunOptions::inter_op_thread_pool() - const { - return inter_op_thread_pool_; -} - ExecutableRunOptions& ExecutableRunOptions::set_intra_op_thread_pool( const Eigen::ThreadPoolDevice* intra_op_thread_pool) { intra_op_thread_pool_ = intra_op_thread_pool; diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h index a306ae1..416131b 100644 --- 
a/tensorflow/compiler/xla/executable_run_options.h +++ b/tensorflow/compiler/xla/executable_run_options.h @@ -65,12 +65,6 @@ class ExecutableRunOptions { ExecutableRunOptions& set_stream(stream_executor::Stream* stream); stream_executor::Stream* stream() const; - // Sets the thread pool on which to run parallel CPU backend - // computations. Does not take ownership. - ExecutableRunOptions& set_inter_op_thread_pool( - tensorflow::thread::ThreadPool* inter_op_thread_pool); - tensorflow::thread::ThreadPool* inter_op_thread_pool() const; - // Sets the thread pool device on which to run Eigen subcomputations. // Does not take ownership. ExecutableRunOptions& set_intra_op_thread_pool( @@ -93,7 +87,6 @@ class ExecutableRunOptions { int device_ordinal_ = -1; DeviceAssignment* device_assignment_ = nullptr; stream_executor::Stream* stream_ = nullptr; - tensorflow::thread::ThreadPool* inter_op_thread_pool_ = nullptr; const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr; ExecutionProfile* execution_profile_ = nullptr; int rng_seed_ = 0; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 24e17ab..7102f46 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -197,8 +197,6 @@ StatusOr> CompiledLocalComputation::Execute( ExecutableRunOptions options; options.set_device_ordinal(device_ordinal); options.set_allocator(client->backend().memory_allocator()); - options.set_inter_op_thread_pool( - client->backend().inter_op_thread_pool()); options.set_intra_op_thread_pool( client->backend().eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); @@ -242,7 +240,6 @@ LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers( // Execute ExecutableRunOptions options; options.set_allocator(client->backend().memory_allocator()); - 
options.set_inter_op_thread_pool(client->backend().inter_op_thread_pool()); options.set_intra_op_thread_pool( client->backend().eigen_intra_op_thread_pool_device()); ScopedShapedBuffer result_buffer = diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index b1d616e..349b324 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -138,9 +138,6 @@ Backend::Backend( << "Service found no devices for backend " << platform_->Name() << '.'; if (platform->id() == se::host::kHostPlatformId) { - inter_op_thread_pool_.reset(new tensorflow::thread::ThreadPool( - tensorflow::Env::Default(), "xla_inter_op", - tensorflow::port::NumSchedulableCPUs())); const int num_threads = intra_op_parallelism_threads > 0 ? intra_op_parallelism_threads : tensorflow::port::NumSchedulableCPUs(); @@ -155,10 +152,6 @@ int Backend::default_device_ordinal() const { return default_stream_executor()->device_ordinal(); } -tensorflow::thread::ThreadPool* Backend::inter_op_thread_pool() const { - return inter_op_thread_pool_.get(); -} - const Eigen::ThreadPoolDevice* Backend::eigen_intra_op_thread_pool_device() const { if (intra_op_thread_pool_wrapper_ == nullptr) { diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h index d32a0a4..6546602 100644 --- a/tensorflow/compiler/xla/service/backend.h +++ b/tensorflow/compiler/xla/service/backend.h @@ -140,10 +140,6 @@ class Backend { // be equivalent to an executable compiled for the other. StatusOr devices_equivalent(int device_ordinal_a, int device_ordinal_b); - // For the host platform, returns the threadpool to use when scheduling - // parallel operators. For other platforms, returns NULL. - tensorflow::thread::ThreadPool* inter_op_thread_pool() const; - // For the host platform, returns the configured eigen threadpool device to be // used for scheduling work. For other platforms, returns NULL. 
const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const; @@ -178,9 +174,6 @@ class Backend { // The default memory allocator to use. std::unique_ptr memory_allocator_; - // For the CPU backend, a threadpool for scheduling parallel operators. - std::unique_ptr inter_op_thread_pool_; - // For the CPU backend, an Eigen threadpool device for use by Eigen code. std::unique_ptr intra_op_thread_pool_wrapper_; }; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 81c43db..48da1a5 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -278,14 +278,14 @@ ServiceExecutableRunOptions HloRunner::GetServiceRunOptionsForDevice( run_options.set_device_ordinal(device); run_options.set_stream(stream); run_options.set_allocator(backend().memory_allocator()); - run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool()); run_options.set_intra_op_thread_pool( backend().eigen_intra_op_thread_pool_device()); if (device_assignment != nullptr) { run_options.set_device_assignment(device_assignment); } - return ServiceExecutableRunOptions(run_options, backend().StreamBorrower(), - backend().inter_op_thread_pool()); + return ServiceExecutableRunOptions( + run_options, backend().StreamBorrower(), + /*xla_intra_op_thread_pool=*/backend().eigen_intra_op_thread_pool()); } Backend& HloRunner::backend() { diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 086bd61..6e0d07a 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -574,7 +574,6 @@ Service::ExecuteParallelAndRegisterResult( ExecutableRunOptions options; options.set_stream(streams.back().get()); options.set_allocator(backend->memory_allocator()); - options.set_inter_op_thread_pool(backend->inter_op_thread_pool()); options.set_intra_op_thread_pool( 
backend->eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); @@ -688,12 +687,12 @@ StatusOr Service::ExecuteAndRegisterResult( options.set_stream(stream.get()); options.set_device_ordinal(stream->parent()->device_ordinal()); options.set_allocator(backend->memory_allocator()); - options.set_inter_op_thread_pool(backend->inter_op_thread_pool()); options.set_intra_op_thread_pool( backend->eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); - run_options.emplace_back(options, backend->StreamBorrower(), - backend->inter_op_thread_pool()); + run_options.emplace_back( + options, backend->StreamBorrower(), + /*xla_intra_op_thread_pool=*/backend->eigen_intra_op_thread_pool()); } if (options_.number_of_replicas() == 1) { @@ -1240,7 +1239,6 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ExecutableRunOptions options; options.set_stream(stream.get()); options.set_allocator(execute_backend_->memory_allocator()); - options.set_inter_op_thread_pool(execute_backend_->inter_op_thread_pool()); options.set_intra_op_thread_pool( execute_backend_->eigen_intra_op_thread_pool_device()); diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index ca8e4cd..e859b30 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -149,8 +149,6 @@ ExecutableBuildOptions LocalClientTestBase::DefaultExecutableBuildOptions() ExecutableRunOptions LocalClientTestBase::DefaultExecutableRunOptions() const { ExecutableRunOptions run_options; - run_options.set_inter_op_thread_pool( - local_client_->backend().inter_op_thread_pool()); run_options.set_intra_op_thread_pool(thread_pool_wrapper_->device.get()); run_options.set_allocator(GetOrCreateAllocator(local_client_->platform())); return run_options; -- 2.7.4