#include "tensorflow/core/common_runtime/process_util.h"
+#ifdef INTEL_MKL
+#include <omp.h>
+#endif
#include <string.h>
#include "tensorflow/core/lib/core/threadpool.h"
}
// Returns the number of inter-op threads to use for the given session options.
//
// A non-zero inter_op_parallelism_threads value in options.config is returned
// unchanged. When it is zero, a default is chosen: port::NumSchedulableCPUs()
// in regular builds, or, in INTEL_MKL builds, a smaller value derived from the
// OpenMP thread count (see below).
int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
- const int32 t = options.config.inter_op_parallelism_threads();
- if (t != 0) return t;
+ const int32 inter_op = options.config.inter_op_parallelism_threads();
+ if (inter_op != 0) return inter_op;
+#ifdef INTEL_MKL
+ // The MKL library executes ops in parallel using OMP threads, so the default
+ // is set conservatively to ceil(schedulable CPUs / OMP max threads), but never
+ // below 2, to avoid thread oversubscription and OMP resource exhaustion.
+ const int mkl_intra_op = omp_get_max_threads();
+ CHECK_GE(mkl_intra_op, 1);  // Also guards the division below against zero.
+ const int32 mkl_inter_op = std::max(
+ (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+ VLOG(0) << "Creating new thread pool with default inter op setting: "
+ << mkl_inter_op
+ << ". Tune using inter_op_parallelism_threads for best performance.";
+ return mkl_inter_op;
+#else  // !INTEL_MKL
// Default to using the number of cores available in the process.
return port::NumSchedulableCPUs();
+#endif  // INTEL_MKL
}
thread::ThreadPool* NewThreadPoolFromSessionOptions(