From 48fe839d567e222916cea81d3c7be25cee964c1f Mon Sep 17 00:00:00 2001
From: James Reed
Date: Fri, 8 Feb 2019 13:45:43 -0800
Subject: [PATCH] delete critical section in TH*Tensor_addmm (#16889)

Summary:
This was serializing all calls to `addmm` (and any op that used it, in my case `bmm`) in the entire process, and led to downright atrocious performance in the TorchScript threaded runtime. Removing this gives a 2x throughput boost for high-load machine translation inference.

The original justification for this is dubious: there are other `gemm` callsites in the codebase that are not protected by critical sections, and in caffe2 land we never had any issues with nonreentrant BLAS libraries.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/16889

Differential Revision: D14008928

Pulled By: jamesr66a

fbshipit-source-id: 498e2133bd6564dba539a2d9751f4e61afbce608
---
 aten/src/TH/generic/THTensorMath.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aten/src/TH/generic/THTensorMath.cpp b/aten/src/TH/generic/THTensorMath.cpp
index 29904c9..16ff7d2 100644
--- a/aten/src/TH/generic/THTensorMath.cpp
+++ b/aten/src/TH/generic/THTensorMath.cpp
@@ -1041,7 +1041,6 @@ void THTensor_(addmm)(THTensor *r_, scalar_t beta, THTensor *t, scalar_t alpha,
   int64_t ldm1_ = (transpose_m1 == 'n' ? m1_->stride((transpose_r == 'n' ? 1 : 0)) : m1_->stride((transpose_r == 'n' ? 0 : 1)));
   int64_t ldm2_ = (transpose_m2 == 'n' ? m2_->stride((transpose_r == 'n' ? 1 : 0)) : m2_->stride((transpose_r == 'n' ? 0 : 1)));
 
-#pragma omp critical(blasgemm)
   /* do the operation */
   THBlas_(gemm)(transpose_m1,
                 transpose_m2,
-- 
2.7.4
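
Note (not part of the patch): the sketch below is a minimal, standalone illustration of why a named `#pragma omp critical` region serializes concurrent callers, which is the behavior the one-line deletion above removes around `THBlas_(gemm)`. The gemm call is replaced by a placeholder busy loop, and `fake_gemm`/`accs` are hypothetical names introduced only for this example; it assumes the underlying BLAS call itself is thread-safe, as the commit message argues.

```cpp
// Build with: g++ -O2 -fopenmp critical_demo.cpp -o critical_demo
#include <cstdio>
#include <omp.h>

// Stand-in for a thread-safe gemm call operating on per-thread data.
static void fake_gemm(volatile double* acc) {
  for (int i = 0; i < 10000000; ++i) {
    *acc += 1.0;
  }
}

int main() {
  double accs[8] = {0};

  // Every thread funnels through the same named critical section, so the
  // "gemm" calls run one at a time even though they touch disjoint data.
  double start = omp_get_wtime();
  #pragma omp parallel num_threads(8)
  {
    #pragma omp critical(blasgemm)
    fake_gemm(&accs[omp_get_thread_num()]);
  }
  std::printf("with critical:    %.3fs\n", omp_get_wtime() - start);

  // Without the critical section the calls proceed concurrently, which is
  // the effect the patch aims for when many threads hit addmm/bmm at once.
  start = omp_get_wtime();
  #pragma omp parallel num_threads(8)
  {
    fake_gemm(&accs[omp_get_thread_num()]);
  }
  std::printf("without critical: %.3fs\n", omp_get_wtime() - start);
  return 0;
}
```

On a multi-core machine the first timing scales with the number of threads while the second stays roughly flat, mirroring the throughput gap described in the summary.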