From cd8dd49fba28bb5542f93fa865902af97ce05985 Mon Sep 17 00:00:00 2001
From: Jianyu Huang <jianyuhuang@fb.com>
Date: Tue, 18 Dec 2018 23:17:11 -0800
Subject: [PATCH] Fix race condition from calling mutable_data inside OpenMP
 region for batched matmul (#15371)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15371

Similar to D13387692:

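Never call mutable_data from inside an OpenMP region: doing so is a race
condition. Fetch the output pointer once (Y_data) before the region and
reuse it in the call to fbgemmPacked.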

Reviewed By: jspark1105

Differential Revision: D13511259

fbshipit-source-id: 100812d2a547c0a1d5018749d5fdc88162375673
---
 caffe2/quantization/server/batch_matmul_dnnlowp_op.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc b/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
index ad40049..6aa932a 100644
--- a/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
+++ b/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
@@ -419,6 +419,8 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
 #endif
 
     if (!dequantize_output_) {
+      auto Y_data = Y->template mutable_data<T>();
+
       auto row_offset_len_per_thread =
           PackAWithRowOffset<uint8_t>::rowOffsetBufferSize();
       row_offsets_.resize(
@@ -463,8 +465,7 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
           fbgemmPacked(
               packA,
               *Bq_packed_[B_batch_idx],
-              reinterpret_cast<uint8_t*>(Y->template mutable_data<T>()) +
-                  p * Y_stride + i * M * N,
+              reinterpret_cast<uint8_t*>(Y_data) + p * Y_stride + i * M * N,
               Y_int32_.data() + p * Y_stride + i * M * N,
               N,
               outputProcObj,
-- 
2.7.4