Fix race condition caused by calling mutable_data inside an OpenMP region in batched matmul
authorJianyu Huang <jianyuhuang@fb.com>
Wed, 19 Dec 2018 07:17:11 +0000 (23:17 -0800)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 19 Dec 2018 07:22:56 +0000 (23:22 -0800)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15371

Similar to D13387692:

Never call mutable_data from an OpenMP region!!!

Reviewed By: jspark1105

Differential Revision: D13511259

fbshipit-source-id: 100812d2a547c0a1d5018749d5fdc88162375673

caffe2/quantization/server/batch_matmul_dnnlowp_op.cc

index ad40049..6aa932a 100644 (file)
@@ -419,6 +419,8 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
 #endif
 
     if (!dequantize_output_) {
+      auto Y_data = Y->template mutable_data<T>();
+
       auto row_offset_len_per_thread =
           PackAWithRowOffset<uint8_t>::rowOffsetBufferSize();
       row_offsets_.resize(
@@ -463,8 +465,7 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
           fbgemmPacked(
               packA,
               *Bq_packed_[B_batch_idx],
-              reinterpret_cast<uint8_t*>(Y->template mutable_data<T>()) +
-                  p * Y_stride + i * M * N,
+              reinterpret_cast<uint8_t*>(Y_data) + p * Y_stride + i * M * N,
               Y_int32_.data() + p * Y_stride + i * M * N,
               N,
               outputProcObj,