From c3203efe808c5d042e1acd20d19467bcff597487 Mon Sep 17 00:00:00 2001
From: John Shen
Date: Thu, 9 Sep 2021 09:30:32 -0700
Subject: [PATCH] [pytorch] Make qlinear weight packing thread safe (#63804)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63804

Adding a lock around weight packing section of qlinear + qlinear_dynamic

Test Plan: automated tests

Reviewed By: kimishpatel

Differential Revision: D30340957

fbshipit-source-id: 1c9faf796c4ffbc74345396188a6f1154a76bea6
---
 aten/src/ATen/native/quantized/cpu/qlinear.cpp         | 2 ++
 aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp | 3 +++
 aten/src/ATen/native/quantized/cpu/qnnpack_utils.h     | 1 +
 3 files changed, 6 insertions(+)

diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
index 9f3bb4b..9e6ddc0 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
@@ -280,6 +280,8 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl(
   size_t cols_w = input_contig.size(input_contig.dim() - 1);
   auto input_scale = input_contig.q_scale();
 
+  // QNNPack is not thread safe
+  std::lock_guard<std::mutex> lock(qnnp_mutex_);
   if (!this->input_scale.has_value() ||
       this->input_scale.value() != input_scale) {
     // Get the original weight and adjust it to uint8 from int8
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
index 3331a03..09f4228 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -266,6 +266,9 @@ at::Tensor PackedLinearWeightsQnnp::apply_dynamic_impl(at::Tensor input) {
       /*qmin=*/0,
       /*qmax=*/255);
   float* weight_scales_data = w_scales.data_ptr<float>();
+
+  // QNNPack is not thread safe
+  std::lock_guard<std::mutex> lock(qnnp_mutex_);
   if (!input_scale.has_value() || input_scale.value() != q_params.scale) {
     generate_requantization_scales(
         // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack_utils.h b/aten/src/ATen/native/quantized/cpu/qnnpack_utils.h
index 91ede92..fa42765 100644
--- a/aten/src/ATen/native/quantized/cpu/qnnpack_utils.h
+++ b/aten/src/ATen/native/quantized/cpu/qnnpack_utils.h
@@ -75,6 +75,7 @@ struct PackedLinearWeightsQnnp : public LinearPackedParamsBase {
       c10::optional<at::Tensor> bias);
 
  private:
+  std::mutex qnnp_mutex_;
   template <bool ReluFused>
   at::Tensor apply_impl(
       at::Tensor input,
-- 
2.7.4