[quant] Add op support for linear_relu_dynamic_fp16 (#63824)
author    Supriya Rao <supriyar@fb.com>
Fri, 27 Aug 2021 04:05:56 +0000 (21:05 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 27 Aug 2021 04:12:04 +0000 (21:12 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63824

Add a fused operator implementation that works with the quantization fusion APIs.
Once the FBGEMM FP16 kernel supports ReLU fusion natively, we can remove the explicit ReLU call from the PyTorch operator.
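
For context, a minimal usage sketch of the new op (op names and signatures are taken from the diffs below; shapes are illustrative, and an FBGEMM-capable CPU is required):

    import torch

    # Prepack FP32 weights into the packed FP16 format: weights are stored
    # as FP16 while activations stay FP32.
    w = torch.randn(8, 4)
    bias = torch.randn(8)
    w_packed = torch.ops.quantized.linear_prepack_fp16(w, bias)

    # The new fused op: dynamic FP16 linear followed by ReLU; output is FP32.
    x = torch.randn(2, 4)
    y = torch.ops.quantized.linear_relu_dynamic_fp16(x, w_packed)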

Test Plan:
python test/test_quantization.py

Imported from OSS

Reviewed By: heitorschueroff

Differential Revision: D30503514

fbshipit-source-id: 6bf3bd53f47ffaa3f1d178eaad8cc980a7f5258a

aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
aten/src/ATen/native/quantized/library.cpp
test/quantization/core/test_quantized_op.py

diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
index 23c6158..3331a03 100644
@@ -451,8 +451,14 @@ class QLinearDynamicFp16 final {
     TORCH_CHECK(
         fbgemm::fbgemmSupportedCPU(), "Your CPU doesn't support FBGEMM.");
 
-    TORCH_INTERNAL_ASSERT(!ReluFused);
-    return packed_weight->apply_dynamic(std::move(input));
+    auto output = packed_weight->apply_dynamic(std::move(input));
+
+    // Call the relu operator here until fp16 linear dynamic in FBGEMM
+    // supports it natively.
+    if (ReluFused) {
+      output.relu_();
+    }
+    return output;
   }
 #else // USE_FBGEMM
   static at::Tensor run(
@@ -471,6 +477,7 @@ TORCH_LIBRARY_IMPL(quantized, CPU, m) {
   m.impl(TORCH_SELECTIVE_NAME("quantized::linear_dynamic"), TORCH_FN(QLinearDynamicInt8<false>::run));
   m.impl(TORCH_SELECTIVE_NAME("quantized::linear_relu_dynamic"), TORCH_FN(QLinearDynamicInt8<true>::run));
   m.impl(TORCH_SELECTIVE_NAME("quantized::linear_dynamic_fp16"), TORCH_FN(QLinearDynamicFp16<false>::run));
+  m.impl(TORCH_SELECTIVE_NAME("quantized::linear_relu_dynamic_fp16"), TORCH_FN(QLinearDynamicFp16<true>::run));
 }
 
 TORCH_LIBRARY_IMPL(_quantized, CPU, m) {
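
Until FBGEMM fuses ReLU into its FP16 kernel natively, the fused op above is just the unfused FP16 dynamic linear followed by an in-place ReLU. A sketch of that equivalence (tensor shapes are illustrative):

    import torch

    x = torch.randn(2, 4)
    w_packed = torch.ops.quantized.linear_prepack_fp16(torch.randn(8, 4))

    fused = torch.ops.quantized.linear_relu_dynamic_fp16(x, w_packed)
    unfused = torch.ops.quantized.linear_dynamic_fp16(x, w_packed).relu_()
    assert torch.allclose(fused, unfused)  # same kernel, ReLU applied afterwards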
diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp
index 8ead74f..3dcf75b 100644
@@ -142,6 +142,7 @@ TORCH_LIBRARY(quantized, m) {
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_dynamic(Tensor X, __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack, bool reduce_range=False) -> Tensor Y"));
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_relu_dynamic(Tensor X, __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack, bool reduce_range=False) -> Tensor Y"));
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_dynamic_fp16(Tensor X, __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack) -> Tensor Y"));
+  m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_relu_dynamic_fp16(Tensor X, __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack) -> Tensor Y"));
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_prepack(Tensor W, Tensor? B=None) -> __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack"));
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_prepack_fp16(Tensor W, Tensor? B=None) -> __torch__.torch.classes.quantized.LinearPackedParamsBase W_prepack"));
   m.def(TORCH_SELECTIVE_SCHEMA("quantized::linear_prepack_legacy(Tensor W, Tensor? B=None) -> Tensor W_prepack"));
diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py
index 86fe350..49b7c96 100644
@@ -2782,6 +2782,38 @@ class TestDynamicQuantizedLinear(TestCase):
         self.assertEqual(Y_fp32, Y_fp32_ref,
                          msg="torch.ops.quantized.fbgemm_linear_dynamic results are off")
 
+    @skipIfNoFBGEMM
+    def test_qlinear_dynamic_fp16(self):
+
+        options = itertools.product(
+            (2, 4),         # batch_size
+            (4, 5, 12),     # input_channels
+            (4, 7, 8),      # output_channels
+            (True, False),  # use_bias
+            (True, False),  # use_relu
+        )
+        for batch_size, input_channels, output_channels, use_bias, use_relu in options:
+            qlinear_prepack = torch.ops.quantized.linear_prepack_fp16
+            if use_relu:
+                qlinear_dynamic = torch.ops.quantized.linear_relu_dynamic_fp16
+            else:
+                qlinear_dynamic = torch.ops.quantized.linear_dynamic_fp16
+
+            x = torch.randn(batch_size, input_channels)
+            w = torch.randn(output_channels, input_channels)
+            bias = torch.randn(output_channels) if use_bias else None
+
+            w_packed = qlinear_prepack(w, bias)
+            out = qlinear_dynamic(x, w_packed)
+
+            # qlinear_dynamic_fp16 uses FP32 activation tensors and FP16 weight
+            # tensors; the output is FP32
+            w_fp16 = w.to(torch.float16).to(torch.float32)
+            ref = F.linear(x, w_fp16, bias)
+            if use_relu:
+                ref.relu_()
+
+            self.assertEqual(out, ref)
 
 class TestDynamicQuantizedRNNOp(TestCase):
     """Tests the correctness of the dynamic quantized lstm/gru."""