Minor bug fix in dnnlowp (#15841)
author Summer Deng <summerdeng@fb.com>
Thu, 10 Jan 2019 01:11:53 +0000 (17:11 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 10 Jan 2019 01:18:30 +0000 (17:18 -0800)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15841

Fix the bugs in dnnlowp to support int8/int16 quantization for sparsenn.

Reviewed By: jspark1105

Differential Revision: D13600878

fbshipit-source-id: 27f06d7c54a663208320c8f211714220a9b49540

caffe2/quantization/server/dnnlowp.cc
caffe2/quantization/server/dnnlowp_op.h

index 90d4029..19c59a6 100644 (file)
@@ -223,7 +223,11 @@ TensorQuantizationParams QuantizationFactory::ChooseQuantizationParams(
       return ChooseQuantizationParams(min, max, precision, preserve_sparsity);
     }
 
-    Histogram hist(2048, min, max);
+    /** Adjust the granularity of histogram collection to the
+     * quantization precision. Using 8x as many histogram bins
+     * as quantization levels should be sufficient for linear quantization.
+     */
+    Histogram hist(1 << (precision + 3), min, max);
     for (int i = 0; i < len; ++i) {
       hist.Add(values[i]);
     }
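
Note on the hunk above: the bin count now scales with the quantization precision. Since 1 << (precision + 3) is 8x the number of quantization levels (2^precision), 8-bit quantization keeps the previous 2048-bin histogram, while 16-bit quantization gets 524288 bins. The snippet below is only an illustration of that arithmetic; it is not part of the dnnlowp code.

// Illustrative only: shows how the histogram bin count scales with precision.
#include <cstdio>

int main() {
  for (int precision : {8, 16}) {
    // 8x as many histogram bins as quantization levels (2^precision).
    int nbins = 1 << (precision + 3);
    std::printf("precision=%2d -> %d histogram bins\n", precision, nbins);
  }
  return 0;  // prints 2048 for int8 and 524288 for int16
}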
index 5690675..d941456 100644 (file)
@@ -135,8 +135,8 @@ class DNNLowPOp : public Operator<CPUContext> {
       actual = OutputTensorCPU_(0)->template data<float>();
     } else {
       actual_temp.resize(OutputTensorCPU_(0)->numel());
-      fbgemm::Dequantize<float>(
-          OutputTensorCPU_(0)->template data<float>(),
+      fbgemm::Dequantize<T>(
+          OutputTensorCPU_(0)->template data<T>(),
           actual_temp.data(),
           OutputTensorCPU_(0)->numel(),
           out_qparams_);
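
Note on the hunk above: the output buffer of a quantized op holds values of the integer type T (e.g. uint8_t or uint16_t), not float, so dequantizing it through data<float>() reinterpreted the wrong type. Below is a minimal sketch of affine dequantization, assuming a simple qparams struct with scale and zero_point fields; it only illustrates the idea and is not fbgemm's actual implementation.

// Minimal sketch of affine dequantization (not fbgemm's actual code).
// Assumes hypothetical QParams with 'scale' and 'zero_point' fields.
#include <cstdint>
#include <vector>

struct QParams {
  float scale;
  int32_t zero_point;
};

template <typename T>
void DequantizeSketch(const T* src, float* dst, int len, const QParams& qparams) {
  for (int i = 0; i < len; ++i) {
    // real_value = scale * (quantized_value - zero_point)
    dst[i] = qparams.scale * (static_cast<int32_t>(src[i]) - qparams.zero_point);
  }
}

int main() {
  // Example: dequantize a few uint8 values with scale 0.5 and zero point 128.
  std::vector<uint8_t> q = {0, 128, 255};
  std::vector<float> real(q.size());
  QParams qp{0.5f, 128};
  DequantizeSketch(q.data(), real.data(), static_cast<int>(q.size()), qp);
  // real == {-64.0, 0.0, 63.5}
  return 0;
}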