From 5af9aaa5bbfbad51f420acde0f992279404032e5 Mon Sep 17 00:00:00 2001
From: Summer Deng
Date: Wed, 9 Jan 2019 17:11:53 -0800
Subject: [PATCH] Minor bug fix in dnnlowp (#15841)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15841
Fix the bugs in dnnlowp to support int8/int16 quantization for sparsenn.

Reviewed By: jspark1105

Differential Revision: D13600878

fbshipit-source-id: 27f06d7c54a663208320c8f211714220a9b49540
---
 caffe2/quantization/server/dnnlowp.cc   | 6 +++++-
 caffe2/quantization/server/dnnlowp_op.h | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/caffe2/quantization/server/dnnlowp.cc b/caffe2/quantization/server/dnnlowp.cc
index 90d4029..19c59a6 100644
--- a/caffe2/quantization/server/dnnlowp.cc
+++ b/caffe2/quantization/server/dnnlowp.cc
@@ -223,7 +223,11 @@ TensorQuantizationParams QuantizationFactory::ChooseQuantizationParams(
     return ChooseQuantizationParams(min, max, precision, preserve_sparsity);
   }
 
-  Histogram hist(2048, min, max);
+  /** Adjust the granularity of histogram collection to
+   * the quantization precision. Using 8x more bins
+   * in the histogram should be sufficient for linear quantization.
+   */
+  Histogram hist(1 << (precision + 3), min, max);
   for (int i = 0; i < len; ++i) {
     hist.Add(values[i]);
   }
diff --git a/caffe2/quantization/server/dnnlowp_op.h b/caffe2/quantization/server/dnnlowp_op.h
index 5690675..d941456 100644
--- a/caffe2/quantization/server/dnnlowp_op.h
+++ b/caffe2/quantization/server/dnnlowp_op.h
@@ -135,8 +135,8 @@ class DNNLowPOp : public Operator<CPUContext> {
       actual = OutputTensorCPU_(0)->template data<float>();
     } else {
       actual_temp.resize(OutputTensorCPU_(0)->numel());
-      fbgemm::Dequantize<uint8_t>(
-          OutputTensorCPU_(0)->template data<uint8_t>(),
+      fbgemm::Dequantize<T>(
+          OutputTensorCPU_(0)->template data<T>(),
           actual_temp.data(),
           OutputTensorCPU_(0)->numel(),
           out_qparams_);
-- 
2.7.4