Add ultra low precision options (#14133)

author Summer Deng <summerdeng@fb.com>

Sun, 18 Nov 2018 20:49:39 +0000 (12:49 -0800)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Sun, 18 Nov 2018 20:51:34 +0000 (12:51 -0800)
author Summer Deng <summerdeng@fb.com>
Sun, 18 Nov 2018 20:49:39 +0000 (12:49 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Sun, 18 Nov 2018 20:51:34 +0000 (12:51 -0800)
diff --git a/caffe2/quantization/server/conv_dnnlowp_op.cc b/caffe2/quantization/server/conv_dnnlowp_op.cc

index 8550758..6249429 100644 (file)
--- a/caffe2/quantization/server/conv_dnnlowp_op.cc
+++ b/caffe2/quantization/server/conv_dnnlowp_op.cc
@@ -30,6 +30,8 @@ C10_DEFINE_bool(
      "Dump quantized input and weight tensors used in Conv and FC operators "
      "during the first iteration");
  
+C10_DECLARE_bool(caffe2_dnnlowp_force_slow_path); // defined in dnnlowp.cc
+
  namespace caffe2 {
  
  using namespace std;
@@ -273,7 +275,8 @@ void ConvDNNLowPOp<T, ReluFused>::QuantizeWeight_() {
  
    bool packW = ConvPoolOpBase<CPUContext>::order_ == StorageOrder::NHWC &&
        OperatorBase::debug_def().engine() != "DNNLOWP_ACC16" &&
-      is_same<T, uint8_t>::value && GetCpuId().avx2();
+      is_same<T, uint8_t>::value && GetCpuId().avx2() &&
+      !FLAGS_caffe2_dnnlowp_force_slow_path;
  
    bool depthwise_3x3_fast_path = false, depthwise_3x3x3_fast_path = false;
    if (TakeDepthWise3x3FastPath_()) {
@@ -371,6 +374,8 @@ void ConvDNNLowPOp<T, ReluFused>::QuantizeWeight_() {
            OperatorBase::debug_def().engine() == "DNNLOWP_ACC16" ||
            depthwise_3x3_fast_path) {
          reason = "";
+      } else if (FLAGS_caffe2_dnnlowp_force_slow_path) {
+        reason = "slow path enforced";
        } else {
          assert(false);
        }
diff --git a/caffe2/quantization/server/dnnlowp.cc b/caffe2/quantization/server/dnnlowp.cc

index 37529db..c84efad 100644 (file)
--- a/caffe2/quantization/server/dnnlowp.cc
+++ b/caffe2/quantization/server/dnnlowp.cc
@@ -50,6 +50,10 @@ C10_DEFINE_int32(
    dnnlowp_copy_to_32bit_frequency, 32,
    "When outlier-aware quantization is used, this option specifies how often "
    "we spill 16-bit accumulated numbers to 32-bit during the first pass");
+C10_DEFINE_bool(
+    caffe2_dnnlowp_force_slow_path,
+    false,
+    "When true, use slow path in quantization");
  
  namespace dnnlowp {
  
@@ -327,7 +331,8 @@ void Quantize(
    bool avx2_support = cpuid.avx2();
    bool fma_support = cpuid.fma();
    if (avx2_support && fma_support && qparams.precision == 8 &&
-      std::is_same<T, uint8_t>::value) {
+      std::is_same<T, uint8_t>::value &&
+      !FLAGS_caffe2_dnnlowp_force_slow_path) {
      // fast path
      constexpr int VLEN = 8;
      std::size_t i = 0;
author	Summer Deng <summerdeng@fb.com>
	Sun, 18 Nov 2018 20:49:39 +0000 (12:49 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Sun, 18 Nov 2018 20:51:34 +0000 (12:51 -0800)
caffe2/quantization/server/conv_dnnlowp_op.cc		patch | blob | history
caffe2/quantization/server/dnnlowp.cc		patch | blob | history