clean up D13579188 (#15759)

author Jongsoo Park <jongsoo@fb.com>

Tue, 8 Jan 2019 02:45:32 +0000 (18:45 -0800)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Tue, 8 Jan 2019 02:48:25 +0000 (18:48 -0800)
author Jongsoo Park <jongsoo@fb.com>
Tue, 8 Jan 2019 02:45:32 +0000 (18:45 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Tue, 8 Jan 2019 02:48:25 +0000 (18:48 -0800)
diff --git a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc

index b4bf136..45b5918 100644 (file)
--- a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
+++ b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
@@ -21,19 +21,19 @@ C10_DECLARE_bool(caffe2_dnnlowp_shared_int32_buffer);
  // Thresholds to fallback to 32-bit accumulation when 16-bit accumulation
  // doesn't provide performance benefits.
  C10_DEFINE_double(
-    caffe2_dnnlowp_fallback_to_32_bit_accumulation_density_threshold,
+    caffe2_dnnlowp_acc16_density_threshold,
      0.05,
      "If density of outlier is higher than this, fallback to 32-bit accumulation");
  C10_DEFINE_int32(
-    caffe2_dnnlowp_fallback_to_32_bit_accumulation_m_threshold,
+    caffe2_dnnlowp_acc16_m_threshold,
      0,
      "If m is smaller than this, fallback to 32-bit accumulation");
  C10_DEFINE_int32(
-    caffe2_dnnlowp_fallback_to_32_bit_accumulation_n_threshold,
+    caffe2_dnnlowp_acc16_n_threshold,
      0,
      "If n is smaller than this, fallback to 32-bit accumulation");
  C10_DEFINE_int32(
-    caffe2_dnnlowp_fallback_to_32_bit_accumulation_k_threshold,
+    caffe2_dnnlowp_acc16_k_threshold,
      0,
      "If k is smaller than this, fallback to 32-bit accumulation");
  
@@ -51,7 +51,13 @@ ConvDNNLowPAcc16Op<ReluFused>::ConvDNNLowPAcc16Op(
            FLAGS_caffe2_dnnlowp_nbits_in_non_outlier)),
        copy_to_32bit_frequency_(OperatorBase::GetSingleArgument<int>(
            "copy_to_32bit_frequency",
-          FLAGS_caffe2_dnnlowp_copy_to_32bit_frequency)) {}
+          FLAGS_caffe2_dnnlowp_copy_to_32bit_frequency)) {
+  if (nbits_in_non_outlier_ == 0) {
+    LOG(INFO) << "nbits_in_non_outlier == 0 means everything is outlier so we "
+                 "fallback to acc32";
+    fallback_to_32_bit_accumulation_ = true;
+  }
+}
  
  template <bool ReluFused>
  bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
@@ -66,7 +72,7 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
    if (!Wq_acc16_packed_ &&
        this->template InputIsType<Int8ConvDNNLowPPackedWeightBlob>(FILTER)) {
      CAFFE_ENFORCE_EQ(
-        ConvPoolOpBase<CPUContext>::order_,
+        this->order_,
          StorageOrder::NHWC,
          "Pre-packed weight only works with NHWC layout");
      // If the input is already packed
@@ -100,44 +106,34 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
      this->SetOutputSize(X, Y, filter.dim32(0));
      const int output_image_size = this->GetDimsSize(*Y);
  
-    if (N * output_image_size <
-        FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_m_threshold) {
-      LOG(INFO)
-          << "M " << N * output_image_size << " is smaller than threshold "
-          << FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_m_threshold
-          << " . Falling back to acc32";
+    if (N * output_image_size < FLAGS_caffe2_dnnlowp_acc16_m_threshold) {
+      LOG(INFO) << "M " << N * output_image_size
+                << " of Conv layer with weight blob "
+                << this->debug_def().input(1) << " is smaller than threshold "
+                << FLAGS_caffe2_dnnlowp_acc16_m_threshold
+                << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
-    if (num_out_channels / group_ <
-        FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_n_threshold) {
-      LOG(INFO)
-          << "N " << num_out_channels / group_ << " is smaller than threshold "
-          << FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_n_threshold
-          << " . Falling back to acc32";
+    if (num_out_channels / group_ < FLAGS_caffe2_dnnlowp_acc16_n_threshold) {
+      LOG(INFO) << "N " << num_out_channels / group_
+                << " of Conv layer with weight blob "
+                << this->debug_def().input(1) << " is smaller than threshold "
+                << FLAGS_caffe2_dnnlowp_acc16_n_threshold
+                << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
-    if (kernel_dim <
-        FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_k_threshold) {
-      LOG(INFO)
-          << "K " << kernel_dim << " is smaller than threshold "
-          << FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_k_threshold
-          << " . Falling back to acc32";
+    if (kernel_dim < FLAGS_caffe2_dnnlowp_acc16_k_threshold) {
+      LOG(INFO) << "K " << kernel_dim << " of Conv layer with weight blob "
+                << this->debug_def().input(1) << " is smaller than threshold "
+                << FLAGS_caffe2_dnnlowp_acc16_k_threshold
+                << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
    }
  
-  if (nbits_in_non_outlier_ == 0) {
-    // nbits_in_non_outlier_ == 0 means everything is outlier and we can just
-    // use 32-bit accumulation.
-    LOG(INFO) << "nbits_in_non_outlier == 0 means everything is outlier so we "
-                 "fallback to acc32";
-    fallback_to_32_bit_accumulation_ = true;
-    return true;
-  }
-
    // Separate out outliers
    if (!Wq_outlier_ && this->order_ == StorageOrder::NHWC &&
        nbits_in_non_outlier_ < 8) {
@@ -152,30 +148,28 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
      int outlier_cnt = Wq_outlier_->ColPtr()[num_out_channels];
  
      LOG(INFO) << "Proportion of outlier for Conv layer with weight blob "
-              << OperatorBase::debug_def().input(1) << " is "
+              << this->debug_def().input(1) << " is "
                << static_cast<float>(outlier_cnt) / W_quantized_.size();
      LOG(INFO) << "nbits_in_non_outlier " << nbits_in_non_outlier_
                << " copy_to_32bit_frequency " << copy_to_32bit_frequency_;
  
      if (static_cast<float>(outlier_cnt) / W_quantized_.size() >
-        FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_density_threshold) {
-      LOG(INFO)
-          << "Density of outliers is higher than threshold "
-          << FLAGS_caffe2_dnnlowp_fallback_to_32_bit_accumulation_density_threshold
-          << " . Falling back to acc32";
+        FLAGS_caffe2_dnnlowp_acc16_density_threshold) {
+      LOG(INFO) << "Density of outliers is higher than threshold "
+                << FLAGS_caffe2_dnnlowp_acc16_density_threshold
+                << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        Wq_outlier_.reset();
        return true;
      }
    }
  
-  bool packW = ConvPoolOpBase<CPUContext>::order_ == StorageOrder::NHWC &&
-      GetCpuId().avx2();
+  bool packW = this->order_ == StorageOrder::NHWC && GetCpuId().avx2();
  
    if (first_invocation_) {
      if (!packW) {
        string reason;
-      if (ConvPoolOpBase<CPUContext>::order_ != StorageOrder::NHWC) {
+      if (this->order_ != StorageOrder::NHWC) {
          reason = "fbgemm only supports NHWC layout";
        } else if (!GetCpuId().avx2()) {
          reason = "fbgemm only supports AVX2+";
@@ -193,8 +187,7 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
          }
        }
      }
-    if (nbits_in_non_outlier_ < 8 &&
-        ConvPoolOpBase<CPUContext>::order_ != StorageOrder::NHWC) {
+    if (nbits_in_non_outlier_ < 8 && this->order_ != StorageOrder::NHWC) {
        static int log_occurences = 0;
        if (log_occurences < 32) {
          ++log_occurences;
diff --git a/caffe2/quantization/server/fbgemm_pack_op.cc b/caffe2/quantization/server/fbgemm_pack_op.cc

index abb1ba4..7581e8c 100644 (file)
--- a/caffe2/quantization/server/fbgemm_pack_op.cc
+++ b/caffe2/quantization/server/fbgemm_pack_op.cc
@@ -261,7 +261,7 @@ bool FullyConnectedDNNLowPPackWeightOp::RunOnDevice() {
            ExtractOutlierMatrix(1, K, N, nbits_in_non_outlier_, W_quantized));
        int outlier_cnt = Y->W_outlier->ColPtr()[N];
  
-      LOG(INFO) << "Proportion of outlier for Conv layer with weight blob "
+      LOG(INFO) << "Proportion of outlier for FC layer with weight blob "
                  << this->debug_def().input(0) << " is "
                  << static_cast<float>(outlier_cnt) / W_quantized.size();
        LOG(INFO) << "nbits_in_non_outlier " << nbits_in_non_outlier_;
@@ -397,7 +397,9 @@ bool ConvDNNLowPPackWeightOp::RunOnDevice() {
    ComputeColumnOffsets(
        kernel_dim, M, W_quantized.data(), Y->qparams, *Y->column_offsets);
  
-  if (this->debug_def().engine() == "DNNLOWP_ACC16") {
+  // When nbits_in_non_outlier == 0, we fall back to acc32
+  if (this->debug_def().engine() == "DNNLOWP_ACC16" &&
+      nbits_in_non_outlier_ > 0) {
      if (nbits_in_non_outlier_ < 8) {
        Y->W_outlier.reset(ExtractOutlierMatrix(
            group_, kernel_dim, M, nbits_in_non_outlier_, W_quantized));
author	Jongsoo Park <jongsoo@fb.com>
	Tue, 8 Jan 2019 02:45:32 +0000 (18:45 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Tue, 8 Jan 2019 02:48:25 +0000 (18:48 -0800)
caffe2/quantization/server/conv_dnnlowp_acc16_op.cc		patch | blob | history
caffe2/quantization/server/fbgemm_pack_op.cc		patch | blob | history