Use acc16 only when N >= 128 and K >= 128 on Skylake (#18672)

author Jongsoo Park <jongsoo@fb.com>

Mon, 1 Apr 2019 15:49:37 +0000 (08:49 -0700)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Mon, 1 Apr 2019 15:52:28 +0000 (08:52 -0700)
author Jongsoo Park <jongsoo@fb.com>
Mon, 1 Apr 2019 15:49:37 +0000 (08:49 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Mon, 1 Apr 2019 15:52:28 +0000 (08:52 -0700)
diff --git a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc

index b339e52..f356b42 100644 (file)
--- a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
+++ b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
@@ -106,6 +106,20 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
      Tensor* Y = OutputTensorCPU_(0, sizes, at::dtype<uint8_t>());
      const int output_image_size = this->GetDimsSize(*Y);
  
+    // On Skylake (AVX-512F), acc16 is not faster when N or K is below 128
+    constexpr int SKYLAKE_ACC16_N_THRESHOLD_MIN = 128,
+                  SKYLAKE_ACC16_K_THRESHOLD_MIN = 128;
+    int acc16_n_threshold = FLAGS_caffe2_dnnlowp_acc16_n_threshold;
+    if (caffe2::GetCpuId().avx512f() &&
+        acc16_n_threshold < SKYLAKE_ACC16_N_THRESHOLD_MIN) {
+      acc16_n_threshold = SKYLAKE_ACC16_N_THRESHOLD_MIN;
+    }
+    int acc16_k_threshold = FLAGS_caffe2_dnnlowp_acc16_k_threshold;
+    if (caffe2::GetCpuId().avx512f() &&
+        acc16_k_threshold < SKYLAKE_ACC16_K_THRESHOLD_MIN) {
+      acc16_k_threshold = SKYLAKE_ACC16_K_THRESHOLD_MIN;
+    }
+
      if (N * output_image_size < FLAGS_caffe2_dnnlowp_acc16_m_threshold) {
        LOG(INFO) << "M " << N * output_image_size
                  << " of Conv layer with weight blob "
@@ -115,20 +129,18 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
-    if (num_out_channels / group_ < FLAGS_caffe2_dnnlowp_acc16_n_threshold) {
+    if (num_out_channels / group_ < acc16_n_threshold) {
        LOG(INFO) << "N " << num_out_channels / group_
                  << " of Conv layer with weight blob "
                  << this->debug_def().input(1) << " is smaller than threshold "
-                << FLAGS_caffe2_dnnlowp_acc16_n_threshold
-                << " . Falling back to acc32";
+                << acc16_n_threshold << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
-    if (kernel_dim < FLAGS_caffe2_dnnlowp_acc16_k_threshold) {
+    if (kernel_dim < acc16_k_threshold) {
        LOG(INFO) << "K " << kernel_dim << " of Conv layer with weight blob "
                  << this->debug_def().input(1) << " is smaller than threshold "
-                << FLAGS_caffe2_dnnlowp_acc16_k_threshold
-                << " . Falling back to acc32";
+                << acc16_k_threshold << " . Falling back to acc32";
        fallback_to_32_bit_accumulation_ = true;
        return true;
      }
author	Jongsoo Park <jongsoo@fb.com>
	Mon, 1 Apr 2019 15:49:37 +0000 (08:49 -0700)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Mon, 1 Apr 2019 15:52:28 +0000 (08:52 -0700)