From 264e16bffde7cf3a9491bc07fabe30c6d61d0490 Mon Sep 17 00:00:00 2001
From: Jianyu Huang
Date: Fri, 11 Jan 2019 19:21:47 -0800
Subject: [PATCH] Make it consistent for OperatorBase usage (#15908)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15908

"OperatorBase::" is changed to "this->template ". For example,

  # This no longer works
  OperatorBase::GetSingleArgument<>()

  # Should change to:
  this->template GetSingleArgument<>()

https://fb.workplace.com/groups/101100140348621/permalink/576804082778222/

Follow up of D13574832.

Sample Diff: D9319742, D10045844.

Reviewed By: jspark1105

Differential Revision: D13613574

fbshipit-source-id: 2cb4094557b4af78d41e289816cad3e1194fb82c
---
 .../quantization/server/batch_matmul_dnnlowp_op.cc | 19 ++++++++---------
 caffe2/quantization/server/concat_dnnlowp_op.cc    |  8 ++++----
 .../quantization/server/conv_dnnlowp_acc16_op.cc   | 11 +++++-----
 caffe2/quantization/server/conv_dnnlowp_op.cc      | 23 ++++++++++-----------
 caffe2/quantization/server/conv_relu_op.cc         | 12 +++++------
 .../quantization/server/dequantize_dnnlowp_op.cc   |  2 +-
 .../quantization/server/elementwise_dnnlowp_op.h   |  2 +-
 .../server/elementwise_linear_dnnlowp_op.cc        |  2 +-
 .../server/fully_connected_dnnlowp_acc16_op.cc     |  6 +++---
 .../server/fully_connected_dnnlowp_op.cc           | 24 +++++++++-------------
 .../server/fully_connected_fake_lowp_op.h          | 12 +++++------
 .../server/fully_connected_rowwise_dnnlowp_op.cc   | 11 +++++-----
 .../quantization/server/group_norm_dnnlowp_op.cc   |  8 ++++----
 caffe2/quantization/server/lstm_unit_dnnlowp_op.cc |  4 ++--
 caffe2/quantization/server/relu_dnnlowp_op.cc      |  2 +-
 caffe2/quantization/server/relu_dnnlowp_op.h       |  1 +
 caffe2/quantization/server/utility_dnnlowp_ops.h   |  2 +-
 17 files changed, 71 insertions(+), 78 deletions(-)

diff --git a/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc b/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
index 6aa932a..2df5076 100644
--- a/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
+++ b/caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
@@ -35,11 +35,11 @@ BatchMatMulDNNLowPOp<T>::BatchMatMulDNNLowPOp(
     const OperatorDef& operator_def,
     Workspace* ws)
     : BaseType(operator_def, ws),
-      trans_a_(OperatorBase::GetSingleArgument<int>("trans_a", 0)),
-      trans_b_(OperatorBase::GetSingleArgument<int>("trans_b", 0)),
-      broadcast_(OperatorBase::GetSingleArgument<int>("broadcast", 0)),
+      trans_a_(this->template GetSingleArgument<int>("trans_a", 0)),
+      trans_b_(this->template GetSingleArgument<int>("trans_b", 0)),
+      broadcast_(this->template GetSingleArgument<int>("broadcast", 0)),
       is_B_constant_(
-          OperatorBase::GetSingleArgument<bool>("constant_B", false)) {}
+          this->template GetSingleArgument<bool>("constant_B", false)) {}
 
 template <typename T>
 bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
@@ -280,7 +280,7 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
       int num_batches_B = B.numel() / (K * N);
       if (!first_invocation_ && !Bq_packed_.empty() &&
           num_batches_B * N != column_offsets_.size()) {
-        LOG(INFO) << "Operator with output " << OperatorBase::debug_def().output(0)
+        LOG(INFO) << "Operator with output " << this->debug_def().output(0)
                   << " does not have constant B";
         is_B_constant_ = false;
         Bq_packed_.clear();
@@ -295,12 +295,12 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
         vector<T> B_quantized_temp(K * N);
         column_offsets_.resize(num_batches_B * N);
         for (int i = 0; i < num_batches_B; ++i) {
-          if (OperatorBase::InputIsType<int8::Int8TensorCPU>(1)) {
+          if (this->template InputIsType<int8::Int8TensorCPU>(1)) {
             B_qparams_.push_back(TensorQuantizationParams());
             B_qparams_[i].scale =
-                OperatorBase::Input<int8::Int8TensorCPU>(1).scale;
+                this->template Input<int8::Int8TensorCPU>(1).scale;
             B_qparams_[i].zero_point =
-                OperatorBase::Input<int8::Int8TensorCPU>(1).zero_point +
+                this->template Input<int8::Int8TensorCPU>(1).zero_point +
                 signed_min;
 
             const T* B_data = B.template data<T>() + i * B_quantized_temp.size();
@@ -381,8 +381,7 @@ bool BatchMatMulDNNLowPOp<T>::RunOnDevice() {
       } else {
         assert(false);
       }
-      LOG(WARNING) << "BatchMatMul with output "
-                   << OperatorBase::debug_def().output(0)
+      LOG(WARNING) << "BatchMatMul with output " << this->debug_def().output(0)
                    << " falls back to slow path because " << reason;
     }
     B_qparams_.resize(1);
diff --git a/caffe2/quantization/server/concat_dnnlowp_op.cc b/caffe2/quantization/server/concat_dnnlowp_op.cc
index fdd96a7..b58686f 100644
--- a/caffe2/quantization/server/concat_dnnlowp_op.cc
+++ b/caffe2/quantization/server/concat_dnnlowp_op.cc
@@ -16,11 +16,11 @@ ConcatDNNLowPOp<T>::ConcatDNNLowPOp(
     Workspace* ws)
     : BaseType(operator_def, ws) {
   if (HasArgument("axis")) {
-    axis_ = OperatorBase::GetSingleArgument<int>("axis", -1);
-    add_axis_ = OperatorBase::GetSingleArgument<int>("add_axis", 0);
+    axis_ = this->template GetSingleArgument<int>("axis", -1);
+    add_axis_ = this->template GetSingleArgument<int>("add_axis", 0);
   } else {
     axis_ = GetDimFromOrderString(
-        OperatorBase::GetSingleArgument<string>("order", "NCHW"));
+        this->template GetSingleArgument<string>("order", "NCHW"));
     add_axis_ = 0;
   }
   CAFFE_ENFORCE_GE(axis_, 0);
@@ -35,7 +35,7 @@ bool ConcatDNNLowPOp<T>::RunOnDevice() {
   Tensor* split = nullptr;
   int* axis_data = nullptr;
   if (OutputSize() >= 2) {
-    split = OperatorBase::Output<Tensor>(1, CPU);
+    split = this->template Output<Tensor>(1, CPU);
     split->Resize(vector<int64_t>(1, InputSize()));
     axis_data = split->template mutable_data<int>();
   }
diff --git a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
index 45b5918..a32a7b0 100644
--- a/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
+++ b/caffe2/quantization/server/conv_dnnlowp_acc16_op.cc
@@ -46,10 +46,10 @@ ConvDNNLowPAcc16Op<ReluFused>::ConvDNNLowPAcc16Op(
     const OperatorDef& operator_def,
     Workspace* ws)
     : ConvDNNLowPOp<uint8_t, ReluFused>(operator_def, ws),
-      nbits_in_non_outlier_(OperatorBase::GetSingleArgument<int>(
+      nbits_in_non_outlier_(this->template GetSingleArgument<int>(
           "nbits_in_non_outlier",
           FLAGS_caffe2_dnnlowp_nbits_in_non_outlier)),
-      copy_to_32bit_frequency_(OperatorBase::GetSingleArgument<int>(
+      copy_to_32bit_frequency_(this->template GetSingleArgument<int>(
           "copy_to_32bit_frequency",
           FLAGS_caffe2_dnnlowp_copy_to_32bit_frequency)) {
   if (nbits_in_non_outlier_ == 0) {
@@ -181,8 +181,7 @@ bool ConvDNNLowPAcc16Op<ReluFused>::GetQuantizationParameters_() {
     static int log_occurences = 0;
     if (log_occurences < 32) {
       ++log_occurences;
-      LOG(WARNING) << "Conv with weight "
-                   << OperatorBase::debug_def().input(FILTER)
+      LOG(WARNING) << "Conv with weight " << this->debug_def().input(FILTER)
                    << " falls back to slow path because " << reason;
     }
   }
@@ -772,8 +771,8 @@ bool ConvDNNLowPAcc16Op<ReluFused>::RunOnDeviceWithOrderNHWC() {
     dt = chrono::duration<double>(t_end - t_very_begin).count();
     double ops = 2. * N * output_image_size * M * kernel_dim;
     double gops = ops / dt / 1e9;
-    LOG(INFO) << "this=" << this << " " << OperatorBase::debug_def().type()
-              << " output=" << OperatorBase::debug_def().output(0) << " "
+    LOG(INFO) << "this=" << this << " " << this->debug_def().type()
+              << " output=" << this->debug_def().output(0) << " "
               << N * output_image_size << "x" << M << "x" << kernel_dim
               << " G=" << group_ << " C/G=" << C / group_ << " K/G=" << M / group_
               << " R=" << kernel_h() << " S=" << kernel_w() << " : " << dt * 1e3
diff --git a/caffe2/quantization/server/conv_dnnlowp_op.cc b/caffe2/quantization/server/conv_dnnlowp_op.cc
index e8cb039..8a97fe7 100644
--- a/caffe2/quantization/server/conv_dnnlowp_op.cc
+++ b/caffe2/quantization/server/conv_dnnlowp_op.cc
@@ -57,7 +57,7 @@ ConvDNNLowPOp<T, ReluFused>::ConvDNNLowPOp(
   }
 
   quantize_groupwise_ =
-      OperatorBase::GetSingleArgument<bool>("quantize_groupwise", false);
+      this->template GetSingleArgument<bool>("quantize_groupwise", false);
 }
 
 template <typename T, bool ReluFused>
@@ -213,12 +213,12 @@ void ConvDNNLowPOp<T, ReluFused>::QuantizeBias_() {
     b_quantized_data_ = b_quantized_->data();
   } else {
     const auto& bias = InputTensorCPU_(BIAS);
-    if (OperatorBase::InputIsType<int8::Int8TensorCPU>(BIAS)) {
+    if (this->template InputIsType<int8::Int8TensorCPU>(BIAS)) {
       TensorQuantizationParams bias_qparams;
       bias_qparams.scale =
-          OperatorBase::Input<int8::Int8TensorCPU>(BIAS).scale;
+          this->template Input<int8::Int8TensorCPU>(BIAS).scale;
       bias_qparams.zero_point =
-          OperatorBase::Input<int8::Int8TensorCPU>(BIAS).zero_point;
+          this->template Input<int8::Int8TensorCPU>(BIAS).zero_point;
       CAFFE_ENFORCE_LE(
           std::abs(
               bias_qparams.scale -
@@ -368,8 +368,7 @@ void ConvDNNLowPOp<T, ReluFused>::QuantizeWeight_() {
     static int log_occurences = 0;
     if (log_occurences < 32) {
       ++log_occurences;
-      LOG(WARNING) << "Conv with weight "
-                   << OperatorBase::debug_def().input(FILTER)
+      LOG(WARNING) << "Conv with weight " << this->debug_def().input(FILTER)
                    << " falls back to slow path because " << reason;
     }
   }
@@ -685,7 +684,7 @@ void ConvDNNLowPOp<T, ReluFused>::RunOnDeviceEpilogueNHWC_(
         ++log_occurences;
         LOG(WARNING) << "Cannot do group-wise quantization without "
                         "static quantization of activations for "
-                     << OperatorBase::debug_def().output(0);
+                     << this->debug_def().output(0);
       }
     }
 
@@ -999,14 +998,14 @@ void ConvDNNLowPOp<T, ReluFused>::ConvNHWCCore_(
         N * Y_HxW * group_,
         kernel_dim,
         col_buffer_data,
-        OperatorBase::debug_def().input(INPUT));
+        this->debug_def().input(INPUT));
 
     // Dump weight
     StoreMatrixInMatrixMarketFormat(
         group_ * M,
         kernel_dim,
         W_quantized_.data(),
-        OperatorBase::debug_def().input(FILTER));
+        this->debug_def().input(FILTER));
   }
 
   if (TakeDepthWise3x3x3FastPath_()) {
@@ -1351,9 +1350,9 @@ bool ConvDNNLowPOp<T, ReluFused>::RunOnDeviceWithOrderNHWC() {
     double ops = 2. * N * Y_HxW * M * kernel_dim;
     dt = chrono::duration<double>(t_end - t_very_begin).count();
     double gops = ops / dt / 1e9;
-    LOG(INFO) << "this=" << this << " " << OperatorBase::debug_def().type()
-              << " output=" << OperatorBase::debug_def().output(0) << " "
-              << N * Y_HxW << "x" << M << "x" << kernel_dim << " G=" << group_
+    LOG(INFO) << "this=" << this << " " << this->debug_def().type()
+              << " output=" << this->debug_def().output(0) << " " << N * Y_HxW
+              << "x" << M << "x" << kernel_dim << " G=" << group_
               << " C/G=" << C / group_ << " K/G=" << M / group_
               << " R=" << kernel_h() << " S=" << kernel_w() << " : " << dt * 1e3
               << " ms " << gops << " gops";
diff --git a/caffe2/quantization/server/conv_relu_op.cc b/caffe2/quantization/server/conv_relu_op.cc
index e3a3cd4..6668389 100644
--- a/caffe2/quantization/server/conv_relu_op.cc
+++ b/caffe2/quantization/server/conv_relu_op.cc
@@ -5,10 +5,10 @@ namespace caffe2 {
 template <typename T, typename Context>
 bool ConvReluOp<T, Context>::RunOnDeviceWithOrderNCHW() {
   // Delegate to local conv operator
-  for (int i = 0; i < OperatorBase::InputSize(); ++i) {
+  for (int i = 0; i < this->InputSize(); ++i) {
     local_input_blobs_[i]->ShareExternal(
-        const_cast<void*>(OperatorBase::Inputs()[i]->GetRaw()),
-        OperatorBase::Inputs()[i]->meta());
+        const_cast<void*>(this->Inputs()[i]->GetRaw()),
+        this->Inputs()[i]->meta());
   }
 
   if (!local_op_->RunOnDeviceWithOrderNCHW()) {
@@ -36,10 +36,10 @@ bool ConvReluOp<T, Context>::RunOnDeviceWithOrderNCHW() {
 template <typename T, typename Context>
 bool ConvReluOp<T, Context>::RunOnDeviceWithOrderNHWC() {
   // Delegate to local conv operator
-  for (int i = 0; i < OperatorBase::InputSize(); ++i) {
+  for (int i = 0; i < this->InputSize(); ++i) {
     local_input_blobs_[i]->ShareExternal(
-        const_cast<void*>(OperatorBase::Inputs()[i]->GetRaw()),
-        OperatorBase::Inputs()[i]->meta());
+        const_cast<void*>(this->Inputs()[i]->GetRaw()),
+        this->Inputs()[i]->meta());
   }
 
   if (!local_op_->RunOnDeviceWithOrderNHWC()) {
diff --git a/caffe2/quantization/server/dequantize_dnnlowp_op.cc b/caffe2/quantization/server/dequantize_dnnlowp_op.cc
index 6d81e27..9994f03 100644
--- a/caffe2/quantization/server/dequantize_dnnlowp_op.cc
+++ b/caffe2/quantization/server/dequantize_dnnlowp_op.cc
@@ -19,7 +19,7 @@ bool DequantizeDNNLowPOp<T>::RunOnDevice() {
       GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
 
   const TensorCPU& input = InputIsType<int8::Int8TensorCPU>(0)
-      ? OperatorBase::Input<int8::Int8TensorCPU>(0).t
+      ? this->template Input<int8::Int8TensorCPU>(0).t
       : Input(0);
 
   CAFFE_ENFORCE(input.template IsType<T>());
diff --git a/caffe2/quantization/server/elementwise_dnnlowp_op.h b/caffe2/quantization/server/elementwise_dnnlowp_op.h
index 27bf965..aac1020 100644
--- a/caffe2/quantization/server/elementwise_dnnlowp_op.h
+++ b/caffe2/quantization/server/elementwise_dnnlowp_op.h
@@ -25,7 +25,7 @@ class UnaryElementwiseWithArgsDNNLowPOp : public Operator<CPUContext> {
       arguments_parsed_ = true;
     }
 
-    auto& input = OperatorBase::Input<int8::Int8TensorCPU>(0).t;
+    auto& input = this->template Input<int8::Int8TensorCPU>(0).t;
     auto& output = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
     output.ResizeLike(input);
     functor_(
diff --git a/caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc b/caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
index 364fde5..6220391 100644
--- a/caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
+++ b/caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
@@ -25,7 +25,7 @@ ElementwiseLinearDNNLowPOp<T>::ElementwiseLinearDNNLowPOp(
     const OperatorDef& operator_def,
     Workspace* ws)
     : BaseType(operator_def, ws),
-      axis_(OperatorBase::GetSingleArgument<int>("axis", 1)) {}
+      axis_(this->template GetSingleArgument<int>("axis", 1)) {}
 
 template <typename T>
 bool ElementwiseLinearDNNLowPOp<T>::RunOnDevice() {
diff --git a/caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc b/caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc
index addc7e9..b23f34a 100644
--- a/caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc
+++ b/caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc
@@ -13,10 +13,10 @@ FullyConnectedDNNLowPAcc16Op::FullyConnectedDNNLowPAcc16Op(
     const OperatorDef& operator_def,
     Workspace* ws)
     : FullyConnectedDNNLowPOp<uint8_t>(operator_def, ws),
-      nbits_in_non_outlier_(OperatorBase::GetSingleArgument<int>(
+      nbits_in_non_outlier_(this->template GetSingleArgument<int>(
          "nbits_in_non_outlier",
          FLAGS_caffe2_dnnlowp_nbits_in_non_outlier)),
-      copy_to_32bit_frequency_(OperatorBase::GetSingleArgument<int>(
+      copy_to_32bit_frequency_(this->template GetSingleArgument<int>(
          "copy_to_32bit_frequency",
          FLAGS_caffe2_dnnlowp_copy_to_32bit_frequency)) {}
 
@@ -77,7 +77,7 @@ bool FullyConnectedDNNLowPAcc16Op::RunOnDevice() {
       int outlier_cnt = Wq_outlier_->ColPtr()[N];
 
       LOG(INFO) << "Proportion of outlier for FC layer with weight blob "
-                << OperatorBase::debug_def().input(1) << " is "
+                << this->debug_def().input(1) << " is "
                 << (float)outlier_cnt / W_quantized_.size();
       LOG(INFO) << "copy_to_32bit_frequency " << copy_to_32bit_frequency_;
 
diff --git a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
index 04c0867..6ed121a 100644
--- a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
+++ b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
@@ -27,12 +27,12 @@ FullyConnectedDNNLowPOp<T>::FullyConnectedDNNLowPOp(
     const OperatorDef& operator_def,
     Workspace* ws)
     : BaseType(operator_def, ws),
-      axis_(OperatorBase::GetSingleArgument<int32_t>("axis", 1)),
-      axis_w_(OperatorBase::GetSingleArgument<int32_t>("axis_w", 1)),
+      axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
+      axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
       b_quantized_(make_shared<vector<int32_t>>()),
       column_offsets_(make_shared<vector<int32_t>>()),
       is_weight_constant_(
-          OperatorBase::GetSingleArgument<bool>("constant_weight", true)) {
+          this->template GetSingleArgument<bool>("constant_weight", true)) {
   if (!is_weight_constant_) {
     LOG(INFO) << operator_def.output(0) << " is_weight_constant "
               << is_weight_constant_;
@@ -298,12 +298,10 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
   if (FLAGS_caffe2_dnnlowp_dump_tensors) {
     // Dump input activation
-    StoreMatrixInMatrixMarketFormat(
-        M, K, Xdata, OperatorBase::debug_def().input(0));
+    StoreMatrixInMatrixMarketFormat(M, K, Xdata, this->debug_def().input(0));
 
     // Dump weight
-    StoreMatrixInMatrixMarketFormat(
-        N, K, Wdata, OperatorBase::debug_def().input(1));
+    StoreMatrixInMatrixMarketFormat(N, K, Wdata, this->debug_def().input(1));
   }
 
   if (VLOG_IS_ON(3)) {
@@ -374,9 +372,8 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
     dt = chrono::duration<double>(t_end - t_very_begin).count();
     double gops = ops / dt / 1e9;
     VLOG(3) << "@PERF this=" << this
-            << " output=" << OperatorBase::debug_def().output(0) << " " << M
-            << "x" << N << "x" << K << ": " << dt * 1e3 << " ms " << gops
-            << " gops";
+            << " output=" << this->debug_def().output(0) << " " << M << "x" << N
+            << "x" << K << ": " << dt * 1e3 << " ms " << gops << " gops";
   }
 
   return true;
@@ -412,7 +409,7 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
   int signed_min = -(1 << (qfactory_->GetWeightPrecision() - 1));
   if (is_weight_constant_) {
     bool fast_path = is_same<T, uint8_t>::value && GetCpuId().avx2() &&
-        OperatorBase::debug_def().engine() != "DNNLOWP_ACC16";
+        this->debug_def().engine() != "DNNLOWP_ACC16";
 
     if ((fast_path && !Wq_packed_) || (!fast_path && W_quantized_.empty())) {
       if (this->template InputIsType<Int8FCDNNLowPPackedWeightBlob>(1)) {
@@ -449,14 +446,13 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
          reason = "fbgemm only supports 8-bit integers";
        } else if (!GetCpuId().avx2()) {
          reason = "fbgemm only supports AVX2";
-        } else if (OperatorBase::debug_def().engine() == "DNNLOWP_ACC16") {
+        } else if (this->debug_def().engine() == "DNNLOWP_ACC16") {
          reason = "";
        } else {
          assert(false);
        }
        if (!reason.empty()) {
-          LOG(WARNING) << "Conv with weight "
-                       << OperatorBase::debug_def().input(1)
+          LOG(WARNING) << "Conv with weight " << this->debug_def().input(1)
                        << " falls back to slow path because " << reason;
        }
      }
diff --git a/caffe2/quantization/server/fully_connected_fake_lowp_op.h b/caffe2/quantization/server/fully_connected_fake_lowp_op.h
index 290df6e..6cbfc90 100644
--- a/caffe2/quantization/server/fully_connected_fake_lowp_op.h
+++ b/caffe2/quantization/server/fully_connected_fake_lowp_op.h
@@ -52,10 +52,10 @@ class FullyConnectedFakeLowpFPOp final : public Operator<Context> {
   USE_OPERATOR_CONTEXT_FUNCTIONS;
   FullyConnectedFakeLowpFPOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<Context>(operator_def, ws),
-        axis_(OperatorBase::GetSingleArgument<int32_t>("axis", 1)),
-        axis_w_(OperatorBase::GetSingleArgument<int32_t>("axis_w", 1)),
+        axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
+        axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
         float16_compute_(
-            OperatorBase::GetSingleArgument<bool>("float16_compute", false)) {}
+            this->template GetSingleArgument<bool>("float16_compute", false)) {}
   ~FullyConnectedFakeLowpFPOp() {}
 
   template <
@@ -98,10 +98,10 @@ class FullyConnectedGradientFakeLowpFPOp : public Operator<Context> {
       const OperatorDef& operator_def,
       Workspace* ws)
       : Operator<Context>(operator_def, ws),
-        axis_(OperatorBase::GetSingleArgument<int32_t>("axis", 1)),
-        axis_w_(OperatorBase::GetSingleArgument<int32_t>("axis_w", 1)),
+        axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
+        axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
         float16_compute_(
-            OperatorBase::GetSingleArgument<bool>("float16_compute", false)) {}
+            this->template GetSingleArgument<bool>("float16_compute", false)) {}
   ~FullyConnectedGradientFakeLowpFPOp() {}
 
   template <
diff --git a/caffe2/quantization/server/fully_connected_rowwise_dnnlowp_op.cc b/caffe2/quantization/server/fully_connected_rowwise_dnnlowp_op.cc
index 00fbb87..92c2ea8 100644
--- a/caffe2/quantization/server/fully_connected_rowwise_dnnlowp_op.cc
+++ b/caffe2/quantization/server/fully_connected_rowwise_dnnlowp_op.cc
@@ -14,12 +14,12 @@ FullyConnectedRowWiseDNNLowPOp<T>::FullyConnectedRowWiseDNNLowPOp(
     const OperatorDef& operator_def,
     Workspace* ws)
     : BaseType(operator_def, ws),
-      axis_(OperatorBase::GetSingleArgument<int32_t>("axis", 1)),
-      axis_w_(OperatorBase::GetSingleArgument<int32_t>("axis_w", 1)),
+      axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
+      axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
       b_quantized_(make_shared<vector<int32_t>>()),
       column_offsets_(make_shared<vector<int32_t>>()),
       is_weight_constant_(
-          OperatorBase::GetSingleArgument<bool>("constant_weight", true)) {
+          this->template GetSingleArgument<bool>("constant_weight", true)) {
   using namespace dnnlowp;
   LOG(INFO) << "Using Rowwise Quantization!";
   if (!is_weight_constant_) {
@@ -232,9 +232,8 @@ bool FullyConnectedRowWiseDNNLowPOp<T>::RunOnDevice() {
     dt = chrono::duration<double>(t_end - t_very_begin).count();
     double gops = ops / dt / 1e9;
     VLOG(3) << "@PERF this=" << this
-            << " output=" << OperatorBase::debug_def().output(0) << " " << M
-            << "x" << N << "x" << K << ": " << dt * 1e3 << " ms " << gops
-            << " gops";
+            << " output=" << this->debug_def().output(0) << " " << M << "x" << N
+            << "x" << K << ": " << dt * 1e3 << " ms " << gops << " gops";
   }
 
   return true;
diff --git a/caffe2/quantization/server/group_norm_dnnlowp_op.cc b/caffe2/quantization/server/group_norm_dnnlowp_op.cc
index 88d0613..8268b05 100644
--- a/caffe2/quantization/server/group_norm_dnnlowp_op.cc
+++ b/caffe2/quantization/server/group_norm_dnnlowp_op.cc
@@ -56,9 +56,9 @@ void GroupNormDNNLowPOp<T>::QuantizeGamma() {
     const int C = gamma.size();
     gamma_quantized_.resize(C);
     gamma_quantized_data_ = gamma_quantized_.data();
-    if (OperatorBase::InputIsType<int8::Int8TensorCPU>(GAMMA)) {
+    if (this->template InputIsType<int8::Int8TensorCPU>(GAMMA)) {
       const auto& gamma_int8 =
-          OperatorBase::Input<int8::Int8TensorCPU>(GAMMA);
+          this->template Input<int8::Int8TensorCPU>(GAMMA);
       auto& gamma_qparams = in_qparams_[GAMMA];
       gamma_qparams.scale = gamma_int8.scale;
       const T* gamma_data = gamma.template data<T>();
@@ -118,8 +118,8 @@ void GroupNormDNNLowPOp<T>::QuantizeBeta() {
   const auto& X_qparams = in_qparams_[INPUT];
   const auto& gamma_qparams = in_qparams_[GAMMA];
   auto& beta_qparams = in_qparams_[BETA];
-  if (OperatorBase::InputIsType<int8::Int8TensorCPU>(BETA)) {
-    const auto& beta_int8 = OperatorBase::Input<int8::Int8TensorCPU>(BETA);
+  if (this->template InputIsType<int8::Int8TensorCPU>(BETA)) {
+    const auto& beta_int8 = this->template Input<int8::Int8TensorCPU>(BETA);
     beta_qparams.scale = beta_int8.scale;
     beta_qparams.zero_point = beta_int8.zero_point;
     CAFFE_ENFORCE_LE(
diff --git a/caffe2/quantization/server/lstm_unit_dnnlowp_op.cc b/caffe2/quantization/server/lstm_unit_dnnlowp_op.cc
index 8f38170..4b6e64b 100644
--- a/caffe2/quantization/server/lstm_unit_dnnlowp_op.cc
+++ b/caffe2/quantization/server/lstm_unit_dnnlowp_op.cc
@@ -16,7 +16,7 @@ LSTMUnitDNNLowPOp<T>::LSTMUnitDNNLowPOp(
     Workspace* ws)
     : LSTMUnitOp<CPUContext>(operator_def, ws),
       drop_states_(
-          OperatorBase::template GetSingleArgument<bool>("drop_states", false)),
+          this->template GetSingleArgument<bool>("drop_states", false)),
       qfactory_(GetQuantizationFactoryOf(this)) {}
 
 template <typename T>
@@ -39,7 +39,7 @@ OpWrapper<LSTMUnitOp<CPUContext>, T>* LSTMUnitDNNLowPOp<T>::Fp32Op_() {
 template <typename T>
 const TensorCPU& LSTMUnitDNNLowPOp<T>::InputTensorCPU_(int idx) {
   return InputIsType<int8::Int8TensorCPU>(idx)
-      ? OperatorBase::Input<int8::Int8TensorCPU>(idx).t
+      ? this->template Input<int8::Int8TensorCPU>(idx).t
       : Input(idx);
 }
 
diff --git a/caffe2/quantization/server/relu_dnnlowp_op.cc b/caffe2/quantization/server/relu_dnnlowp_op.cc
index 0e5b3f6..dd718c4 100644
--- a/caffe2/quantization/server/relu_dnnlowp_op.cc
+++ b/caffe2/quantization/server/relu_dnnlowp_op.cc
@@ -7,7 +7,7 @@ namespace caffe2 {
 template <typename T>
 bool ReluDNNLowPOp<T>::RunOnDevice() {
   auto& X = InputIsType<int8::Int8TensorCPU>(0)
-      ? OperatorBase::Input<int8::Int8TensorCPU>(0).t
+      ? (this->template Input<int8::Int8TensorCPU>(0)).t
       : Input(0);
 
   TensorCPU* Y = nullptr;
diff --git a/caffe2/quantization/server/relu_dnnlowp_op.h b/caffe2/quantization/server/relu_dnnlowp_op.h
index 0837c7b..f308e90 100644
--- a/caffe2/quantization/server/relu_dnnlowp_op.h
+++ b/caffe2/quantization/server/relu_dnnlowp_op.h
@@ -10,6 +10,7 @@ namespace caffe2 {
 template <typename T>
 class ReluDNNLowPOp final : public Operator<CPUContext> {
  public:
+  USE_OPERATOR_FUNCTIONS(CPUContext);
   ReluDNNLowPOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<CPUContext>(operator_def, ws),
         qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}
diff --git a/caffe2/quantization/server/utility_dnnlowp_ops.h b/caffe2/quantization/server/utility_dnnlowp_ops.h
index 141d86d..1a0d830 100644
--- a/caffe2/quantization/server/utility_dnnlowp_ops.h
+++ b/caffe2/quantization/server/utility_dnnlowp_ops.h
@@ -37,7 +37,7 @@ class GatherDNNLowPOp final : public GatherOp<CPUContext> {
   bool DoRunWithType() {
     // If we endup using it on GPU doing O(N) memcpy is probably not best :)
     // TODO: implement prefetching if it starts mattering (TF does it)
-    auto& data = OperatorBase::Input<int8::Int8TensorCPU>(DATA).t;
+    auto& data = (this->template Input<int8::Int8TensorCPU>(DATA)).t;
     auto& indices = Input(INDICES);
     auto* output = &Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
 
-- 
2.7.4
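
Note on why "this->template" is required (a minimal compilable sketch; the
class and member names below are hypothetical stand-ins, not the real caffe2
types): when an operator inherits from a base class that depends on a template
parameter, members of that base are invisible to unqualified name lookup at
template definition time. "this->" makes the call a dependent expression that
is resolved at instantiation, and the "template" keyword tells the parser that
the "<" which follows opens a template argument list rather than a less-than
comparison.

  #include <string>

  // Hypothetical stand-in for Operator<Context>: the member template lives in
  // a base that depends on a template parameter.
  template <typename Context>
  struct OperatorTpl {
    template <typename T>
    T GetSingleArgument(const std::string& /*name*/, T default_value) const {
      return default_value; // stub; caffe2 reads the value from the OperatorDef
    }
  };

  template <typename T>
  struct MyDNNLowPOp : OperatorTpl<T> {
    MyDNNLowPOp()
        // An unqualified GetSingleArgument<int>("axis", 1) would not compile:
        // members of the dependent base OperatorTpl<T> are not found by
        // unqualified lookup. "this->" defers lookup to instantiation, and
        // "template" marks GetSingleArgument as a member template.
        : axis_(this->template GetSingleArgument<int>("axis", 1)) {}
    int axis_;
  };

  int main() {
    MyDNNLowPOp<float> op;
    return op.axis_ == 1 ? 0 : 1;
  }

With these stand-ins, dropping "this->template" and writing the call
unqualified fails with a name-lookup error, which mirrors the summary's note
that the OperatorBase::-qualified spelling stopped working after D13574832 and
motivates the uniform this->template form adopted by this patch.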