From: Huamin Li
Date: Fri, 12 Apr 2019 18:38:02 +0000 (-0700)
Subject: use C10_REGISTER for GELU op
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~253
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c480798a1cb704b542bc74707108df575f3d4ee5;p=platform%2Fupstream%2Fpytorch.git

use C10_REGISTER for GELU op

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19090

Reviewed By: BIT-silence

Differential Revision: D14864737

fbshipit-source-id: 8debd53171f7068726f0ab777a13ca46becbfbdf
---

diff --git a/caffe2/operators/gelu_op.cc b/caffe2/operators/gelu_op.cc
index 8bd9bf0..ad4ddda 100644
--- a/caffe2/operators/gelu_op.cc
+++ b/caffe2/operators/gelu_op.cc
@@ -65,18 +65,8 @@ bool GeluGradientFunctor<CPUContext>::Forward(
   return true;
 }
 
-REGISTER_CPU_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluGradientFunctor<CPUContext>>);
+REGISTER_CPU_OPERATOR(Gelu, GeluOp<CPUContext>);
+REGISTER_CPU_OPERATOR(GeluGradient, GeluGradientOp<CPUContext>);
 
 namespace {
 
@@ -130,3 +120,12 @@ class GetGeluGradient : public GradientMakerBase {
 REGISTER_GRADIENT(Gelu, GetGeluGradient);
 
 } // namespace caffe2
+
+C10_REGISTER_CAFFE2_OPERATOR_CPU(
+    Gelu,
+    (std::vector<c10::Argument>{
+        c10::Argument("input"),
+        c10::Argument("fast_gelu", BoolType::get(), c10::nullopt, false),
+    }),
+    (std::vector<c10::Argument>{c10::Argument("output")}),
+    caffe2::GeluOp<caffe2::CPUContext>);
diff --git a/caffe2/operators/gelu_op.cu b/caffe2/operators/gelu_op.cu
index 4c42fc43..dce18c9 100644
--- a/caffe2/operators/gelu_op.cu
+++ b/caffe2/operators/gelu_op.cu
@@ -142,17 +142,7 @@ bool GeluGradientFunctor<CUDAContext>::Forward(
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluFunctor<CUDAContext>>);
-REGISTER_CUDA_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluGradientFunctor<CUDAContext>>);
+REGISTER_CUDA_OPERATOR(Gelu, GeluOp<CUDAContext>);
+REGISTER_CUDA_OPERATOR(GeluGradient, GeluGradientOp<CUDAContext>);
 
 } // namespace caffe2
diff --git a/caffe2/operators/gelu_op.h b/caffe2/operators/gelu_op.h
index 594315e..86e35a7 100644
--- a/caffe2/operators/gelu_op.h
+++ b/caffe2/operators/gelu_op.h
@@ -6,6 +6,8 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/operators/elementwise_ops.h"
 
+C10_DECLARE_CAFFE2_OPERATOR(Gelu);
+
 namespace caffe2 {
 
 namespace gelu_utils {
@@ -44,6 +46,18 @@ struct GeluGradientFunctor {
   const bool fast_gelu;
 };
 
+template <class Context>
+using GeluOp = UnaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluFunctor<Context>>;
+
+template <class Context>
+using GeluGradientOp = BinaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluGradientFunctor<Context>>;
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_GELU_OP_H_
diff --git a/caffe2/python/operator_test/torch_integration_test.py b/caffe2/python/operator_test/torch_integration_test.py
index 07343d9..893d314 100644
--- a/caffe2/python/operator_test/torch_integration_test.py
+++ b/caffe2/python/operator_test/torch_integration_test.py
@@ -6,6 +6,7 @@ from hypothesis import given
 import caffe2.python.hypothesis_test_util as hu
 import hypothesis.strategies as st
 import numpy as np
+from scipy.stats import norm
 
 import unittest
 
@@ -447,6 +448,26 @@ class TorchIntegration(hu.HypothesisTestCase):
     def test_roi_align_cuda(self):
         self._test_roi_align(device="cuda")
 
+    @given(X=hu.tensor(),
+           fast_gelu=st.booleans())
+    def _test_gelu_op(self, X, fast_gelu, device):
+        def _gelu_ref(_X):
+            return (_X * norm.cdf(_X).astype(np.float32), )
+        expected_output, = _gelu_ref(X)
+        actual_output = torch.ops._caffe2.Gelu(torch.tensor(X), fast_gelu)
+
+        rtol = 1e-3 if fast_gelu else 1e-4
+        atol = 1e-5
+        torch.testing.assert_allclose(
+            expected_output, actual_output.cpu(), rtol=rtol, atol=atol)
+
+    def test_gelu_op(self):
+        self._test_gelu_op(device="cpu")
+
+    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
+    def test_gelu_op_cuda(self):
+        self._test_gelu_op(device="cuda")
+
 
 if __name__ == '__main__':
     unittest.main()
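
Note (not part of the patch itself): a minimal usage sketch of the operator exported by the C10 registration above, assuming a PyTorch build with the Caffe2 operators enabled. It mirrors the scipy-based reference used in the new test; the array shape and tolerances are illustrative only.

    import numpy as np
    import torch
    from scipy.stats import norm

    x = np.random.randn(4, 8).astype(np.float32)

    # Exact GELU reference, x * Phi(x), same as _gelu_ref in the test above.
    expected = x * norm.cdf(x).astype(np.float32)

    # Call the C10-exported Caffe2 operator. fast_gelu=False (the default in the
    # registered schema) selects exact GELU; True selects the faster
    # approximation, which the test checks with the looser rtol=1e-3.
    actual = torch.ops._caffe2.Gelu(torch.tensor(x), False)

    np.testing.assert_allclose(expected, actual.numpy(), rtol=1e-4, atol=1e-5)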