use C10_REGISTER for GELU op
author Huamin Li <huaminli@fb.com>
Fri, 12 Apr 2019 18:38:02 +0000 (11:38 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 12 Apr 2019 18:41:04 +0000 (11:41 -0700)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19090

Reviewed By: BIT-silence

Differential Revision: D14864737

fbshipit-source-id: 8debd53171f7068726f0ab777a13ca46becbfbdf
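
This change replaces the verbose UnaryElementwiseWithArgsOp / BinaryElementwiseWithArgsOp registrations with the GeluOp / GeluGradientOp aliases now defined in gelu_op.h, and additionally registers the CPU operator with C10 so the Caffe2 Gelu op is callable from PyTorch as torch.ops._caffe2.Gelu. A minimal sketch of exercising the exposed op, mirroring the new test below (assumes a PyTorch build that registers the Caffe2 _caffe2 ops):

    import numpy as np
    import torch
    from scipy.stats import norm

    x = np.random.randn(4, 8).astype(np.float32)

    # Reference GELU: x * Phi(x), with Phi the standard normal CDF.
    expected = x * norm.cdf(x).astype(np.float32)

    # Call the C10-registered Caffe2 operator through the _caffe2 namespace;
    # fast_gelu=False requests the exact (erf-based) formulation.
    actual = torch.ops._caffe2.Gelu(torch.tensor(x), False)

    np.testing.assert_allclose(expected, actual.numpy(), rtol=1e-4, atol=1e-5)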

caffe2/operators/gelu_op.cc
caffe2/operators/gelu_op.cu
caffe2/operators/gelu_op.h
caffe2/python/operator_test/torch_integration_test.py

diff --git a/caffe2/operators/gelu_op.cc b/caffe2/operators/gelu_op.cc
index 8bd9bf0..ad4ddda 100644
--- a/caffe2/operators/gelu_op.cc
+++ b/caffe2/operators/gelu_op.cc
@@ -65,18 +65,8 @@ bool GeluGradientFunctor<CPUContext>::Forward(
   return true;
 }
 
-REGISTER_CPU_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluGradientFunctor<CPUContext>>);
+REGISTER_CPU_OPERATOR(Gelu, GeluOp<CPUContext>);
+REGISTER_CPU_OPERATOR(GeluGradient, GeluGradientOp<CPUContext>);
 
 namespace {
 
@@ -130,3 +120,12 @@ class GetGeluGradient : public GradientMakerBase {
 REGISTER_GRADIENT(Gelu, GetGeluGradient);
 
 } // namespace caffe2
+
+C10_REGISTER_CAFFE2_OPERATOR_CPU(
+    Gelu,
+    (std::vector<c10::Argument>{
+        c10::Argument("input"),
+        c10::Argument("fast_gelu", BoolType::get(), c10::nullopt, false),
+    }),
+    (std::vector<c10::Argument>{c10::Argument("output")}),
+    caffe2::GeluOp<caffe2::CPUContext>);
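
The C10 schema registered above declares one tensor argument ("input"), an optional boolean "fast_gelu" defaulting to false, and a single tensor "output". For reference, a hedged NumPy sketch of the two formulations such a flag conventionally selects between: the exact x * Phi(x) definition and the common tanh approximation (the constants below are the standard published ones, not read from this kernel):

    import numpy as np
    from scipy.stats import norm

    def gelu_exact(x):
        # Exact GELU: x * Phi(x), where Phi is the standard normal CDF.
        return x * norm.cdf(x)

    def gelu_tanh_approx(x):
        # Widely used tanh approximation (Hendrycks & Gimpel, 2016):
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

    x = np.linspace(-3.0, 3.0, 7)
    # The approximation tracks the exact form closely (max error on the order of 1e-3).
    print(np.max(np.abs(gelu_exact(x) - gelu_tanh_approx(x))))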
diff --git a/caffe2/operators/gelu_op.cu b/caffe2/operators/gelu_op.cu
index 4c42fc4..dce18c9 100644
--- a/caffe2/operators/gelu_op.cu
+++ b/caffe2/operators/gelu_op.cu
@@ -142,17 +142,7 @@ bool GeluGradientFunctor<CUDAContext>::Forward(
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluFunctor<CUDAContext>>);
-REGISTER_CUDA_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluGradientFunctor<CUDAContext>>);
+REGISTER_CUDA_OPERATOR(Gelu, GeluOp<CUDAContext>);
+REGISTER_CUDA_OPERATOR(GeluGradient, GeluGradientOp<CUDAContext>);
 
 } // namespace caffe2
diff --git a/caffe2/operators/gelu_op.h b/caffe2/operators/gelu_op.h
index 594315e..86e35a7 100644
--- a/caffe2/operators/gelu_op.h
+++ b/caffe2/operators/gelu_op.h
@@ -6,6 +6,8 @@
 #include "caffe2/core/operator.h"
 #include "caffe2/operators/elementwise_ops.h"
 
+C10_DECLARE_CAFFE2_OPERATOR(Gelu);
+
 namespace caffe2 {
 
 namespace gelu_utils {
@@ -44,6 +46,18 @@ struct GeluGradientFunctor {
   const bool fast_gelu;
 };
 
+template <class Context>
+using GeluOp = UnaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluFunctor<Context>>;
+
+template <class Context>
+using GeluGradientOp = BinaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluGradientFunctor<Context>>;
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_GELU_OP_H_
diff --git a/caffe2/python/operator_test/torch_integration_test.py b/caffe2/python/operator_test/torch_integration_test.py
index 07343d9..893d314 100644
--- a/caffe2/python/operator_test/torch_integration_test.py
+++ b/caffe2/python/operator_test/torch_integration_test.py
@@ -6,6 +6,7 @@ from hypothesis import given
 import caffe2.python.hypothesis_test_util as hu
 import hypothesis.strategies as st
 import numpy as np
+from scipy.stats import norm
 import unittest
 
 
@@ -447,6 +448,26 @@ class TorchIntegration(hu.HypothesisTestCase):
     def test_roi_align_cuda(self):
         self._test_roi_align(device="cuda")
 
+    @given(X=hu.tensor(),
+           fast_gelu=st.booleans())
+    def _test_gelu_op(self, X, fast_gelu, device):
+        def _gelu_ref(_X):
+            return (_X * norm.cdf(_X).astype(np.float32), )
+        expected_output, = _gelu_ref(X)
+        actual_output = torch.ops._caffe2.Gelu(torch.tensor(X), fast_gelu)
+
+        rtol = 1e-3 if fast_gelu else 1e-4
+        atol = 1e-5
+        torch.testing.assert_allclose(
+            expected_output, actual_output.cpu(), rtol=rtol, atol=atol)
+
+    def test_gelu_op(self):
+        self._test_gelu_op(device="cpu")
+
+    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
+    def test_gelu_op_cuda(self):
+        self._test_gelu_op(device="cuda")
+
 
 if __name__ == '__main__':
     unittest.main()