  return true;
}
-REGISTER_CPU_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluFunctor<CPUContext>>);
-REGISTER_CPU_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CPUContext,
-        GeluGradientFunctor<CPUContext>>);
+REGISTER_CPU_OPERATOR(Gelu, GeluOp<CPUContext>);
+REGISTER_CPU_OPERATOR(GeluGradient, GeluGradientOp<CPUContext>);
namespace {
// GetGeluGradient (gradient maker; class body elided in this excerpt) maps
// Gelu onto the GeluGradient operator registered above.
} // namespace

REGISTER_GRADIENT(Gelu, GetGeluGradient);

} // namespace caffe2
+
+C10_REGISTER_CAFFE2_OPERATOR_CPU(
+    Gelu,
+    (std::vector<c10::Argument>{
+        c10::Argument("input"),
+        c10::Argument("fast_gelu", c10::BoolType::get(), c10::nullopt, false),
+    }),
+    (std::vector<c10::Argument>{c10::Argument("output")}),
+    caffe2::GeluOp<caffe2::CPUContext>);
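With the C10 registration above, the same kernel becomes callable from PyTorch
under the _caffe2 ops namespace; this is the binding the Python test at the end
of this diff exercises. A minimal sketch (tensor shape and flag value are
illustrative, not from this PR):

    import torch

    x = torch.randn(4, 8)                 # float32 CPU tensor
    y = torch.ops._caffe2.Gelu(x, False)  # fast_gelu=False -> exact GELU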
  return true;
}
-REGISTER_CUDA_OPERATOR(
-    Gelu,
-    UnaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluFunctor<CUDAContext>>);
-REGISTER_CUDA_OPERATOR(
-    GeluGradient,
-    BinaryElementwiseWithArgsOp<
-        TensorTypes<float>,
-        CUDAContext,
-        GeluGradientFunctor<CUDAContext>>);
+REGISTER_CUDA_OPERATOR(Gelu, GeluOp<CUDAContext>);
+REGISTER_CUDA_OPERATOR(GeluGradient, GeluGradientOp<CUDAContext>);
} // namespace caffe2
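With the CPU and CUDA registrations in place, the operator can also be driven
through the classic caffe2 Python frontend; a minimal sketch, with blob names
and shape chosen for illustration:

    import numpy as np
    from caffe2.python import core, workspace

    workspace.FeedBlob("X", np.random.randn(4, 8).astype(np.float32))
    op = core.CreateOperator("Gelu", ["X"], ["Y"], fast_gelu=False)
    workspace.RunOperatorOnce(op)
    Y = workspace.FetchBlob("Y")  # elementwise GELU of X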
#include "caffe2/core/operator.h"
#include "caffe2/operators/elementwise_ops.h"
+C10_DECLARE_CAFFE2_OPERATOR(Gelu);
+
namespace caffe2 {
template <class Context>
struct GeluFunctor {
  explicit GeluFunctor(OperatorBase& op)
      : fast_gelu(op.GetSingleArgument<bool>("fast_gelu", false)) {}

  template <typename T>
  bool operator()(const int N, const T* X, T* Y, Context* context) const;

  const bool fast_gelu;
};
+template <class Context>
+using GeluOp = UnaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluFunctor<Context>>;
+
+template <class Context>
+using GeluGradientOp = BinaryElementwiseWithArgsOp<
+    TensorTypes<float>,
+    Context,
+    GeluGradientFunctor<Context>>;
+
} // namespace caffe2
#endif // CAFFE2_OPERATORS_GELU_OP_H_
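For reference, the fast_gelu argument read by the functors above selects
between the exact erf-based GELU and its tanh approximation. A NumPy sketch of
the two formulas (the 0.044715 coefficient is the standard constant from the
GELU paper, not taken from this diff):

    import numpy as np
    from scipy.special import erf

    def gelu_exact(x):
        # x * Phi(x), Phi = standard normal CDF
        return 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))

    def gelu_fast(x):
        # tanh approximation chosen when fast_gelu=True
        c = np.sqrt(2.0 / np.pi)
        return 0.5 * x * (1.0 + np.tanh(c * (x + 0.044715 * x ** 3)))

The test below compares both modes against the exact form, which is why the
fast_gelu=True case is given a looser relative tolerance.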
from caffe2.python import workspace
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
+from scipy.stats import norm
import torch
import unittest
    def test_roi_align_cuda(self):
        self._test_roi_align(device="cuda")
+    @given(X=hu.tensor(),
+           fast_gelu=st.booleans())
+    def _test_gelu_op(self, X, fast_gelu, device):
+        # Exact reference: gelu(x) = x * Phi(x), Phi = standard normal CDF.
+        def _gelu_ref(_X):
+            return (_X * norm.cdf(_X).astype(np.float32), )
+        expected_output, = _gelu_ref(X)
+        actual_output = torch.ops._caffe2.Gelu(
+            torch.tensor(X, device=device), fast_gelu)
+
+        # The tanh approximation (fast_gelu=True) deviates slightly from the
+        # erf-based reference, so it gets a looser relative tolerance.
+        rtol = 1e-3 if fast_gelu else 1e-4
+        atol = 1e-5
+        torch.testing.assert_allclose(
+            expected_output, actual_output.cpu(), rtol=rtol, atol=atol)
+
+    def test_gelu_op(self):
+        self._test_gelu_op(device="cpu")
+
+    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
+    def test_gelu_op_cuda(self):
+        self._test_gelu_op(device="cuda")
+
if __name__ == '__main__':
unittest.main()