port glu to use structured kernel approach (#61800)
author Maksim Levental <maksim.levental@gmail.com>
Fri, 27 Aug 2021 00:59:59 +0000 (17:59 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 27 Aug 2021 01:01:28 +0000 (18:01 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/61800

Resubmitting because the [last one](https://github.com/pytorch/pytorch/pull/61433) was unrecoverable: changes were made incorrectly in the stack.

Test Plan: Imported from OSS

Reviewed By: iramazanli

Differential Revision: D29812492

Pulled By: makslevental

fbshipit-source-id: c3dfeacd1e00a526e24fbaab02dad48069d690ef

aten/src/ATen/native/Activation.h
aten/src/ATen/native/GatedLinearUnit.cpp
aten/src/ATen/native/cpu/Activation.cpp
aten/src/ATen/native/cuda/Activation.cu
aten/src/ATen/native/native_functions.yaml

index 01782fa..f0c6d82 100644 (file)
@@ -51,7 +51,7 @@ DECLARE_DISPATCH(softshrink_fn, softshrink_stub);
 DECLARE_DISPATCH(shrink_backward_fn, shrink_backward_stub);
 DECLARE_DISPATCH(leaky_relu_fn, leaky_relu_stub);
 DECLARE_DISPATCH(leaky_relu_backward_fn, leaky_relu_backward_stub);
-DECLARE_DISPATCH(activation_fn, glu_stub);
+DECLARE_DISPATCH(structured_activation_fn, glu_stub);
 DECLARE_DISPATCH(activation_backward_fn, glu_backward_stub);
 DECLARE_DISPATCH(structured_activation_fn, silu_stub);
 DECLARE_DISPATCH(structured_activation_backward_fn, silu_backward_stub);
index a0e2c16..c585caa 100644 (file)
@@ -3,12 +3,11 @@
 #include <ATen/native/Activation.h>
 
 namespace at {
-namespace native {
-
-DEFINE_DISPATCH(glu_stub);
-DEFINE_DISPATCH(glu_backward_stub);
 
-Tensor& glu_out(const Tensor& self, int64_t dim, Tensor &result) {
+namespace meta {
+TORCH_META_FUNC(glu) (
+    const Tensor& self, int64_t dim
+) {
   // this can't pass anyway because a 0-dimensional tensor has "size" 1, which
   // can't be evenly halved, but give a nicer error message here.
   TORCH_CHECK(self.dim() > 0, "glu does not support 0-dimensional tensors");
@@ -16,23 +15,24 @@ Tensor& glu_out(const Tensor& self, int64_t dim, Tensor &result) {
   const int64_t nIn = self.size(wrap_dim);
   TORCH_CHECK(nIn % 2 == 0, "Halving dimension must be even, but dimension ",
               wrap_dim, " is size ", nIn);
+
   // size output to half of input
   const int64_t selfSize = nIn / 2;
-  auto newSizes = self.sizes().vec();
-  newSizes[wrap_dim] = selfSize;
-  result.resize_(newSizes);
-  // half tensor
   Tensor firstHalf = self.narrow(wrap_dim, 0, selfSize);
   Tensor secondHalf = self.narrow(wrap_dim, selfSize, selfSize);
-
-  auto iter = TensorIterator::borrowing_binary_op(result, firstHalf, secondHalf);
-  glu_stub(iter.device_type(), iter);
-  return result;
+  build_borrowing_binary_op(maybe_get_output(), firstHalf, secondHalf);
 }
+} // namespace meta
+
+namespace native {
+
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DEFINE_DISPATCH(glu_stub);
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DEFINE_DISPATCH(glu_backward_stub);
 
-Tensor glu(const Tensor& self, int64_t dim) {
-  auto result = at::empty({0}, self.options());
-  return at::glu_out(result, self, dim);
+TORCH_IMPL_FUNC(glu_out) (const Tensor& self, int64_t dim, const Tensor& out) {
+  glu_stub(device_type(), *this);
 }
 
 Tensor& glu_backward_cpu_out(const Tensor& grad_output, const Tensor& input,
index fc5cc0d..34b5471 100644 (file)
@@ -519,7 +519,7 @@ void softplus_backward_kernel(TensorIteratorBase& iter, const Scalar& beta_, con
   });
 }
 
-void glu_kernel(TensorIterator& iter) {
+void glu_kernel(TensorIteratorBase& iter) {
   AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "glu_cpu", [&] {
     using Vec = Vectorized<scalar_t>;
     const scalar_t one_val(1);
index 1229149..7c87830 100644 (file)
@@ -28,7 +28,7 @@ namespace native {
 // -----------------------------------
 // glu forward
 // -----------------------------------
-void glu_kernel(TensorIterator& iter) {
+void glu_kernel(TensorIteratorBase& iter) {
   AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, iter.dtype(), "glu_cuda", [&]() {
     using acc_t = at::acc_type<scalar_t, true>;
     gpu_kernel(iter, [] GPU_LAMBDA (scalar_t a_, scalar_t b_) -> scalar_t {
index 4f7d7e6..224d850 100644 (file)
     CompositeExplicitAutograd: elu_
 
 - func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
     CPU, CUDA: glu_out
 
 - func: glu(Tensor self, int dim=-1) -> Tensor
+  structured_delegate: glu.out
+  device_check: NoCheck   # TensorIterator
   python_module: nn
-  dispatch:
-    CPU, CUDA: glu
 
 - func: glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn