DECLARE_DISPATCH(shrink_backward_fn, shrink_backward_stub);
DECLARE_DISPATCH(leaky_relu_fn, leaky_relu_stub);
DECLARE_DISPATCH(leaky_relu_backward_fn, leaky_relu_backward_stub);
-DECLARE_DISPATCH(activation_fn, glu_stub);
+DECLARE_DISPATCH(structured_activation_fn, glu_stub);
DECLARE_DISPATCH(activation_backward_fn, glu_backward_stub);
DECLARE_DISPATCH(structured_activation_fn, silu_stub);
DECLARE_DISPATCH(structured_activation_backward_fn, silu_backward_stub);
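// For reference, a minimal sketch of the function-pointer aliases behind these
// stubs (inferred from the declarations above; see Activation.h for the
// authoritative definitions). The structured variants take the TensorIteratorBase
// that a structured kernel's meta function builds, which is why glu_stub moves to
// structured_activation_fn once glu becomes structured:
//
//   using structured_activation_fn          = void (*)(TensorIteratorBase&);
//   using structured_activation_backward_fn = void (*)(TensorIteratorBase&);
//   using activation_fn                     = void (*)(TensorIterator&);
//   using activation_backward_fn            = void (*)(TensorIterator&);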
#include <ATen/native/Activation.h>
namespace at {
-namespace native {
-
-DEFINE_DISPATCH(glu_stub);
-DEFINE_DISPATCH(glu_backward_stub);
-Tensor& glu_out(const Tensor& self, int64_t dim, Tensor &result) {
+namespace meta {
+TORCH_META_FUNC(glu) (
+ const Tensor& self, int64_t dim
+) {
// this can't pass anyway because a 0-dimensional tensor has "size" 1, which
// can't be evenly halved, but give a nicer error message here.
TORCH_CHECK(self.dim() > 0, "glu does not support 0-dimensional tensors");
auto wrap_dim = maybe_wrap_dim(dim, self.dim());
const int64_t nIn = self.size(wrap_dim);
TORCH_CHECK(nIn % 2 == 0, "Halving dimension must be even, but dimension ",
wrap_dim, " is size ", nIn);
+
// size output to half of input
const int64_t selfSize = nIn / 2;
- auto newSizes = self.sizes().vec();
- newSizes[wrap_dim] = selfSize;
- result.resize_(newSizes);
- // half tensor
Tensor firstHalf = self.narrow(wrap_dim, 0, selfSize);
Tensor secondHalf = self.narrow(wrap_dim, selfSize, selfSize);
-
- auto iter = TensorIterator::borrowing_binary_op(result, firstHalf, secondHalf);
- glu_stub(iter.device_type(), iter);
- return result;
+ build_borrowing_binary_op(maybe_get_output(), firstHalf, secondHalf);
}
+} // namespace meta
+
+namespace native {
+
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DEFINE_DISPATCH(glu_stub);
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DEFINE_DISPATCH(glu_backward_stub);
-Tensor glu(const Tensor& self, int64_t dim) {
- auto result = at::empty({0}, self.options());
- return at::glu_out(result, self, dim);
+TORCH_IMPL_FUNC(glu_out) (const Tensor& self, int64_t dim, const Tensor& out) {
+ glu_stub(device_type(), *this);
}
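// Rough sketch of why `*this` can be passed straight to glu_stub above: for a
// structured op, the codegen (driven by the native_functions.yaml entries further
// down) emits classes roughly like the ones below, so TORCH_META_FUNC(glu) and
// TORCH_IMPL_FUNC(glu_out) define members of a type that derives from
// TensorIteratorBase (per structured_inherits). Names are approximate, not the
// generated code verbatim.
//
//   struct structured_glu : public TensorIteratorBase {
//     void meta(const Tensor& self, int64_t dim);                    // TORCH_META_FUNC(glu)
//   };
//   struct structured_glu_out : public structured_glu {
//     void impl(const Tensor& self, int64_t dim, const Tensor& out); // TORCH_IMPL_FUNC(glu_out)
//   };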
Tensor& glu_backward_cpu_out(const Tensor& grad_output, const Tensor& input,
// -----------------------------------
// glu forward
// -----------------------------------
-void glu_kernel(TensorIterator& iter) {
+void glu_kernel(TensorIteratorBase& iter) {
AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, iter.dtype(), "glu_cuda", [&]() {
using acc_t = at::acc_type<scalar_t, true>;
gpu_kernel(iter, [] GPU_LAMBDA (scalar_t a_, scalar_t b_) -> scalar_t {
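// The lambda body (elided in this excerpt) computes the usual GLU gating in
// acc_t precision: out = a * sigmoid(b), i.e. the first half of the halved
// dimension gated by the sigmoid of the second half.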
CompositeExplicitAutograd: elu_
- func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
+ structured: True
+ structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: glu_out
- func: glu(Tensor self, int dim=-1) -> Tensor
+ structured_delegate: glu.out
+ device_check: NoCheck # TensorIterator
python_module: nn
- dispatch:
- CPU, CUDA: glu
- func: glu_backward.grad_input(Tensor grad_output, Tensor self, int dim, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
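# Usage sketch (approximate): with the structured entries above, the functional and
# out= variants share one meta/impl pair, e.g. in C++:
#   at::Tensor x = at::randn({4, 6});
#   at::Tensor y = at::glu(x, /*dim=*/-1);              // delegates to glu.out
#   at::Tensor out = at::empty({4, 3}, x.options());
#   at::glu_out(out, x, /*dim=*/-1);                    // same meta checks + glu_stub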