From 8136c39b5e22a307f5aa50b1c1ca6a4a11283eee Mon Sep 17 00:00:00 2001
From: Sebastian Messmer
Date: Thu, 10 Jan 2019 16:06:26 -0800
Subject: [PATCH] Enable calling caffe2 LayerNorm from PyTorch and JIT (#15243)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15243

Register it as a custom JIT op.

Reviewed By: dzhulgakov

Differential Revision: D13473791

fbshipit-source-id: 0f7e72e3efc85a75060a7597fadaf0a8bd289651
---
 c10/core/opschema/layer_norm.h        |  7 +++---
 caffe2/operators/layer_norm_op.cc     |  6 ++---
 tools/build_variables.py              |  1 +
 torch/CMakeLists.txt                  |  1 +
 torch/csrc/jit/c10_ops/layer_norm.cpp | 45 +++++++++++++++++++++++++++++++++++
 5 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 torch/csrc/jit/c10_ops/layer_norm.cpp

diff --git a/c10/core/opschema/layer_norm.h b/c10/core/opschema/layer_norm.h
index a2fe639..a00ded0 100644
--- a/c10/core/opschema/layer_norm.h
+++ b/c10/core/opschema/layer_norm.h
@@ -29,15 +29,14 @@ struct LayerNorm final {
       const C10Tensor& output_stddev,
       int axis,
       float epsilon,
-      Cache* cache,
-      at::BaseContext* context);
+      Cache* cache);
 
   static constexpr size_t num_dispatch_args() {return 1;}
 
   static constexpr size_t num_outputs() {return 3;}
 
-  static constexpr c10::guts::array<const char*, 8> parameter_names = {
-      {"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache", "context"}};
+  static constexpr c10::guts::array<const char*, 7> parameter_names = {
+      {"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache"}};
 };
 
 } // namespace opschema
diff --git a/caffe2/operators/layer_norm_op.cc b/caffe2/operators/layer_norm_op.cc
index 4806844..94cd177 100644
--- a/caffe2/operators/layer_norm_op.cc
+++ b/caffe2/operators/layer_norm_op.cc
@@ -193,12 +193,12 @@ void layer_norm_c10(
     const c10::C10Tensor& sig_,
     int axis,
     float epsilon,
-    c10::core::opschema::LayerNorm::Cache* cache,
-    caffe2::BaseContext* context) {
+    c10::core::opschema::LayerNorm::Cache* cache) {
   caffe2::Tensor X(X_);
   caffe2::Tensor Y(Y_);
   caffe2::Tensor mean(mean_);
   caffe2::Tensor sig(sig_);
+  caffe2::CPUContext context;
   if (!cache->scale.has_value()) {
     cache->scale = c10::C10Tensor(caffe2::Tensor{caffe2::CPU});
   }
@@ -215,7 +215,7 @@ void layer_norm_c10(
   mean.Resize(moments_dims);
   sig.Resize(moments_dims);
   caffe2::LayerNormOp<caffe2::CPUContext>::runLayerNorm(
-      X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast<caffe2::CPUContext*>(context)
+      X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast<caffe2::CPUContext*>(&context)
   );
 }
 }
diff --git a/tools/build_variables.py b/tools/build_variables.py
index 53c53a6..08a2218 100644
--- a/tools/build_variables.py
+++ b/tools/build_variables.py
@@ -97,6 +97,7 @@ torch_sources_no_python_default = [
     "torch/csrc/jit/script/lexer.cpp",
     "torch/csrc/jit/script/module.cpp",
     "torch/csrc/jit/tracer.cpp",
+    "torch/csrc/jit/c10_ops/layer_norm.cpp",
     "torch/csrc/utils/tensor_flatten.cpp",
     "torch/csrc/utils/variadic.cpp",
 ]
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index f5343a8..beaaa72 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -200,6 +200,7 @@ set(TORCH_SRCS
   ${TORCH_SRC_DIR}/csrc/jit/script/module.cpp
   ${TORCH_SRC_DIR}/csrc/jit/tracer.cpp
   ${TORCH_SRC_DIR}/csrc/jit/hooks_for_testing.cpp
+  ${TORCH_SRC_DIR}/csrc/jit/c10_ops/layer_norm.cpp
   ${TORCH_SRC_DIR}/csrc/utils/tensor_flatten.cpp
   ${TORCH_SRC_DIR}/csrc/utils/variadic.cpp
   ${TORCH_ROOT}/test/cpp/jit/no-gtest.cpp
diff --git a/torch/csrc/jit/c10_ops/layer_norm.cpp b/torch/csrc/jit/c10_ops/layer_norm.cpp
new file mode 100644
index 0000000..d0d874b
--- /dev/null
+++ b/torch/csrc/jit/c10_ops/layer_norm.cpp
@@ -0,0 +1,45 @@
+#include <torch/csrc/jit/custom_operator.h>
+#include <torch/csrc/autograd/variable.h>
+#include <c10/core/opschema/layer_norm.h>
+#include <c10/core/dispatch/Dispatcher.h>
+#include <ATen/ATen.h>
+
+using c10::C10Tensor;
+
+namespace {
+// TODO Return tuple instead of vector
+std::vector<at::Tensor> layer_norm(
+    at::Tensor input,
+    int64_t axis,
+    double epsilon) {
+
+  // TODO This code is currently written specifically for LayerNorm, but it is
+  // *not* the plan to have to write this manually for each operation.
+  // This is just a proof of concept. To expand this to all operators,
+  // we'd ideally not need any per-operator code (possibly thanks to boxing
+  // or templates). If that's not possible, then we should at least offer
+  // a macro that takes this burden so that we only need to write one line
+  // for each operation we want to support (i.e. the macro invocation).
+
+  // TODO This currently only handles tensors with requires_grad==False correctly.
+  // It should also handle autograd.
+
+  if (input.requires_grad()) {
+    throw std::runtime_error("Autograd not yet supported for c10 ops.");
+  }
+  c10::core::opschema::LayerNorm::Cache cache;
+  C10Tensor c10_input(torch::autograd::Variable(std::move(input)).data());
+  C10Tensor c10_output(at::empty({0}));
+  C10Tensor c10_output_mean(at::empty({0}));
+  C10Tensor c10_output_stdev(at::empty({0}));
+  c10::Dispatcher<c10::core::opschema::LayerNorm>::call(c10_input, c10_output, c10_output_mean, c10_output_stdev, (int)axis, (float)epsilon, &cache);
+  return {
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output)), false),
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output_mean)), false),
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output_stdev)), false)
+  };
+}
+}
+
+static auto registry =
+  torch::jit::RegisterOperators("caffe2::layer_norm_dont_use_this_op_yet", &layer_norm);
-- 
2.7.4
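
Usage sketch (illustrative, not part of the patch): assuming the operator registered
above is exposed to Python through the torch.ops namespace, as operators registered
via torch::jit::RegisterOperators generally are, and that its schema is inferred from
the C++ signature as (Tensor, int, float) -> Tensor[], a call could look roughly like
the following. The shapes, axis, and epsilon are placeholder values, and the input
must not require grad, since the wrapper above rejects autograd.

    import torch

    x = torch.randn(2, 3)  # requires_grad left False; the wrapper throws otherwise
    # caffe2::layer_norm_dont_use_this_op_yet(input, axis, epsilon) -> [output, mean, stddev]
    out, mean, stddev = torch.ops.caffe2.layer_norm_dont_use_this_op_yet(x, 1, 1e-5)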