From 8136c39b5e22a307f5aa50b1c1ca6a4a11283eee Mon Sep 17 00:00:00 2001
From: Sebastian Messmer
Date: Thu, 10 Jan 2019 16:06:26 -0800
Subject: [PATCH] Enable calling caffe2 LayerNorm from PyTorch and JIT (#15243)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15243

Register it as a custom JIT op.

Reviewed By: dzhulgakov

Differential Revision: D13473791

fbshipit-source-id: 0f7e72e3efc85a75060a7597fadaf0a8bd289651
---
 c10/core/opschema/layer_norm.h        |  7 +++---
 caffe2/operators/layer_norm_op.cc     |  6 ++---
 tools/build_variables.py              |  1 +
 torch/CMakeLists.txt                  |  1 +
 torch/csrc/jit/c10_ops/layer_norm.cpp | 45 +++++++++++++++++++++++++++++++++++
 5 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 torch/csrc/jit/c10_ops/layer_norm.cpp

diff --git a/c10/core/opschema/layer_norm.h b/c10/core/opschema/layer_norm.h
index a2fe639..a00ded0 100644
--- a/c10/core/opschema/layer_norm.h
+++ b/c10/core/opschema/layer_norm.h
@@ -29,15 +29,14 @@ struct LayerNorm final {
       const C10Tensor& output_stddev,
       int axis,
       float epsilon,
-      Cache* cache,
-      at::BaseContext* context);
+      Cache* cache);
 
   static constexpr size_t num_dispatch_args() {return 1;}
 
   static constexpr size_t num_outputs() {return 3;}
 
-  static constexpr c10::guts::array<const char*, 8> parameter_names = {
-      {"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache", "context"}};
+  static constexpr c10::guts::array<const char*, 7> parameter_names = {
+      {"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache"}};
 };
 
 } // namespace opschema
diff --git a/caffe2/operators/layer_norm_op.cc b/caffe2/operators/layer_norm_op.cc
index 4806844..94cd177 100644
--- a/caffe2/operators/layer_norm_op.cc
+++ b/caffe2/operators/layer_norm_op.cc
@@ -193,12 +193,12 @@ void layer_norm_c10(
     const c10::C10Tensor& sig_,
     int axis,
     float epsilon,
-    c10::core::opschema::LayerNorm::Cache* cache,
-    caffe2::BaseContext* context) {
+    c10::core::opschema::LayerNorm::Cache* cache) {
   caffe2::Tensor X(X_);
   caffe2::Tensor Y(Y_);
   caffe2::Tensor mean(mean_);
   caffe2::Tensor sig(sig_);
+  caffe2::CPUContext context;
   if (!cache->scale.has_value()) {
     cache->scale = c10::C10Tensor(caffe2::Tensor{caffe2::CPU});
   }
@@ -215,7 +215,7 @@ void layer_norm_c10(
   mean.Resize(moments_dims);
   sig.Resize(moments_dims);
   caffe2::LayerNormOp<caffe2::CPUContext>::runLayerNorm(
-      X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast<caffe2::CPUContext*>(context)
+      X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast<caffe2::CPUContext*>(&context)
   );
 }
 }
diff --git a/tools/build_variables.py b/tools/build_variables.py
index 53c53a6..08a2218 100644
--- a/tools/build_variables.py
+++ b/tools/build_variables.py
@@ -97,6 +97,7 @@ torch_sources_no_python_default = [
     "torch/csrc/jit/script/lexer.cpp",
     "torch/csrc/jit/script/module.cpp",
     "torch/csrc/jit/tracer.cpp",
+    "torch/csrc/jit/c10_ops/layer_norm.cpp",
     "torch/csrc/utils/tensor_flatten.cpp",
     "torch/csrc/utils/variadic.cpp",
 ]
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index f5343a8..beaaa72 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -200,6 +200,7 @@ set(TORCH_SRCS
   ${TORCH_SRC_DIR}/csrc/jit/script/module.cpp
   ${TORCH_SRC_DIR}/csrc/jit/tracer.cpp
   ${TORCH_SRC_DIR}/csrc/jit/hooks_for_testing.cpp
+  ${TORCH_SRC_DIR}/csrc/jit/c10_ops/layer_norm.cpp
   ${TORCH_SRC_DIR}/csrc/utils/tensor_flatten.cpp
   ${TORCH_SRC_DIR}/csrc/utils/variadic.cpp
   ${TORCH_ROOT}/test/cpp/jit/no-gtest.cpp
diff --git a/torch/csrc/jit/c10_ops/layer_norm.cpp b/torch/csrc/jit/c10_ops/layer_norm.cpp
new file mode 100644
index 0000000..d0d874b
--- /dev/null
+++ b/torch/csrc/jit/c10_ops/layer_norm.cpp
@@ -0,0 +1,45 @@
+#include <torch/csrc/jit/custom_operator.h>
+#include <torch/csrc/autograd/variable.h>
+#include <c10/core/opschema/layer_norm.h>
+#include <c10/core/dispatch/Dispatcher.h>
+#include <ATen/ATen.h>
+
+using c10::C10Tensor;
+
+namespace {
+// TODO Return tuple instead of vector
+std::vector<at::Tensor> layer_norm(
+    at::Tensor input,
+    int64_t axis,
+    double epsilon) {
+
+  // TODO This code is currently written specifically for LayerNorm, but it is
+  // *not* the plan to have to write this manually for each operation.
+  // This is just a proof of concept. To expand this to all operators,
+  // we'd ideally not need any per-operator code (possibly thanks to boxing
+  // or templates). If that's not possible, then we should at least offer
+  // a macro that takes this burden so that we only need to write one line
+  // for each operation we want to support (i.e. the macro invocation).
+
+  // TODO This currently only handles tensors with requires_grad==False correctly.
+  // It should also handle autograd.
+
+  if (input.requires_grad()) {
+    throw std::runtime_error("Autograd not yet supported for c10 ops.");
+  }
+  c10::core::opschema::LayerNorm::Cache cache;
+  C10Tensor c10_input(torch::autograd::Variable(std::move(input)).data());
+  C10Tensor c10_output(at::empty({0}));
+  C10Tensor c10_output_mean(at::empty({0}));
+  C10Tensor c10_output_stdev(at::empty({0}));
+  c10::Dispatcher<c10::core::opschema::LayerNorm>::call(c10_input, c10_output, c10_output_mean, c10_output_stdev, (int)axis, (float)epsilon, &cache);
+  return {
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output)), false),
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output_mean)), false),
+    torch::autograd::make_variable(at::Tensor(std::move(c10_output_stdev)), false)
+  };
+}
+}
+
+static auto registry =
+  torch::jit::RegisterOperators("caffe2::layer_norm_dont_use_this_op_yet", &layer_norm);
-- 
2.7.4
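
Usage sketch (illustrative, not part of the patch): assuming the operator registered
above is exposed to Python through the torch.ops namespace, as operators registered
via torch::jit::RegisterOperators generally are, and that its schema is inferred from
the C++ signature as (Tensor, int, float) -> Tensor[], a call could look roughly like
the following. The shapes, axis, and epsilon are placeholder values, and the input
must not require grad, since the wrapper above rejects autograd.

    import torch

    x = torch.randn(2, 3)  # requires_grad left False; the wrapper throws otherwise
    # caffe2::layer_norm_dont_use_this_op_yet(input, axis, epsilon) -> [output, mean, stddev]
    out, mean, stddev = torch.ops.caffe2.layer_norm_dont_use_this_op_yet(x, 1, 1e-5)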