From: Sebastian Messmer Date: Wed, 9 Jan 2019 04:22:41 +0000 (-0800) Subject: Use C10Tensor in the dispatcher (#15195) X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~1960 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d562840910b8743b6ea476a47c4df53a8531fd14;p=platform%2Fupstream%2Fpytorch.git Use C10Tensor in the dispatcher (#15195) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/15195 This removes the use of caffe2::Tensor or at::Tensor in the c10 dispatcher and only uses C10::Tensor. It also changes output tensors to be passed as `const Tensor&` instead of `Tensor*` because we otherwise can't forward them in operator_c10wrapper.h. Reviewed By: ezyang Differential Revision: D13461640 fbshipit-source-id: 7f79925a7d60f01660a24bbfda47391af0c70ed3 --- diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt index e80dd79..514aeee 100644 --- a/c10/CMakeLists.txt +++ b/c10/CMakeLists.txt @@ -29,6 +29,7 @@ file(GLOB C10_SRCS *.cpp core/*.cpp core/dispatch/*.cpp + core/opschema/*.cpp impl/*.cpp macros/*.cpp util/*.cpp diff --git a/c10/core/Tensor.h b/c10/core/Tensor.h index c8f9c11..461c1a8 100644 --- a/c10/core/Tensor.h +++ b/c10/core/Tensor.h @@ -27,7 +27,7 @@ public: C10Tensor& operator=(C10Tensor&&) noexcept = default; const TensorImplPtr &impl() const & noexcept; - TensorImplPtr impl() && noexcept; + TensorImplPtr&& impl() && noexcept; TensorTypeId type_id() const; @@ -42,7 +42,7 @@ inline const C10Tensor::TensorImplPtr &C10Tensor::impl() const & noexcept { return impl_; } -inline C10Tensor::TensorImplPtr C10Tensor::impl() && noexcept { +inline C10Tensor::TensorImplPtr&& C10Tensor::impl() && noexcept { return std::move(impl_); } diff --git a/c10/core/dispatch/OpSchema.h b/c10/core/dispatch/OpSchema.h index b9658d6..0825e1b 100644 --- a/c10/core/dispatch/OpSchema.h +++ b/c10/core/dispatch/OpSchema.h @@ -4,10 +4,7 @@ #include #include #include - -namespace caffe2 { -class Tensor; -} // namespace caffe2 +#include namespace c10 { @@ -19,7 +16,7 @@ namespace details { */ template using is_tensor_arg = std:: - is_same>>; + is_same>>; inline DeviceTypeId to_device_type_id(DeviceType device_type) { switch (device_type) { @@ -32,16 +29,18 @@ inline DeviceTypeId to_device_type_id(DeviceType device_type) { } } -// TODO get rid of tensor_to_dispatch_key once c2::Tensor is de-templatized. This then fits into a template lambda instead of a functor. -struct tensor_to_dispatch_key final { - template - TensorParameterDispatchKey operator()(const TensorType& tensor) const { - return TensorParameterDispatchKey{ - to_device_type_id(tensor.GetDeviceType()), - LayoutId(0), - tensor.dtype().id()}; - } -}; +inline TensorParameterDispatchKey tensor_to_dispatch_key(const C10Tensor& tensor) { + return TensorParameterDispatchKey{ + to_device_type_id(tensor.impl()->device_type()), + LayoutId(0), + tensor.impl()->dtype().id()}; +} + +// Extract type ids for all tensors from an array of tensors +template +guts::array getDispatchTypeIds__(const guts::array& tensor_args, guts::index_sequence) { + return {tensor_to_dispatch_key(*tensor_args[indices])...}; +} /** * Extract the type ids of all tensors in a variadic list of arguments @@ -50,12 +49,13 @@ struct tensor_to_dispatch_key final { * @param args List of arguments to get type ids from * @return guts::array, where n is the number of tensor arguments (is_tensor_arg) in the class */ -template auto getTensorTypeIds_(const Args&... args) --> guts::array>::value> { - return guts::filter_map(tensor_to_dispatch_key(), args...); +template +guts::array getDispatchTypeIds_(const Args&... args) { + auto tensor_args = guts::filter_map([] (const C10Tensor& v){return &v;}, args...); + return getDispatchTypeIds__(tensor_args, guts::make_index_sequence()); } -// TODO Test getTensorTypeIds_ +// TODO Test getDispatchTypeIds_ /** * If T is a struct with a type field Signature, provides the member constant @@ -121,6 +121,8 @@ public: */ static constexpr size_t num_tensor_args = guts::typelist::count_if::value; + static constexpr size_t num_outputs = OpSchemaDef::num_outputs(); + private: static_assert(details::has_parameter_names_defined::value, "Operator schema doesn't define parameter_names member."); // TODO Allow simpler definition of parameter_names without having to spell out the guts::array type in the schema def. @@ -165,7 +167,7 @@ class OpDispatchKeySchema; + using dispatch_key_type = DispatchKey; template static inline dispatch_key_type dispatch_key(const Args&... args) { @@ -176,7 +178,7 @@ public: map_t> >::value, "Invalid argument types passed to OpSchema::dispatch_key()"); return dispatch_key_type { - details::getTensorTypeIds_(args...) + details::getDispatchTypeIds_(args...) }; } }; diff --git a/c10/test/dispatch/OpSchema_test.cpp b/c10/test/dispatch/OpSchema_test.cpp index 3a56ff9..d10f7fc 100644 --- a/c10/test/dispatch/OpSchema_test.cpp +++ b/c10/test/dispatch/OpSchema_test.cpp @@ -1,26 +1,27 @@ -#include "c10/core/dispatch/OpSchema.h" +#include #include using namespace c10; -using namespace caffe2; -static_assert(details::is_tensor_arg::value, ""); -static_assert(details::is_tensor_arg::value, ""); -static_assert(details::is_tensor_arg::value, ""); +static_assert(details::is_tensor_arg::value, ""); +static_assert(details::is_tensor_arg::value, ""); +static_assert(details::is_tensor_arg::value, ""); static_assert(!details::is_tensor_arg::value, ""); struct SchemaDef final { - using Signature = bool(int, Tensor, float, Tensor, Tensor, unsigned int); + using Signature = bool(int, C10Tensor, float, C10Tensor, C10Tensor, unsigned int); static constexpr guts::array parameter_names = {{ "1", "2", "3", "4", "5", "6" }}; + static constexpr size_t num_dispatch_args() {return 3;} + static constexpr size_t num_outputs() {return 0;} }; -static_assert(6 == OpSchema::signature::num_args, "test num_dispatch_args"); -static_assert(3 == OpSchema::signature::num_tensor_args, "test num_dispatch_args"); -static_assert(std::is_same::signature::return_type>::value, "test num_dispatch_args"); +static_assert(6 == OpSchema::signature::num_args, ""); +static_assert(3 == OpSchema::signature::num_tensor_args, ""); +static_assert(std::is_same::signature::return_type>::value, ""); static_assert( std::is_same< guts::typelist:: - typelist, + typelist, typename OpSchema::signature::parameter_types>::value, - "test num_dispatch_args"); + ""); diff --git a/caffe2/core/operator_c10wrapper.h b/caffe2/core/operator_c10wrapper.h index f7089ca..88f3536 100644 --- a/caffe2/core/operator_c10wrapper.h +++ b/caffe2/core/operator_c10wrapper.h @@ -79,9 +79,7 @@ class C10OperatorWrapper final : public Operator { } static constexpr size_t num_outputs() { - return c10::guts::typelist::count_if< - details::is_output_arg, - typename Schema::signature::parameter_types>::value; + return Schema::signature::num_outputs; } bool RunOnDevice() override { @@ -122,8 +120,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - Input(InputIndex)..., - Output(OutputIndex)..., + C10Tensor(Input(InputIndex))..., + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., state_.get(), static_cast(&context_)); @@ -142,8 +140,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - Input(InputIndex)..., - Output(OutputIndex)..., + C10Tensor(Input(InputIndex))..., + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., static_cast(&context_)); } @@ -161,8 +159,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - Input(InputIndex)..., - Output(OutputIndex)..., + C10Tensor(Input(InputIndex))..., + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., state_.get()); } @@ -180,8 +178,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - Input(InputIndex)..., - Output(OutputIndex)..., + C10Tensor(Input(InputIndex))..., + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)...); } @@ -198,8 +196,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - at::ArrayRef(array_inputs_()), - Output(OutputIndex)..., + at::ArrayRef(array_inputs_()), + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., state_.get(), static_cast(&context_)); @@ -218,8 +216,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - at::ArrayRef(array_inputs_()), - Output(OutputIndex)..., + at::ArrayRef(array_inputs_()), + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., static_cast(&context_)); } @@ -237,8 +235,8 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - at::ArrayRef(array_inputs_()), - Output(OutputIndex)..., + at::ArrayRef(array_inputs_()), + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)..., state_.get()); } @@ -256,16 +254,16 @@ class C10OperatorWrapper final : public Operator { c10::guts::index_sequence, c10::guts::index_sequence) { c10::Dispatcher::call( - at::ArrayRef(array_inputs_()), - Output(OutputIndex)..., + at::ArrayRef(array_inputs_()), + C10Tensor(*Output(OutputIndex))..., std::get(parameters_)...); } - std::vector array_inputs_() { - std::vector result; + std::vector array_inputs_() { + std::vector result; result.reserve(InputSize()); for (size_t i = 0; i < InputSize(); ++i) { - result.push_back(&Input(i)); + result.push_back(C10Tensor(Input(i))); } return result; } diff --git a/caffe2/operators/experimental/c10/cpu/add_cpu.cc b/caffe2/operators/experimental/c10/cpu/add_cpu.cc index 76c3bc3..b55357b 100644 --- a/caffe2/operators/experimental/c10/cpu/add_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/add_cpu.cc @@ -11,12 +11,15 @@ namespace { template void add_op_cpu_impl( - const Tensor& A, - const Tensor& B, - Tensor* C, + const C10Tensor& A_, + const C10Tensor& B_, + const C10Tensor& C_, bool legacy_broadcast, int axis, BaseContext* context) { + Tensor A(A_); + Tensor B(B_); + Tensor C(C_); const DataType* A_data = A.template data(); const DataType* B_data = B.template data(); std::vector A_dims; @@ -24,11 +27,11 @@ void add_op_cpu_impl( if (legacy_broadcast) { CAFFE_ENFORCE_NE( - C, - &B, + C.getIntrusivePtr(), + B.getIntrusivePtr(), "In-place is allowed only with the first tensor when " "legacy-broadcasting"); - C->ResizeLike(A); + C.ResizeLike(A); if (B.numel() == 1) { A_dims = {static_cast(A.numel())}; B_dims = {1}; @@ -47,15 +50,15 @@ void add_op_cpu_impl( const std::vector C_dims = caffe2::elementwise_ops_utils::ComputeBinaryBroadcastForwardDims( A_dims, B_dims); - if (C == &A) { + if (C.getIntrusivePtr() == A.getIntrusivePtr()) { CAFFE_ENFORCE_EQ(C_dims, A_dims); - } else if (C == &B) { + } else if (C.getIntrusivePtr() == B.getIntrusivePtr()) { CAFFE_ENFORCE_EQ(C_dims, B_dims); } else { - C->Resize(C_dims); + C.Resize(C_dims); } } - auto* C_data = C->template mutable_data(); + auto* C_data = C.template mutable_data(); caffe2::math::Add( A_dims.size(), @@ -64,7 +67,7 @@ void add_op_cpu_impl( B_dims.data(), A.data(), B.data(), - C->mutable_data(), + C.mutable_data(), static_cast(context)); } } // namespace diff --git a/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc b/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc index 1bc2152..276eb75 100644 --- a/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/averaged_loss_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/averaged_loss.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -11,25 +12,29 @@ namespace { template void averaged_loss_op_cpu_impl( - const Tensor& X, - Tensor* sum, + const C10Tensor& X_, + const C10Tensor& sum_, caffe2::ops::AveragedLoss::State* state, BaseContext* context) { - sum->Resize(vector()); + Tensor X(X_); + Tensor sum(sum_); - T* data = sum->template mutable_data(); + sum.Resize(vector()); + T* data = sum.template mutable_data(); + + Tensor scratch(state->scratch); caffe2::math::Sum( X.numel(), X.template data(), data, static_cast(context), - &state->scratch); + &scratch); if (X.numel() > 0) { caffe2::math::Scale( 1, static_cast(1.) / X.numel(), - sum->template data(), + sum.template data(), data, static_cast(context)); } diff --git a/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc b/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc index bfc876c..01caa33 100644 --- a/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/batch_gather_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/batch_gather.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -11,17 +12,21 @@ namespace { template void batch_gather_op_cpu_impl( - const Tensor& data, - const Tensor& indices, - Tensor* output, + const C10Tensor& data_, + const C10Tensor& indices_, + const C10Tensor& output_, BaseContext* context) { + Tensor data(data_); + Tensor indices(indices_); + Tensor output(output_); + CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D"); vector shape; shape.push_back(data.size(0)); shape.insert(shape.end(), indices.sizes().begin(), indices.sizes().end()); shape.insert(shape.end(), data.sizes().begin() + 2, data.sizes().end()); - output->Resize(shape); + output.Resize(shape); auto block_size = data.size_from_dim(2); auto block_bytesize = block_size * data.dtype().itemsize(); @@ -31,7 +36,7 @@ void batch_gather_op_cpu_impl( N * data.size_from_dim(2) * data.dtype().itemsize(); const TInd* idxs = indices.template data(); auto src_base = static_cast(data.raw_data()); - auto out = static_cast(output->raw_mutable_data(data.dtype())); + auto out = static_cast(output.raw_mutable_data(data.dtype())); for (auto batch = 0; batch < data.size(0); ++batch) { for (auto i = 0; i < N; ++i) { diff --git a/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc b/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc index 79a2f8f..476cf1e 100644 --- a/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/batch_matmul_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/batch_matmul.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -12,14 +13,17 @@ namespace { template void batch_matmul_op_cpu_impl( - const Tensor& A, - const Tensor& B, - Tensor* Y, + const C10Tensor& A_, + const C10Tensor& B_, + const C10Tensor& Y_, int trans_a, int trans_b, int broadcast, caffe2::ops::BatchMatmul::State* state, BaseContext* context) { + Tensor A(A_); + Tensor B(B_); + Tensor Y(Y_); using Engine = caffe2::DefaultEngine; auto ndims_A = A.dim(); @@ -77,9 +81,9 @@ void batch_matmul_op_cpu_impl( dims_B[0], "Vector-vector product requires each of the vectors to " "be the same size."); - Y->Resize(1); + Y.Resize(1); math::Dot( - dims_A[0], data_A, data_B, Y->template mutable_data(), static_cast(context)); + dims_A[0], data_A, data_B, Y.template mutable_data(), static_cast(context)); } else { bool A_broadcasted = false, B_broadcasted = false; if (ndims_A == 1) { @@ -231,8 +235,8 @@ void batch_matmul_op_cpu_impl( } // Allocate output tensor - Y->Resize(new_dims); - auto* Y_data = Y->template mutable_data(); + Y.Resize(new_dims); + auto* Y_data = Y.template mutable_data(); // Zero batch dimension indicates no elements if (num_sub_batches == 0 || num_outer_batches == 0) { diff --git a/caffe2/operators/experimental/c10/cpu/cast_cpu.cc b/caffe2/operators/experimental/c10/cpu/cast_cpu.cc index 6f0b5ba..6dd478d 100644 --- a/caffe2/operators/experimental/c10/cpu/cast_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/cast_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/cast.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::CPUContext; using caffe2::Tensor; @@ -10,10 +11,10 @@ namespace caffe2 { namespace { template -void do_cast_(const Tensor& input, Tensor* output) { - output->ResizeLike(input); +void do_cast_(const Tensor& input, const Tensor& output) { + output.ResizeLike(input); const auto* data = input.template data(); - auto* out = output->template mutable_data(); + auto* out = output.template mutable_data(); auto N = input.numel(); for (int64_t i = 0; i < N; ++i) { out[i] = static_cast(data[i]); @@ -22,9 +23,12 @@ void do_cast_(const Tensor& input, Tensor* output) { template void cast_op_cpu_impl( - const Tensor& input, - Tensor* output, + const C10Tensor& input_, + const C10Tensor& output_, TensorProto_DataType to) { + Tensor input(input_); + Tensor output(output_); + switch (to) { case caffe2::TensorProto_DataType_FLOAT: do_cast_(input, output); diff --git a/caffe2/operators/experimental/c10/cpu/concat_cpu.cc b/caffe2/operators/experimental/c10/cpu/concat_cpu.cc index cc3667b..9179cc4 100644 --- a/caffe2/operators/experimental/c10/cpu/concat_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/concat_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/concat.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::CPUContext; @@ -12,35 +13,38 @@ namespace caffe2 { namespace { template void concat_op_cpu_impl( - at::ArrayRef inputs, - Tensor* output, - Tensor* split, + at::ArrayRef inputs, + const C10Tensor& output_, + const C10Tensor& split_, int axis, int add_axis, BaseContext* context) { - split->Resize(vector(1, inputs.size())); - int* axis_data = split->template mutable_data(); - int adj_size = inputs[0]->dim() + (add_axis ? 1 : 0); + Tensor output(output_); + Tensor split(split_); + + split.Resize(vector(1, inputs.size())); + int* axis_data = split.template mutable_data(); + int adj_size = Tensor(inputs[0]).dim() + (add_axis ? 1 : 0); int canonical_axis = caffe2::canonical_axis_index_(axis, adj_size); CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range."); for (int i = 1; i < inputs.size(); ++i) { CAFFE_ENFORCE( - inputs[i]->dtype() == inputs[0]->dtype(), + Tensor(inputs[i]).dtype() == Tensor(inputs[0]).dtype(), "All inputs must have the same type, expected: ", - inputs[0]->dtype().name(), + Tensor(inputs[0]).dtype().name(), " but got: ", - inputs[i]->dtype().name(), + Tensor(inputs[i]).dtype().name(), " for input: ", i); } int before = 1, after = 1; - vector output_dims(inputs[0]->sizes().vec()); - for (int i = 0; i < inputs[0]->dim(); ++i) { + vector output_dims(Tensor(inputs[0]).sizes().vec()); + for (int i = 0; i < Tensor(inputs[0]).dim(); ++i) { if (i == canonical_axis && !add_axis) { continue; } - int dim = inputs[0]->dim32(i); + int dim = Tensor(inputs[0]).dim32(i); if (i < canonical_axis) { before *= dim; } else { // i > canonical_axis || i == canonical_axis && add_axis @@ -48,7 +52,7 @@ void concat_op_cpu_impl( } // check the input dims are compatible. for (int j = 1; j < inputs.size(); ++j) { - int dim_j = inputs[j]->dim32(i); + int dim_j = Tensor(inputs[j]).dim32(i); CAFFE_ENFORCE( dim == dim_j, "Expect dimension = ", @@ -63,16 +67,16 @@ void concat_op_cpu_impl( "when arg 'add_axis' = 0 and along the axis = ", canonical_axis, " <", - inputs[0]->sizes(), + Tensor(inputs[0]).sizes(), "> vs <", - inputs[j]->sizes(), + Tensor(inputs[j]).sizes(), ">."); } } int output_channels = 0; for (int i = 0; i < inputs.size(); ++i) { - axis_data[i] = add_axis ? 1 : inputs[i]->dim32(canonical_axis); + axis_data[i] = add_axis ? 1 : Tensor(inputs[i]).dim32(canonical_axis); output_channels += axis_data[i]; } if (add_axis) { @@ -80,10 +84,10 @@ void concat_op_cpu_impl( } else { output_dims[canonical_axis] = output_channels; } - output->Resize(output_dims); + output.Resize(output_dims); size_t output_offset = 0; for (int i = 0; i < inputs.size(); ++i) { - auto& input = *inputs[i]; + Tensor input(inputs[i]); auto axis_dim = add_axis ? 1 : input.dim32(canonical_axis); caffe2::math::CopyMatrix( input.itemsize(), @@ -91,11 +95,11 @@ void concat_op_cpu_impl( axis_dim * after, input.raw_data(), axis_dim * after, - static_cast(output->raw_mutable_data(inputs[0]->dtype())) + + static_cast(output.raw_mutable_data(Tensor(inputs[0]).dtype())) + output_offset, output_channels * after, static_cast(context), - inputs[0]->dtype().copy()); + Tensor(inputs[0]).dtype().copy()); output_offset += axis_dim * after * input.itemsize(); } } diff --git a/caffe2/operators/experimental/c10/cpu/enforce_finite_cpu.cc b/caffe2/operators/experimental/c10/cpu/enforce_finite_cpu.cc index be3b9c2..727ff3a 100644 --- a/caffe2/operators/experimental/c10/cpu/enforce_finite_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/enforce_finite_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/enforce_finite.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::CPUContext; using caffe2::Tensor; @@ -8,7 +9,8 @@ using caffe2::Tensor; namespace caffe2 { namespace { template -void enforce_finite_op_impl_cpu(const Tensor& input) { +void enforce_finite_op_impl_cpu(const C10Tensor& input_) { + Tensor input(input_); const DataType* input_data = input.template data(); auto size = input.numel(); diff --git a/caffe2/operators/experimental/c10/cpu/expand_dims_cpu.cc b/caffe2/operators/experimental/c10/cpu/expand_dims_cpu.cc index 2a72f43..5350ac7 100644 --- a/caffe2/operators/experimental/c10/cpu/expand_dims_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/expand_dims_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/expand_dims.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -9,11 +10,14 @@ namespace caffe2 { namespace { template void expand_dims_op_cpu_impl( - const Tensor& input, - Tensor* output, + const C10Tensor& input_, + const C10Tensor& output_, const std::vector& dims, caffe2::ops::ExpandDims::State* state, BaseContext* context) { + Tensor input(input_); + Tensor output(output_); + if (!state->initialized) { state->dims = dims; auto originalSize = state->dims.size(); @@ -29,7 +33,7 @@ void expand_dims_op_cpu_impl( state->initialized = true; } - output->CopyFrom(input, context); + output.CopyFrom(input, context); if (state->dims.empty()) { return; } @@ -44,7 +48,7 @@ void expand_dims_op_cpu_impl( for (const auto dim : state->dims) { newDims.insert(newDims.begin() + dim, 1); } - output->Reshape(newDims); + output.Reshape(newDims); } } // namespace } // namespace caffe2 diff --git a/caffe2/operators/experimental/c10/cpu/fc_cpu.cc b/caffe2/operators/experimental/c10/cpu/fc_cpu.cc index 5d0e244..3deb2a9 100644 --- a/caffe2/operators/experimental/c10/cpu/fc_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/fc_cpu.cc @@ -4,6 +4,7 @@ #include "caffe2/operators/experimental/c10/schemas/fc.h" #include "caffe2/utils/conversions.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -12,14 +13,19 @@ namespace caffe2 { namespace { template void fc_op_cpu_impl( - const Tensor& X, - const Tensor& W, - const Tensor& b, - Tensor* Y, + const C10Tensor& X_, + const C10Tensor& W_, + const C10Tensor& b_, + const C10Tensor& Y_, int axis, int axis_w, caffe2::ops::FullyConnected::Cache* cache, BaseContext* context) { + Tensor X(X_); + Tensor W(W_); + Tensor b(b_); + Tensor Y(Y_); + constexpr bool TransposeWeight = true; CAFFE_ENFORCE(b.dim() == 1, b.dim()); @@ -61,12 +67,12 @@ void fc_op_cpu_impl( DCHECK_LE(canonical_axis + 1, cache->Y_shape_cache_.size()); cache->Y_shape_cache_.resize(canonical_axis + 1); cache->Y_shape_cache_[canonical_axis] = N; - Y->Resize(cache->Y_shape_cache_); - CAFFE_ENFORCE(M * N == Y->numel(), dimErrorString()); + Y.Resize(cache->Y_shape_cache_); + CAFFE_ENFORCE(M * N == Y.numel(), dimErrorString()); if (X.numel() == 0) { // skip the rest of the computation if X is empty - Y->template mutable_data(); + Y.template mutable_data(); return; } @@ -87,17 +93,18 @@ void fc_op_cpu_impl( X.template data(), W.template data(), 0, - Y->template mutable_data(), + Y.template mutable_data(), static_cast(context), math_type); // Add bias term - if (cache->bias_multiplier_.numel() != M) { + Tensor bias_multiplier(cache->bias_multiplier_); + if (bias_multiplier.numel() != M) { // If the helper bias multiplier is not M, reshape and fill it with one. - cache->bias_multiplier_.Resize(M); + bias_multiplier.Resize(M); caffe2::math::Set( M, caffe2::convert::To(1), - cache->bias_multiplier_.template mutable_data(), + bias_multiplier.template mutable_data(), static_cast(context)); } caffe2::math::Gemm( @@ -107,10 +114,10 @@ void fc_op_cpu_impl( N, 1, 1, - cache->bias_multiplier_.template data(), + bias_multiplier.template data(), b.template data(), 1, - Y->template mutable_data(), + Y.template mutable_data(), static_cast(context), math_type); } diff --git a/caffe2/operators/experimental/c10/cpu/filler_cpu.cc b/caffe2/operators/experimental/c10/cpu/filler_cpu.cc index e4d1496..6e2c223 100644 --- a/caffe2/operators/experimental/c10/cpu/filler_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/filler_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/filler.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::CPUContext; using caffe2::Tensor; @@ -10,16 +11,17 @@ using std::vector; namespace caffe2 { namespace { void filler_init( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output_, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape) { + Tensor output(output_); if (inputs.size()) { auto real_shape = vector{}; if (input_as_shape) { // Shape input must be in CPU context - auto& input = *inputs[0]; + Tensor input(inputs[0]); CAFFE_ENFORCE_EQ( input.dim(), 1, @@ -29,75 +31,80 @@ void filler_init( real_shape.insert( real_shape.end(), shape_data, shape_data + input.dim32(0)); } else { - auto& input = *inputs[0]; + Tensor input(inputs[0]); real_shape.insert( real_shape.end(), input.sizes().begin(), input.sizes().end()); } real_shape.insert(real_shape.end(), extra_shape.begin(), extra_shape.end()); - output->Resize(real_shape); + output.Resize(real_shape); } else { - output->Resize(shape); + output.Resize(shape); } } template void given_tensor_fill_op_cpu_impl( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output_, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, - const Tensor& values, + const C10Tensor& values_, BaseContext* context) { - filler_init(inputs, output, shape, extra_shape, input_as_shape); + Tensor output(output_); + Tensor values(values_); + + filler_init(inputs, output_, shape, extra_shape, input_as_shape); // TODO T might not be the correct type to call, since float allows others. - DCHECK_EQ(output->numel(), values.numel()) - << "output size: " << output->numel() + DCHECK_EQ(output.numel(), values.numel()) + << "output size: " << output.numel() << " given size: " << values.numel(); - auto* data = output->template mutable_data(); + auto* data = output.template mutable_data(); const Type* values_data = values.template data(); - if (output->numel()) { - context->CopySameDevice(output->numel(), values_data, data); + if (output.numel()) { + context->CopySameDevice(output.numel(), values_data, data); } } void constant_fill_op_cpu_impl( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output_, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, int dtype, caffe2::ops::ConstantFill::Value value, BaseContext* context) { - filler_init(inputs, output, shape, extra_shape, input_as_shape); + Tensor output(output_); + + filler_init(inputs, output_, shape, extra_shape, input_as_shape); - if (output->numel()) { + if (output.numel()) { if (dtype == caffe2::TensorProto_DataType_FLOAT) { caffe2::math::Set( - output->numel(), + output.numel(), value.as_float, - output->template mutable_data(), + output.template mutable_data(), static_cast(context)); } else if (dtype == caffe2::TensorProto_DataType_INT32) { caffe2::math::Set( - output->numel(), + output.numel(), value.as_int32, - output->template mutable_data(), + output.template mutable_data(), static_cast(context)); } else if (dtype == caffe2::TensorProto_DataType_INT64) { caffe2::math::Set( - output->numel(), + output.numel(), value.as_int64, - output->template mutable_data(), + output.template mutable_data(), static_cast(context)); } else if (dtype == caffe2::TensorProto_DataType_BOOL) { caffe2::math::Set( - output->numel(), + output.numel(), value.as_bool, - output->template mutable_data(), + output.template mutable_data(), static_cast(context)); } else { throw std::logic_error( @@ -108,34 +115,36 @@ void constant_fill_op_cpu_impl( } void uniform_fill_op_cpu_impl( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output_, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, float min, float max, BaseContext* context) { - filler_init(inputs, output, shape, extra_shape, input_as_shape); + Tensor output(output_); + + filler_init(inputs, output_, shape, extra_shape, input_as_shape); if (inputs.size() == 3) { - CAFFE_ENFORCE_EQ(1, inputs[1]->numel(), "min blob must be scalar"); - CAFFE_ENFORCE_EQ(1, inputs[2]->numel(), "max blob must be scalar"); - min = *inputs[1]->template data(); - max = *inputs[2]->template data(); + CAFFE_ENFORCE_EQ(1, Tensor(inputs[1]).numel(), "min blob must be scalar"); + CAFFE_ENFORCE_EQ(1, Tensor(inputs[2]).numel(), "max blob must be scalar"); + min = *Tensor(inputs[1]).template data(); + max = *Tensor(inputs[2]).template data(); if (min > max) { - auto shape = output->sizes().vec(); + auto shape = output.sizes().vec(); shape[0] = 0; - output->Resize(shape); - output->template mutable_data(); + output.Resize(shape); + output.template mutable_data(); return; } } caffe2::math::RandUniform( - output->numel(), + output.numel(), min, max, - output->template mutable_data(), + output.template mutable_data(), static_cast(context)); } } // namespace diff --git a/caffe2/operators/experimental/c10/cpu/flatten_cpu.cc b/caffe2/operators/experimental/c10/cpu/flatten_cpu.cc index abab1ff..a1b1ae0 100644 --- a/caffe2/operators/experimental/c10/cpu/flatten_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/flatten_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/flatten.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -9,18 +10,20 @@ namespace caffe2 { namespace { template void flatten_op_cpu_impl( - const Tensor& input, - Tensor* output, + const C10Tensor& input_, + const C10Tensor& output_, int axis, BaseContext* context) { + Tensor input(input_); + Tensor output(output_); CAFFE_ENFORCE_GE( input.sizes().size(), axis, "The rank of the tensor must be >= axis."); - output->Resize(input.size_to_dim(axis), input.size_from_dim(axis)); + output.Resize(input.size_to_dim(axis), input.size_from_dim(axis)); context->CopyItemsSameDevice( input.dtype(), input.numel(), input.raw_data(), - output->raw_mutable_data(input.dtype())); + output.raw_mutable_data(input.dtype())); } } // namespace } // namespace caffe2 diff --git a/caffe2/operators/experimental/c10/cpu/mul_cpu.cc b/caffe2/operators/experimental/c10/cpu/mul_cpu.cc index 682e013..1633449 100644 --- a/caffe2/operators/experimental/c10/cpu/mul_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/mul_cpu.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/elementwise_ops_utils.h" #include "caffe2/operators/experimental/c10/schemas/mul.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -11,12 +12,15 @@ namespace { template void mul_op_cpu_impl( - const Tensor& A, - const Tensor& B, - Tensor* C, + const C10Tensor& A_, + const C10Tensor& B_, + const C10Tensor& C_, bool legacy_broadcast, int axis, BaseContext* context) { + Tensor A(A_); + Tensor B(B_); + Tensor C(C_); const DataType* A_data = A.template data(); const DataType* B_data = B.template data(); std::vector A_dims; @@ -24,11 +28,11 @@ void mul_op_cpu_impl( if (legacy_broadcast) { CAFFE_ENFORCE_NE( - C, - &B, + C.getIntrusivePtr(), + B.getIntrusivePtr(), "In-place is allowed only with the first tensor when " "legacy-broadcasting"); - C->ResizeLike(A); + C.ResizeLike(A); if (B.numel() == 1) { A_dims = {static_cast(A.numel())}; B_dims = {1}; @@ -47,15 +51,15 @@ void mul_op_cpu_impl( const std::vector C_dims = caffe2::elementwise_ops_utils::ComputeBinaryBroadcastForwardDims( A_dims, B_dims); - if (C == &A) { + if (C.getIntrusivePtr() == A.getIntrusivePtr()) { CAFFE_ENFORCE_EQ(C_dims, A_dims); - } else if (C == &B) { + } else if (C.getIntrusivePtr() == B.getIntrusivePtr()) { CAFFE_ENFORCE_EQ(C_dims, B_dims); } else { - C->Resize(C_dims); + C.Resize(C_dims); } } - auto* C_data = C->template mutable_data(); + auto* C_data = C.template mutable_data(); caffe2::math::Mul( A_dims.size(), @@ -64,7 +68,7 @@ void mul_op_cpu_impl( B_dims.data(), A.data(), B.data(), - C->mutable_data(), + C.mutable_data(), static_cast(context)); } } // namespace diff --git a/caffe2/operators/experimental/c10/cpu/relu_cpu.cc b/caffe2/operators/experimental/c10/cpu/relu_cpu.cc index 46ad7a3..f8182d2 100644 --- a/caffe2/operators/experimental/c10/cpu/relu_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/relu_cpu.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/experimental/c10/schemas/relu.h" #include "caffe2/utils/eigen_utils.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::Tensor; @@ -9,9 +10,12 @@ namespace caffe2 { namespace { template void relu_op_cpu_impl( - const Tensor& input, - Tensor* output) { - output->ResizeLike(input); + const C10Tensor& input_, + const C10Tensor& output_) { + Tensor input(input_); + Tensor output(output_); + + output.ResizeLike(input); #ifdef CAFFE2_USE_ACCELERATE const float zero = 0.0f; @@ -19,17 +23,17 @@ void relu_op_cpu_impl( input.data(), 1, &zero, - output->mutable_data(), + output.mutable_data(), 1, input.size()); #else - caffe2::EigenVectorMap(output->mutable_data(), input.numel()) = + caffe2::EigenVectorMap(output.mutable_data(), input.numel()) = caffe2::ConstEigenVectorMap(input.data(), input.numel()) .cwiseMax(0.f); #endif /* Naive implementation const float* input_data = input.data(); - float* output_data = output->mutable_data(); + float* output_data = output.mutable_data(); for (int i = 0; i < input.size(); ++i) { output_data[i] = std::max(input_data[i], 0.f); } diff --git a/caffe2/operators/experimental/c10/cpu/sigmoid_cpu.cc b/caffe2/operators/experimental/c10/cpu/sigmoid_cpu.cc index 68a3ee1..2a2a035 100644 --- a/caffe2/operators/experimental/c10/cpu/sigmoid_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/sigmoid_cpu.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/experimental/c10/schemas/sigmoid.h" #include "caffe2/utils/eigen_utils.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::Tensor; @@ -9,14 +10,16 @@ namespace caffe2 { namespace { template void sigmoid_op_cpu_impl( - const Tensor& input, - Tensor* output) { - output->ResizeLike(input); + const C10Tensor& input_, + const C10Tensor& output_) { + Tensor input(input_); + Tensor output(output_); + output.ResizeLike(input); caffe2::ConstEigenVectorArrayMap xM( input.data(), input.numel()); caffe2::EigenVectorArrayMap( - output->mutable_data(), input.numel()) = + output.mutable_data(), input.numel()) = 1. / (1. + (-xM).exp()); } } // namespace diff --git a/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc b/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc index 182b17f..7fec595 100644 --- a/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/sigmoid_cross_entropy_with_logits_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::Tensor; @@ -25,22 +26,26 @@ inline float unjoined_sigmoid_xent_forward(float lgt, float tgt) { } void sigmoid_cross_entropy_with_logits_op_cpu_impl( - const Tensor& logits, - const Tensor& targets, - Tensor* out, + const C10Tensor& logits_, + const C10Tensor& targets_, + const C10Tensor& out_, bool log_D_trick, bool unjoined_lr_loss) { + Tensor logits(logits_); + Tensor targets(targets_); + Tensor out(out_); + CAFFE_ENFORCE_EQ(logits.sizes(), targets.sizes()); const auto inner_size = logits.dim() > 0 ? logits.sizes().back() : 1; const auto outer_size = logits.numel() / inner_size; if (logits.dim() == 0) { - out->Resize(std::vector{}); + out.Resize(std::vector{}); } else { std::vector dims(logits.sizes().begin(), logits.sizes().end() - 1); - out->Resize(dims); + out.Resize(dims); } - auto* out_ptr = out->mutable_data(); + auto* out_ptr = out.mutable_data(); auto* logits_ptr = logits.data(); auto* targets_ptr = targets.data(); diff --git a/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc b/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc index 669d3b2..2bc2307 100644 --- a/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/sparse_lengths_sum_cpu.cc @@ -2,6 +2,7 @@ #include "caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h" #include "caffe2/perfkernels/embedding_lookup.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::Tensor; @@ -10,10 +11,15 @@ namespace { template void sparse_lengths_sum_op_cpu_impl( - const Tensor& dataInput, - const Tensor& indicesInput, - const Tensor& lengthsInput, - Tensor* output) { + const C10Tensor& dataInput_, + const C10Tensor& indicesInput_, + const C10Tensor& lengthsInput_, + const C10Tensor& output_) { + Tensor dataInput(dataInput_); + Tensor indicesInput(indicesInput_); + Tensor lengthsInput(lengthsInput_); + Tensor output(output_); + using T = float; constexpr bool USE_MEAN = false; constexpr bool USE_POSITIONAL_WEIGHT = false; @@ -27,8 +33,8 @@ void sparse_lengths_sum_op_cpu_impl( auto shape = dataInput.sizes().vec(); shape[0] = M; - output->Resize(shape); - T* out_data = output->template mutable_data(); + output.Resize(shape); + T* out_data = output.template mutable_data(); const InputType* in_data = dataInput.template data(); const IndexType* indices = indicesInput.template data(); diff --git a/caffe2/operators/experimental/c10/cpu/stop_gradient_cpu.cc b/caffe2/operators/experimental/c10/cpu/stop_gradient_cpu.cc index c26c6df..3f0fa0b 100644 --- a/caffe2/operators/experimental/c10/cpu/stop_gradient_cpu.cc +++ b/caffe2/operators/experimental/c10/cpu/stop_gradient_cpu.cc @@ -1,6 +1,7 @@ #include #include "caffe2/operators/experimental/c10/schemas/stop_gradient.h" #include "caffe2/utils/math.h" +#include "caffe2/core/tensor.h" using caffe2::BaseContext; using caffe2::Tensor; @@ -9,11 +10,13 @@ namespace caffe2 { namespace { template void stop_gradient_op_cpu_impl( - const Tensor& input, - Tensor* output, + const C10Tensor& input_, + const C10Tensor& output_, BaseContext* context) { - if (output != &input) { - output->CopyFrom(input, context); + Tensor input(input_); + Tensor output(output_); + if (output.getIntrusivePtr() != input.getIntrusivePtr()) { + output.CopyFrom(input, context); } } } // namespace diff --git a/caffe2/operators/experimental/c10/schemas/add.cc b/caffe2/operators/experimental/c10/schemas/add.cc index 8b6fa0c..fd3e7dc 100644 --- a/caffe2/operators/experimental/c10/schemas/add.cc +++ b/caffe2/operators/experimental/c10/schemas/add.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Add); diff --git a/caffe2/operators/experimental/c10/schemas/add.h b/caffe2/operators/experimental/c10/schemas/add.h index 017c959..b1334e0 100644 --- a/caffe2/operators/experimental/c10/schemas/add.h +++ b/caffe2/operators/experimental/c10/schemas/add.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,13 +11,17 @@ struct Add final { static constexpr const char* name = "add"; using Signature = void( - const Tensor& input1, - const Tensor& input2, - Tensor* output, + const C10Tensor& input1, + const C10Tensor& input2, + const C10Tensor& output, bool legacy_broadcast, int axis, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 2;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input1", "input2", "output", "legacy_broadcast", "axis", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/averaged_loss.cc b/caffe2/operators/experimental/c10/schemas/averaged_loss.cc index eab6dc4..7a10f36 100644 --- a/caffe2/operators/experimental/c10/schemas/averaged_loss.cc +++ b/caffe2/operators/experimental/c10/schemas/averaged_loss.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::AveragedLoss); diff --git a/caffe2/operators/experimental/c10/schemas/averaged_loss.h b/caffe2/operators/experimental/c10/schemas/averaged_loss.h index 8f39d48..1f87651 100644 --- a/caffe2/operators/experimental/c10/schemas/averaged_loss.h +++ b/caffe2/operators/experimental/c10/schemas/averaged_loss.h @@ -1,24 +1,30 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" +#include "caffe2/core/tensor.h" namespace caffe2 { namespace ops { struct AveragedLoss final { struct State final { - Tensor scratch = Tensor{CPU}; + C10Tensor scratch = C10Tensor(empty({}, CPU)); }; static constexpr const char* name = "averaged_loss"; using Signature = void( - const Tensor& input, - Tensor* output, + const C10Tensor& input, + const C10Tensor& output, State* state, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "state", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/batch_gather.cc b/caffe2/operators/experimental/c10/schemas/batch_gather.cc index 81bd82f..070e9ce 100644 --- a/caffe2/operators/experimental/c10/schemas/batch_gather.cc +++ b/caffe2/operators/experimental/c10/schemas/batch_gather.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::BatchGather); diff --git a/caffe2/operators/experimental/c10/schemas/batch_gather.h b/caffe2/operators/experimental/c10/schemas/batch_gather.h index 3107f5c..e51c0a4 100644 --- a/caffe2/operators/experimental/c10/schemas/batch_gather.h +++ b/caffe2/operators/experimental/c10/schemas/batch_gather.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,11 +11,15 @@ struct BatchGather final { static constexpr const char* name = "batch_gather"; using Signature = void( - const Tensor& data, - const Tensor& indices, - Tensor* output, + const C10Tensor& data, + const C10Tensor& indices, + const C10Tensor& output, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 2;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"data", "indices", "output", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/batch_matmul.cc b/caffe2/operators/experimental/c10/schemas/batch_matmul.cc index 8edcba5..80747df 100644 --- a/caffe2/operators/experimental/c10/schemas/batch_matmul.cc +++ b/caffe2/operators/experimental/c10/schemas/batch_matmul.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::BatchMatmul); diff --git a/caffe2/operators/experimental/c10/schemas/batch_matmul.h b/caffe2/operators/experimental/c10/schemas/batch_matmul.h index 6048a14..6788f4a 100644 --- a/caffe2/operators/experimental/c10/schemas/batch_matmul.h +++ b/caffe2/operators/experimental/c10/schemas/batch_matmul.h @@ -1,28 +1,33 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { struct BatchMatmul final { struct State final { - std::shared_ptr scratch; + std::shared_ptr scratch; }; static constexpr const char* name = "batch_matmul"; using Signature = void( - const Tensor& A, - const Tensor& B, - Tensor* output, + const C10Tensor& A, + const C10Tensor& B, + const C10Tensor& output, int trans_a, int trans_b, int broadcast, State* state, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 2;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"A", "B", diff --git a/caffe2/operators/experimental/c10/schemas/cast.cc b/caffe2/operators/experimental/c10/schemas/cast.cc index 802556c..73a1815 100644 --- a/caffe2/operators/experimental/c10/schemas/cast.cc +++ b/caffe2/operators/experimental/c10/schemas/cast.cc @@ -4,7 +4,6 @@ #include "caffe2/utils/cast.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Cast); diff --git a/caffe2/operators/experimental/c10/schemas/cast.h b/caffe2/operators/experimental/c10/schemas/cast.h index 3b7cacb..1e8204b 100644 --- a/caffe2/operators/experimental/c10/schemas/cast.h +++ b/caffe2/operators/experimental/c10/schemas/cast.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,10 +11,14 @@ struct Cast final { static constexpr const char* name = "cast"; using Signature = void( - const Tensor& input1, - Tensor* output, + const C10Tensor& input1, + const C10Tensor& output, TensorProto_DataType to); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "to"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/concat.cc b/caffe2/operators/experimental/c10/schemas/concat.cc index 0782432..fad2b17 100644 --- a/caffe2/operators/experimental/c10/schemas/concat.cc +++ b/caffe2/operators/experimental/c10/schemas/concat.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Concat); diff --git a/caffe2/operators/experimental/c10/schemas/concat.h b/caffe2/operators/experimental/c10/schemas/concat.h index 9965e3f..e739658 100644 --- a/caffe2/operators/experimental/c10/schemas/concat.h +++ b/caffe2/operators/experimental/c10/schemas/concat.h @@ -1,9 +1,10 @@ #pragma once #include -#include "caffe2/core/tensor.h" +#include #include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -12,20 +13,22 @@ struct Concat final { static constexpr const char* name = "concat"; using Signature = void( - at::ArrayRef inputs, - Tensor* output, - Tensor* split_info, + at::ArrayRef inputs, + const C10Tensor& output, + const C10Tensor& split_info, int add, int add_axis, BaseContext* context); + static constexpr size_t num_outputs() {return 2;} + static constexpr c10::guts::array parameter_names = { {"inputs", "output", "split_info_output", "add", "add_axis", "context"}}; static c10::DeviceTypeId dispatch_key( - at::ArrayRef inputs, - Tensor* output, - Tensor* split_info, + at::ArrayRef inputs, + const C10Tensor& output, + const C10Tensor& split_info, int add, int add_axis, BaseContext* context) { diff --git a/caffe2/operators/experimental/c10/schemas/enforce_finite.cc b/caffe2/operators/experimental/c10/schemas/enforce_finite.cc index 429d051..cf6c745 100644 --- a/caffe2/operators/experimental/c10/schemas/enforce_finite.cc +++ b/caffe2/operators/experimental/c10/schemas/enforce_finite.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::EnforceFinite); diff --git a/caffe2/operators/experimental/c10/schemas/enforce_finite.h b/caffe2/operators/experimental/c10/schemas/enforce_finite.h index aefa67c..2e3f0da 100644 --- a/caffe2/operators/experimental/c10/schemas/enforce_finite.h +++ b/caffe2/operators/experimental/c10/schemas/enforce_finite.h @@ -1,6 +1,6 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include namespace caffe2 { @@ -9,7 +9,11 @@ namespace ops { struct EnforceFinite final { static constexpr const char* name = "enforce_finite"; - using Signature = void(const Tensor& input); + using Signature = void(const C10Tensor& input); + + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 0;} static constexpr c10::guts::array parameter_names = { {"input"}}; diff --git a/caffe2/operators/experimental/c10/schemas/expand_dims.cc b/caffe2/operators/experimental/c10/schemas/expand_dims.cc index f7c3119..b6e56cf 100644 --- a/caffe2/operators/experimental/c10/schemas/expand_dims.cc +++ b/caffe2/operators/experimental/c10/schemas/expand_dims.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::ExpandDims); diff --git a/caffe2/operators/experimental/c10/schemas/expand_dims.h b/caffe2/operators/experimental/c10/schemas/expand_dims.h index ab9dcba..6684652 100644 --- a/caffe2/operators/experimental/c10/schemas/expand_dims.h +++ b/caffe2/operators/experimental/c10/schemas/expand_dims.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -15,12 +16,16 @@ struct ExpandDims final { static constexpr const char* name = "expand_dims"; using Signature = void( - const Tensor& input, - Tensor* output, + const C10Tensor& input, + const C10Tensor& output, const std::vector& dims, State* state, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "dims", "state", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/fc.cc b/caffe2/operators/experimental/c10/schemas/fc.cc index 2447e78..ee0e28c 100644 --- a/caffe2/operators/experimental/c10/schemas/fc.cc +++ b/caffe2/operators/experimental/c10/schemas/fc.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::FullyConnected); diff --git a/caffe2/operators/experimental/c10/schemas/fc.h b/caffe2/operators/experimental/c10/schemas/fc.h index 638855c..bea1353 100644 --- a/caffe2/operators/experimental/c10/schemas/fc.h +++ b/caffe2/operators/experimental/c10/schemas/fc.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/tensor.h" namespace caffe2 { namespace ops { @@ -11,19 +12,23 @@ struct FullyConnected final { struct Cache final { vector Y_shape_cache_; - Tensor bias_multiplier_ = Tensor{CPU}; + C10Tensor bias_multiplier_ = C10Tensor(Tensor{CPU}); }; using Signature = void( - const Tensor& X, - const Tensor& W, - const Tensor& b, - Tensor* output, + const C10Tensor& X, + const C10Tensor& W, + const C10Tensor& b, + const C10Tensor& output, int axis, int axis_w, Cache* cache, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 3;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"X", "W", "b", "output", "axis", "axis_w", "cache", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/filler.cc b/caffe2/operators/experimental/c10/schemas/filler.cc index 5b74dc6..f3d0e61 100644 --- a/caffe2/operators/experimental/c10/schemas/filler.cc +++ b/caffe2/operators/experimental/c10/schemas/filler.cc @@ -4,7 +4,7 @@ #include "caffe2/utils/cast.h" using caffe2::CPUContext; -using caffe2::Tensor; +using c10::C10Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::ConstantFill); C10_DEFINE_OP_SCHEMA(caffe2::ops::UniformFill); @@ -86,8 +86,8 @@ struct MaxParameter final { }; template struct ValuesParameter final { - using type = Tensor; - static Tensor parse(const caffe2::ArgumentHelper& helper) { + using type = C10Tensor; + static C10Tensor parse(const caffe2::ArgumentHelper& helper) { if (!std::is_same::value || !helper.HasArgument("dtype")) { return ExtractValues(helper); } else { @@ -115,17 +115,17 @@ struct ValuesParameter final { private: template - static Tensor ExtractValues( + static C10Tensor ExtractValues( const caffe2::ArgumentHelper& helper) { auto source_values = helper.GetRepeatedArgument("values"); - Tensor values{caffe2::CPU}; + caffe2::Tensor values{caffe2::CPU}; values.Resize(source_values.size()); Type* values_data = values.template mutable_data(); for (int i = 0; i < source_values.size(); i++) { values_data[i] = static_cast(source_values[i]); } // body_ = &GivenTensorFillOp::FillWithType; - return values; + return C10Tensor(values); } }; } // namespace diff --git a/caffe2/operators/experimental/c10/schemas/filler.h b/caffe2/operators/experimental/c10/schemas/filler.h index b5458d8..dc81ca7 100644 --- a/caffe2/operators/experimental/c10/schemas/filler.h +++ b/caffe2/operators/experimental/c10/schemas/filler.h @@ -1,9 +1,10 @@ #pragma once #include -#include "caffe2/core/tensor.h" +#include #include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -17,12 +18,12 @@ struct GivenTensorFill final { static constexpr const char* name = "given_tensor_fill"; using Signature = void( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, - const Tensor& values, + const C10Tensor& values, BaseContext* context); static constexpr c10::guts::array parameter_names = { @@ -34,13 +35,15 @@ struct GivenTensorFill final { "values", "context"}}; - static c10::DeviceTypeId dispatch_key( - at::ArrayRef inputs, - Tensor* output, + static constexpr size_t num_outputs() {return 1;} + + static c10::DeviceTypeId dispatch_key( + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, - const Tensor& values, + const C10Tensor& values, BaseContext* context) { return c10::DeviceTypeId::CPU; } @@ -56,8 +59,8 @@ struct ConstantFill final { static constexpr const char* name = "constant_fill"; using Signature = void( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, @@ -65,6 +68,8 @@ struct ConstantFill final { Value value, BaseContext* context); + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"inputs", "output", @@ -76,8 +81,8 @@ struct ConstantFill final { "context"}}; static c10::DeviceTypeId dispatch_key( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, @@ -92,8 +97,8 @@ struct UniformFill final { static constexpr const char* name = "uniform_fill"; using Signature = void( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, @@ -101,6 +106,8 @@ struct UniformFill final { float max, BaseContext* context); + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"inputs", "output", @@ -112,8 +119,8 @@ struct UniformFill final { "context"}}; static c10::DeviceTypeId dispatch_key( - at::ArrayRef inputs, - Tensor* output, + at::ArrayRef inputs, + const C10Tensor& output, const std::vector& shape, const std::vector& extra_shape, bool input_as_shape, diff --git a/caffe2/operators/experimental/c10/schemas/flatten.cc b/caffe2/operators/experimental/c10/schemas/flatten.cc index 17a78ad..6c5d544 100644 --- a/caffe2/operators/experimental/c10/schemas/flatten.cc +++ b/caffe2/operators/experimental/c10/schemas/flatten.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Flatten); diff --git a/caffe2/operators/experimental/c10/schemas/flatten.h b/caffe2/operators/experimental/c10/schemas/flatten.h index 7968790..26954d9 100644 --- a/caffe2/operators/experimental/c10/schemas/flatten.h +++ b/caffe2/operators/experimental/c10/schemas/flatten.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,11 +11,15 @@ struct Flatten final { static constexpr const char* name = "flatten"; using Signature = void( - const Tensor& input, - Tensor* output, + const C10Tensor& input, + const C10Tensor& output, int axis, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "axis", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/layer_norm.cc b/caffe2/operators/experimental/c10/schemas/layer_norm.cc index 4a19b10..149ef0c 100644 --- a/caffe2/operators/experimental/c10/schemas/layer_norm.cc +++ b/caffe2/operators/experimental/c10/schemas/layer_norm.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::LayerNorm); diff --git a/caffe2/operators/experimental/c10/schemas/layer_norm.h b/caffe2/operators/experimental/c10/schemas/layer_norm.h index 5d4f51f..11df08b 100644 --- a/caffe2/operators/experimental/c10/schemas/layer_norm.h +++ b/caffe2/operators/experimental/c10/schemas/layer_norm.h @@ -1,7 +1,9 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" +#include "caffe2/core/tensor.h" namespace caffe2 { namespace ops { @@ -10,20 +12,24 @@ struct LayerNorm final { static constexpr const char* name = "LayerNorm"; struct Cache final { - Tensor scale = empty({}, CPU); - Tensor bias = empty({}, CPU); + at::optional scale = at::nullopt; + at::optional bias = at::nullopt; }; using Signature = void( - const Tensor& input, - Tensor* output, - Tensor* output_mean, - Tensor* output_stddev, + const C10Tensor& input, + const C10Tensor& output, + const C10Tensor& output_mean, + const C10Tensor& output_stddev, int axis, float epsilon, Cache* cache, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 3;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/mul.cc b/caffe2/operators/experimental/c10/schemas/mul.cc index fe27fd5..7c7d871 100644 --- a/caffe2/operators/experimental/c10/schemas/mul.cc +++ b/caffe2/operators/experimental/c10/schemas/mul.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Mul); diff --git a/caffe2/operators/experimental/c10/schemas/mul.h b/caffe2/operators/experimental/c10/schemas/mul.h index 2980242..92187dd 100644 --- a/caffe2/operators/experimental/c10/schemas/mul.h +++ b/caffe2/operators/experimental/c10/schemas/mul.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,13 +11,17 @@ struct Mul final { static constexpr const char* name = "mul"; using Signature = void( - const Tensor& input1, - const Tensor& input2, - Tensor* output, + const C10Tensor& input1, + const C10Tensor& input2, + const C10Tensor& output, bool legacy_broadcast, int axis, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 2;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input1", "input2", "output", "legacy_broadcast", "axis", "context"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/relu.cc b/caffe2/operators/experimental/c10/schemas/relu.cc index 23a5b75..fbc094d 100644 --- a/caffe2/operators/experimental/c10/schemas/relu.cc +++ b/caffe2/operators/experimental/c10/schemas/relu.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Relu); diff --git a/caffe2/operators/experimental/c10/schemas/relu.h b/caffe2/operators/experimental/c10/schemas/relu.h index dd0d3f9..19606f8 100644 --- a/caffe2/operators/experimental/c10/schemas/relu.h +++ b/caffe2/operators/experimental/c10/schemas/relu.h @@ -1,6 +1,6 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include namespace caffe2 { @@ -10,7 +10,11 @@ struct Relu final { static constexpr const char* name = "relu"; using Signature = - void(const Tensor& input, Tensor* output); + void(const C10Tensor& input, const C10Tensor& output); + + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} static constexpr c10::guts::array parameter_names = { {"input", "output"}}; diff --git a/caffe2/operators/experimental/c10/schemas/sigmoid.cc b/caffe2/operators/experimental/c10/schemas/sigmoid.cc index e83b3e9..d6eb86f 100644 --- a/caffe2/operators/experimental/c10/schemas/sigmoid.cc +++ b/caffe2/operators/experimental/c10/schemas/sigmoid.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::Sigmoid); diff --git a/caffe2/operators/experimental/c10/schemas/sigmoid.h b/caffe2/operators/experimental/c10/schemas/sigmoid.h index 9d227ec..ad70d05 100644 --- a/caffe2/operators/experimental/c10/schemas/sigmoid.h +++ b/caffe2/operators/experimental/c10/schemas/sigmoid.h @@ -1,6 +1,6 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include namespace caffe2 { @@ -10,7 +10,11 @@ struct Sigmoid final { static constexpr const char* name = "sigmoid"; using Signature = - void(const Tensor& input, Tensor* output); + void(const C10Tensor& input, const C10Tensor& output); + + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} static constexpr c10::guts::array parameter_names = { {"input", "output"}}; diff --git a/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.cc b/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.cc index e3b55a6..d9fd187 100644 --- a/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.cc +++ b/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::SigmoidCrossEntropyWithLogits); diff --git a/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h b/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h index 6a93d0b..7e2d8d7 100644 --- a/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h +++ b/caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h @@ -1,6 +1,6 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include namespace caffe2 { @@ -10,12 +10,16 @@ struct SigmoidCrossEntropyWithLogits final { static constexpr const char* name = "sigmoid_cross_entropy_with_logits"; using Signature = void( - const Tensor& input1, - const Tensor& input2, - Tensor* output, + const C10Tensor& input1, + const C10Tensor& input2, + const C10Tensor& output, bool log_D_trick, bool unjoined_lr_loss); + static constexpr size_t num_dispatch_args() {return 2;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input1", "input2", "output", "log_d_trick", "unjoined_lr_loss"}}; }; diff --git a/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.cc b/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.cc index 190e172..28a1428 100644 --- a/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.cc +++ b/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::SparseLengthsSum); diff --git a/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h b/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h index 1b7b5b1..33f9655 100644 --- a/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h +++ b/caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h @@ -1,6 +1,6 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include namespace caffe2 { @@ -10,10 +10,14 @@ struct SparseLengthsSum final { static constexpr const char* name = "sparse_lengths_sum"; using Signature = void( - const Tensor& data, - const Tensor& indices, - const Tensor& lengths, - Tensor* output); + const C10Tensor& data, + const C10Tensor& indices, + const C10Tensor& lengths, + const C10Tensor& output); + + static constexpr size_t num_dispatch_args() {return 3;} + + static constexpr size_t num_outputs() {return 1;} static constexpr c10::guts::array parameter_names = { {"data", "indices", "lengths", "output"}}; diff --git a/caffe2/operators/experimental/c10/schemas/stop_gradient.cc b/caffe2/operators/experimental/c10/schemas/stop_gradient.cc index a2eb9c6..94345b1 100644 --- a/caffe2/operators/experimental/c10/schemas/stop_gradient.cc +++ b/caffe2/operators/experimental/c10/schemas/stop_gradient.cc @@ -3,7 +3,6 @@ #include "caffe2/core/operator_c10wrapper.h" using caffe2::CPUContext; -using caffe2::Tensor; C10_DEFINE_OP_SCHEMA(caffe2::ops::StopGradient); diff --git a/caffe2/operators/experimental/c10/schemas/stop_gradient.h b/caffe2/operators/experimental/c10/schemas/stop_gradient.h index 4a663b6..f38a4aa 100644 --- a/caffe2/operators/experimental/c10/schemas/stop_gradient.h +++ b/caffe2/operators/experimental/c10/schemas/stop_gradient.h @@ -1,7 +1,8 @@ #pragma once -#include "caffe2/core/tensor.h" +#include #include +#include "caffe2/core/context_base.h" namespace caffe2 { namespace ops { @@ -10,10 +11,14 @@ struct StopGradient final { static constexpr const char* name = "stop_gradient"; using Signature = void( - const Tensor& input, - Tensor* output, + const C10Tensor& input, + const C10Tensor& output, BaseContext* context); + static constexpr size_t num_dispatch_args() {return 1;} + + static constexpr size_t num_outputs() {return 1;} + static constexpr c10::guts::array parameter_names = { {"input", "output", "context"}}; }; diff --git a/caffe2/operators/layer_norm_op.cc b/caffe2/operators/layer_norm_op.cc index 9be131d..5e63dd4 100644 --- a/caffe2/operators/layer_norm_op.cc +++ b/caffe2/operators/layer_norm_op.cc @@ -187,22 +187,35 @@ to the end.) namespace { template void layer_norm_c10( - const caffe2::Tensor& X, - caffe2::Tensor* Y, - caffe2::Tensor* mean, - caffe2::Tensor* sig, + const c10::C10Tensor& X_, + const c10::C10Tensor& Y_, + const c10::C10Tensor& mean_, + const c10::C10Tensor& sig_, int axis, float epsilon, caffe2::ops::LayerNorm::Cache* cache, caffe2::BaseContext* context) { + caffe2::Tensor X(X_); + caffe2::Tensor Y(Y_); + caffe2::Tensor mean(mean_); + caffe2::Tensor sig(sig_); + if (!cache->scale.has_value()) { + cache->scale = c10::C10Tensor(caffe2::Tensor{caffe2::CPU}); + } + if (!cache->bias.has_value()) { + cache->bias = c10::C10Tensor(caffe2::Tensor{caffe2::CPU}); + } + caffe2::Tensor scale(*cache->scale); + caffe2::Tensor bias(*cache->bias); + const int canonical_axis = X.canonical_axis_index(axis); std::vector moments_dims( X.sizes().cbegin(), X.sizes().cbegin() + canonical_axis); moments_dims.push_back(1); - mean->Resize(moments_dims); - sig->Resize(moments_dims); + mean.Resize(moments_dims); + sig.Resize(moments_dims); caffe2::LayerNormOp::runLayerNorm( - X, Y, mean, sig, canonical_axis, epsilon, &cache->scale, &cache->bias, static_cast(context) + X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast(context) ); } }