*.cpp
core/*.cpp
core/dispatch/*.cpp
+ core/opschema/*.cpp
impl/*.cpp
macros/*.cpp
util/*.cpp
C10Tensor& operator=(C10Tensor&&) noexcept = default;
const TensorImplPtr &impl() const & noexcept;
- TensorImplPtr impl() && noexcept;
+ TensorImplPtr&& impl() && noexcept;
TensorTypeId type_id() const;
return impl_;
}
-inline C10Tensor::TensorImplPtr C10Tensor::impl() && noexcept {
+inline C10Tensor::TensorImplPtr&& C10Tensor::impl() && noexcept {
return std::move(impl_);
}
#include <c10/util/Array.h>
#include <c10/util/Metaprogramming.h>
#include <c10/DeviceType.h>
-
-namespace caffe2 {
-class Tensor;
-} // namespace caffe2
+#include <c10/core/Tensor.h>
namespace c10 {
*/
template <class Arg>
using is_tensor_arg = std::
- is_same<caffe2::Tensor, guts::remove_cv_t<guts::remove_reference_t<Arg>>>;
+ is_same<C10Tensor, guts::remove_cv_t<guts::remove_reference_t<Arg>>>;
inline DeviceTypeId to_device_type_id(DeviceType device_type) {
switch (device_type) {
}
}
-// TODO get rid of tensor_to_dispatch_key once c2::Tensor is de-templatized. This then fits into a template lambda instead of a functor.
-struct tensor_to_dispatch_key final {
- template<class TensorType>
- TensorParameterDispatchKey operator()(const TensorType& tensor) const {
- return TensorParameterDispatchKey{
- to_device_type_id(tensor.GetDeviceType()),
- LayoutId(0),
- tensor.dtype().id()};
- }
-};
+inline TensorParameterDispatchKey tensor_to_dispatch_key(const C10Tensor& tensor) {
+ return TensorParameterDispatchKey{
+ to_device_type_id(tensor.impl()->device_type()),
+ LayoutId(0),
+ tensor.impl()->dtype().id()};
+}
+
+// Extract type ids for all tensors from an array of tensors
+template<size_t num_dispatch_args, size_t num_tensor_args, size_t... indices>
+guts::array<TensorParameterDispatchKey, num_dispatch_args> getDispatchTypeIds__(const guts::array<const C10Tensor*, num_tensor_args>& tensor_args, guts::index_sequence<indices...>) {
+ return {tensor_to_dispatch_key(*tensor_args[indices])...};
+}
/**
* Extract the type ids of all tensors in a variadic list of arguments
* @param args List of arguments to get type ids from
* @return guts::array<TensorParameterDispatchKey, n>, where n is the number of tensor arguments (is_tensor_arg) in the class
*/
-template<class... Args> auto getTensorTypeIds_(const Args&... args)
--> guts::array<TensorParameterDispatchKey, guts::typelist::count_if<is_tensor_arg, guts::typelist::typelist<Args...>>::value> {
- return guts::filter_map<TensorParameterDispatchKey, is_tensor_arg>(tensor_to_dispatch_key(), args...);
+template<size_t num_dispatch_args, class... Args>
+guts::array<TensorParameterDispatchKey, num_dispatch_args> getDispatchTypeIds_(const Args&... args) {
+ auto tensor_args = guts::filter_map<const C10Tensor*, is_tensor_arg>([] (const C10Tensor& v){return &v;}, args...);
+ return getDispatchTypeIds__<num_dispatch_args>(tensor_args, guts::make_index_sequence<num_dispatch_args>());
}
-// TODO Test getTensorTypeIds_
+// TODO Test getDispatchTypeIds_
/**
* If T is a struct with a type field Signature, provides the member constant
*/
static constexpr size_t num_tensor_args = guts::typelist::count_if<details::is_tensor_arg, parameter_types>::value;
+ static constexpr size_t num_outputs = OpSchemaDef::num_outputs();
+
private:
static_assert(details::has_parameter_names_defined<OpSchemaDef>::value, "Operator schema doesn't define parameter_names member.");
// TODO Allow simpler definition of parameter_names without having to spell out the guts::array type in the schema def.
// TODO Use an ADL-based debugString(DispatchKey) function instead of operator<< for debug printing.
public:
- using dispatch_key_type = DispatchKey<signature::num_tensor_args>;
+ using dispatch_key_type = DispatchKey<OpSchemaDef::num_dispatch_args()>;
template<class... Args>
static inline dispatch_key_type dispatch_key(const Args&... args) {
map_t<guts::remove_cv_t, map_t<guts::remove_reference_t, typename signature::parameter_types>>
>::value, "Invalid argument types passed to OpSchema::dispatch_key()");
return dispatch_key_type {
- details::getTensorTypeIds_(args...)
+ details::getDispatchTypeIds_<OpSchemaDef::num_dispatch_args()>(args...)
};
}
};
-#include "c10/core/dispatch/OpSchema.h"
+#include <c10/core/dispatch/OpSchema.h>
#include <c10/util/Array.h>
using namespace c10;
-using namespace caffe2;
-static_assert(details::is_tensor_arg<Tensor>::value, "");
-static_assert(details::is_tensor_arg<const Tensor&>::value, "");
-static_assert(details::is_tensor_arg<Tensor&&>::value, "");
+static_assert(details::is_tensor_arg<C10Tensor>::value, "");
+static_assert(details::is_tensor_arg<const C10Tensor&>::value, "");
+static_assert(details::is_tensor_arg<C10Tensor&&>::value, "");
static_assert(!details::is_tensor_arg<int>::value, "");
struct SchemaDef final {
- using Signature = bool(int, Tensor, float, Tensor, Tensor, unsigned int);
+ using Signature = bool(int, C10Tensor, float, C10Tensor, C10Tensor, unsigned int);
static constexpr guts::array<const char*, 6> parameter_names = {{
"1", "2", "3", "4", "5", "6"
}};
+ static constexpr size_t num_dispatch_args() {return 3;}
+ static constexpr size_t num_outputs() {return 0;}
};
-static_assert(6 == OpSchema<SchemaDef>::signature::num_args, "test num_dispatch_args");
-static_assert(3 == OpSchema<SchemaDef>::signature::num_tensor_args, "test num_dispatch_args");
-static_assert(std::is_same<bool, typename OpSchema<SchemaDef>::signature::return_type>::value, "test num_dispatch_args");
+static_assert(6 == OpSchema<SchemaDef>::signature::num_args, "");
+static_assert(3 == OpSchema<SchemaDef>::signature::num_tensor_args, "");
+static_assert(std::is_same<bool, typename OpSchema<SchemaDef>::signature::return_type>::value, "");
static_assert(
std::is_same<
guts::typelist::
- typelist<int, Tensor, float, Tensor, Tensor, unsigned int>,
+ typelist<int, C10Tensor, float, C10Tensor, C10Tensor, unsigned int>,
typename OpSchema<SchemaDef>::signature::parameter_types>::value,
- "test num_dispatch_args");
+ "");
}
static constexpr size_t num_outputs() {
- return c10::guts::typelist::count_if<
- details::is_output_arg,
- typename Schema::signature::parameter_types>::value;
+ return Schema::signature::num_outputs;
}
bool RunOnDevice() override {
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- Input(InputIndex)...,
- Output(OutputIndex)...,
+ C10Tensor(Input(InputIndex))...,
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
state_.get(),
static_cast<BaseContext*>(&context_));
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- Input(InputIndex)...,
- Output(OutputIndex)...,
+ C10Tensor(Input(InputIndex))...,
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
static_cast<BaseContext*>(&context_));
}
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- Input(InputIndex)...,
- Output(OutputIndex)...,
+ C10Tensor(Input(InputIndex))...,
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
state_.get());
}
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- Input(InputIndex)...,
- Output(OutputIndex)...,
+ C10Tensor(Input(InputIndex))...,
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...);
}
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- at::ArrayRef<const Tensor*>(array_inputs_()),
- Output(OutputIndex)...,
+ at::ArrayRef<C10Tensor>(array_inputs_()),
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
state_.get(),
static_cast<BaseContext*>(&context_));
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- at::ArrayRef<const Tensor*>(array_inputs_()),
- Output(OutputIndex)...,
+ at::ArrayRef<C10Tensor>(array_inputs_()),
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
static_cast<BaseContext*>(&context_));
}
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- at::ArrayRef<const Tensor*>(array_inputs_()),
- Output(OutputIndex)...,
+ at::ArrayRef<C10Tensor>(array_inputs_()),
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...,
state_.get());
}
c10::guts::index_sequence<OutputIndex...>,
c10::guts::index_sequence<ParameterIndex...>) {
c10::Dispatcher<OpSchemaDef>::call(
- at::ArrayRef<const Tensor*>(array_inputs_()),
- Output(OutputIndex)...,
+ at::ArrayRef<C10Tensor>(array_inputs_()),
+ C10Tensor(*Output(OutputIndex))...,
std::get<ParameterIndex>(parameters_)...);
}
- std::vector<const Tensor*> array_inputs_() {
- std::vector<const Tensor*> result;
+ std::vector<C10Tensor> array_inputs_() {
+ std::vector<C10Tensor> result;
result.reserve(InputSize());
for (size_t i = 0; i < InputSize(); ++i) {
- result.push_back(&Input(i));
+ result.push_back(C10Tensor(Input(i)));
}
return result;
}
template <class DataType>
void add_op_cpu_impl(
- const Tensor& A,
- const Tensor& B,
- Tensor* C,
+ const C10Tensor& A_,
+ const C10Tensor& B_,
+ const C10Tensor& C_,
bool legacy_broadcast,
int axis,
BaseContext* context) {
+ Tensor A(A_);
+ Tensor B(B_);
+ Tensor C(C_);
const DataType* A_data = A.template data<DataType>();
const DataType* B_data = B.template data<DataType>();
std::vector<int> A_dims;
if (legacy_broadcast) {
CAFFE_ENFORCE_NE(
- C,
- &B,
+ C.getIntrusivePtr(),
+ B.getIntrusivePtr(),
"In-place is allowed only with the first tensor when "
"legacy-broadcasting");
- C->ResizeLike(A);
+ C.ResizeLike(A);
if (B.numel() == 1) {
A_dims = {static_cast<int>(A.numel())};
B_dims = {1};
const std::vector<int> C_dims =
caffe2::elementwise_ops_utils::ComputeBinaryBroadcastForwardDims(
A_dims, B_dims);
- if (C == &A) {
+ if (C.getIntrusivePtr() == A.getIntrusivePtr()) {
CAFFE_ENFORCE_EQ(C_dims, A_dims);
- } else if (C == &B) {
+ } else if (C.getIntrusivePtr() == B.getIntrusivePtr()) {
CAFFE_ENFORCE_EQ(C_dims, B_dims);
} else {
- C->Resize(C_dims);
+ C.Resize(C_dims);
}
}
- auto* C_data = C->template mutable_data<DataType>();
+ auto* C_data = C.template mutable_data<DataType>();
caffe2::math::Add(
A_dims.size(),
B_dims.data(),
A.data<DataType>(),
B.data<DataType>(),
- C->mutable_data<DataType>(),
+ C.mutable_data<DataType>(),
static_cast<CPUContext*>(context));
}
} // namespace
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/averaged_loss.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
template <class T, class Context>
void averaged_loss_op_cpu_impl(
- const Tensor& X,
- Tensor* sum,
+ const C10Tensor& X_,
+ const C10Tensor& sum_,
caffe2::ops::AveragedLoss::State* state,
BaseContext* context) {
- sum->Resize(vector<int64_t>());
+ Tensor X(X_);
+ Tensor sum(sum_);
- T* data = sum->template mutable_data<T>();
+ sum.Resize(vector<int64_t>());
+ T* data = sum.template mutable_data<T>();
+
+ Tensor scratch(state->scratch);
caffe2::math::Sum<T, Context>(
X.numel(),
X.template data<T>(),
data,
static_cast<Context*>(context),
- &state->scratch);
+ &scratch);
if (X.numel() > 0) {
caffe2::math::Scale<T, T, Context>(
1,
static_cast<T>(1.) / X.numel(),
- sum->template data<T>(),
+ sum.template data<T>(),
data,
static_cast<Context*>(context));
}
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/batch_gather.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
template <class TInd>
void batch_gather_op_cpu_impl(
- const Tensor& data,
- const Tensor& indices,
- Tensor* output,
+ const C10Tensor& data_,
+ const C10Tensor& indices_,
+ const C10Tensor& output_,
BaseContext* context) {
+ Tensor data(data_);
+ Tensor indices(indices_);
+ Tensor output(output_);
+
CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D");
vector<int64_t> shape;
shape.push_back(data.size(0));
shape.insert(shape.end(), indices.sizes().begin(), indices.sizes().end());
shape.insert(shape.end(), data.sizes().begin() + 2, data.sizes().end());
- output->Resize(shape);
+ output.Resize(shape);
auto block_size = data.size_from_dim(2);
auto block_bytesize = block_size * data.dtype().itemsize();
N * data.size_from_dim(2) * data.dtype().itemsize();
const TInd* idxs = indices.template data<TInd>();
auto src_base = static_cast<const char*>(data.raw_data());
- auto out = static_cast<char*>(output->raw_mutable_data(data.dtype()));
+ auto out = static_cast<char*>(output.raw_mutable_data(data.dtype()));
for (auto batch = 0; batch < data.size(0); ++batch) {
for (auto i = 0; i < N; ++i) {
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/batch_matmul.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
template <class T, class Context>
void batch_matmul_op_cpu_impl(
- const Tensor& A,
- const Tensor& B,
- Tensor* Y,
+ const C10Tensor& A_,
+ const C10Tensor& B_,
+ const C10Tensor& Y_,
int trans_a,
int trans_b,
int broadcast,
caffe2::ops::BatchMatmul::State* state,
BaseContext* context) {
+ Tensor A(A_);
+ Tensor B(B_);
+ Tensor Y(Y_);
using Engine = caffe2::DefaultEngine;
auto ndims_A = A.dim();
dims_B[0],
"Vector-vector product requires each of the vectors to "
"be the same size.");
- Y->Resize(1);
+ Y.Resize(1);
math::Dot<T, Context>(
- dims_A[0], data_A, data_B, Y->template mutable_data<T>(), static_cast<Context*>(context));
+ dims_A[0], data_A, data_B, Y.template mutable_data<T>(), static_cast<Context*>(context));
} else {
bool A_broadcasted = false, B_broadcasted = false;
if (ndims_A == 1) {
}
// Allocate output tensor
- Y->Resize(new_dims);
- auto* Y_data = Y->template mutable_data<T>();
+ Y.Resize(new_dims);
+ auto* Y_data = Y.template mutable_data<T>();
// Zero batch dimension indicates no elements
if (num_sub_batches == 0 || num_outer_batches == 0) {
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/cast.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::CPUContext;
using caffe2::Tensor;
namespace {
template <typename DstType, typename SrcType>
-void do_cast_(const Tensor& input, Tensor* output) {
- output->ResizeLike(input);
+void do_cast_(const Tensor& input, const Tensor& output) {
+ output.ResizeLike(input);
const auto* data = input.template data<SrcType>();
- auto* out = output->template mutable_data<DstType>();
+ auto* out = output.template mutable_data<DstType>();
auto N = input.numel();
for (int64_t i = 0; i < N; ++i) {
out[i] = static_cast<DstType>(data[i]);
template <class SrcType>
void cast_op_cpu_impl(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input_,
+ const C10Tensor& output_,
TensorProto_DataType to) {
+ Tensor input(input_);
+ Tensor output(output_);
+
switch (to) {
case caffe2::TensorProto_DataType_FLOAT:
do_cast_<float, SrcType>(input, output);
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/concat.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::CPUContext;
namespace {
template <class DataType, class Context>
void concat_op_cpu_impl(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
- Tensor* split,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output_,
+ const C10Tensor& split_,
int axis,
int add_axis,
BaseContext* context) {
- split->Resize(vector<int64_t>(1, inputs.size()));
- int* axis_data = split->template mutable_data<int>();
- int adj_size = inputs[0]->dim() + (add_axis ? 1 : 0);
+ Tensor output(output_);
+ Tensor split(split_);
+
+ split.Resize(vector<int64_t>(1, inputs.size()));
+ int* axis_data = split.template mutable_data<int>();
+ int adj_size = Tensor(inputs[0]).dim() + (add_axis ? 1 : 0);
int canonical_axis = caffe2::canonical_axis_index_(axis, adj_size);
CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range.");
for (int i = 1; i < inputs.size(); ++i) {
CAFFE_ENFORCE(
- inputs[i]->dtype() == inputs[0]->dtype(),
+ Tensor(inputs[i]).dtype() == Tensor(inputs[0]).dtype(),
"All inputs must have the same type, expected: ",
- inputs[0]->dtype().name(),
+ Tensor(inputs[0]).dtype().name(),
" but got: ",
- inputs[i]->dtype().name(),
+ Tensor(inputs[i]).dtype().name(),
" for input: ",
i);
}
int before = 1, after = 1;
- vector<int64_t> output_dims(inputs[0]->sizes().vec());
- for (int i = 0; i < inputs[0]->dim(); ++i) {
+ vector<int64_t> output_dims(Tensor(inputs[0]).sizes().vec());
+ for (int i = 0; i < Tensor(inputs[0]).dim(); ++i) {
if (i == canonical_axis && !add_axis) {
continue;
}
- int dim = inputs[0]->dim32(i);
+ int dim = Tensor(inputs[0]).dim32(i);
if (i < canonical_axis) {
before *= dim;
} else { // i > canonical_axis || i == canonical_axis && add_axis
}
// check the input dims are compatible.
for (int j = 1; j < inputs.size(); ++j) {
- int dim_j = inputs[j]->dim32(i);
+ int dim_j = Tensor(inputs[j]).dim32(i);
CAFFE_ENFORCE(
dim == dim_j,
"Expect dimension = ",
"when arg 'add_axis' = 0 and along the axis = ",
canonical_axis,
" <",
- inputs[0]->sizes(),
+ Tensor(inputs[0]).sizes(),
"> vs <",
- inputs[j]->sizes(),
+ Tensor(inputs[j]).sizes(),
">.");
}
}
int output_channels = 0;
for (int i = 0; i < inputs.size(); ++i) {
- axis_data[i] = add_axis ? 1 : inputs[i]->dim32(canonical_axis);
+ axis_data[i] = add_axis ? 1 : Tensor(inputs[i]).dim32(canonical_axis);
output_channels += axis_data[i];
}
if (add_axis) {
} else {
output_dims[canonical_axis] = output_channels;
}
- output->Resize(output_dims);
+ output.Resize(output_dims);
size_t output_offset = 0;
for (int i = 0; i < inputs.size(); ++i) {
- auto& input = *inputs[i];
+ Tensor input(inputs[i]);
auto axis_dim = add_axis ? 1 : input.dim32(canonical_axis);
caffe2::math::CopyMatrix<Context>(
input.itemsize(),
axis_dim * after,
input.raw_data(),
axis_dim * after,
- static_cast<char*>(output->raw_mutable_data(inputs[0]->dtype())) +
+ static_cast<char*>(output.raw_mutable_data(Tensor(inputs[0]).dtype())) +
output_offset,
output_channels * after,
static_cast<Context*>(context),
- inputs[0]->dtype().copy());
+ Tensor(inputs[0]).dtype().copy());
output_offset += axis_dim * after * input.itemsize();
}
}
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/enforce_finite.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::CPUContext;
using caffe2::Tensor;
namespace caffe2 {
namespace {
template <class DataType>
-void enforce_finite_op_impl_cpu(const Tensor& input) {
+void enforce_finite_op_impl_cpu(const C10Tensor& input_) {
+ Tensor input(input_);
const DataType* input_data = input.template data<DataType>();
auto size = input.numel();
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/expand_dims.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
namespace {
template <class DataType>
void expand_dims_op_cpu_impl(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input_,
+ const C10Tensor& output_,
const std::vector<int>& dims,
caffe2::ops::ExpandDims::State* state,
BaseContext* context) {
+ Tensor input(input_);
+ Tensor output(output_);
+
if (!state->initialized) {
state->dims = dims;
auto originalSize = state->dims.size();
state->initialized = true;
}
- output->CopyFrom(input, context);
+ output.CopyFrom(input, context);
if (state->dims.empty()) {
return;
}
for (const auto dim : state->dims) {
newDims.insert(newDims.begin() + dim, 1);
}
- output->Reshape(newDims);
+ output.Reshape(newDims);
}
} // namespace
} // namespace caffe2
#include "caffe2/operators/experimental/c10/schemas/fc.h"
#include "caffe2/utils/conversions.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
namespace {
template <class DataType, class Context>
void fc_op_cpu_impl(
- const Tensor& X,
- const Tensor& W,
- const Tensor& b,
- Tensor* Y,
+ const C10Tensor& X_,
+ const C10Tensor& W_,
+ const C10Tensor& b_,
+ const C10Tensor& Y_,
int axis,
int axis_w,
caffe2::ops::FullyConnected::Cache* cache,
BaseContext* context) {
+ Tensor X(X_);
+ Tensor W(W_);
+ Tensor b(b_);
+ Tensor Y(Y_);
+
constexpr bool TransposeWeight = true;
CAFFE_ENFORCE(b.dim() == 1, b.dim());
DCHECK_LE(canonical_axis + 1, cache->Y_shape_cache_.size());
cache->Y_shape_cache_.resize(canonical_axis + 1);
cache->Y_shape_cache_[canonical_axis] = N;
- Y->Resize(cache->Y_shape_cache_);
- CAFFE_ENFORCE(M * N == Y->numel(), dimErrorString());
+ Y.Resize(cache->Y_shape_cache_);
+ CAFFE_ENFORCE(M * N == Y.numel(), dimErrorString());
if (X.numel() == 0) {
// skip the rest of the computation if X is empty
- Y->template mutable_data<DataType>();
+ Y.template mutable_data<DataType>();
return;
}
X.template data<DataType>(),
W.template data<DataType>(),
0,
- Y->template mutable_data<DataType>(),
+ Y.template mutable_data<DataType>(),
static_cast<Context*>(context),
math_type);
// Add bias term
- if (cache->bias_multiplier_.numel() != M) {
+ Tensor bias_multiplier(cache->bias_multiplier_);
+ if (bias_multiplier.numel() != M) {
// If the helper bias multiplier is not M, reshape and fill it with one.
- cache->bias_multiplier_.Resize(M);
+ bias_multiplier.Resize(M);
caffe2::math::Set<DataType, Context>(
M,
caffe2::convert::To<float, DataType>(1),
- cache->bias_multiplier_.template mutable_data<DataType>(),
+ bias_multiplier.template mutable_data<DataType>(),
static_cast<Context*>(context));
}
caffe2::math::Gemm<DataType, Context, caffe2::DefaultEngine>(
N,
1,
1,
- cache->bias_multiplier_.template data<DataType>(),
+ bias_multiplier.template data<DataType>(),
b.template data<DataType>(),
1,
- Y->template mutable_data<DataType>(),
+ Y.template mutable_data<DataType>(),
static_cast<Context*>(context),
math_type);
}
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/filler.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::CPUContext;
using caffe2::Tensor;
namespace caffe2 {
namespace {
void filler_init(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output_,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape) {
+ Tensor output(output_);
if (inputs.size()) {
auto real_shape = vector<int64_t>{};
if (input_as_shape) {
// Shape input must be in CPU context
- auto& input = *inputs[0];
+ Tensor input(inputs[0]);
CAFFE_ENFORCE_EQ(
input.dim(),
1,
real_shape.insert(
real_shape.end(), shape_data, shape_data + input.dim32(0));
} else {
- auto& input = *inputs[0];
+ Tensor input(inputs[0]);
real_shape.insert(
real_shape.end(), input.sizes().begin(), input.sizes().end());
}
real_shape.insert(real_shape.end(), extra_shape.begin(), extra_shape.end());
- output->Resize(real_shape);
+ output.Resize(real_shape);
} else {
- output->Resize(shape);
+ output.Resize(shape);
}
}
template <class Type, class Context>
void given_tensor_fill_op_cpu_impl(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output_,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
- const Tensor& values,
+ const C10Tensor& values_,
BaseContext* context) {
- filler_init(inputs, output, shape, extra_shape, input_as_shape);
+ Tensor output(output_);
+ Tensor values(values_);
+
+ filler_init(inputs, output_, shape, extra_shape, input_as_shape);
// TODO T might not be the correct type to call, since float allows others.
- DCHECK_EQ(output->numel(), values.numel())
- << "output size: " << output->numel()
+ DCHECK_EQ(output.numel(), values.numel())
+ << "output size: " << output.numel()
<< " given size: " << values.numel();
- auto* data = output->template mutable_data<Type>();
+ auto* data = output.template mutable_data<Type>();
const Type* values_data = values.template data<Type>();
- if (output->numel()) {
- context->CopySameDevice(output->numel(), values_data, data);
+ if (output.numel()) {
+ context->CopySameDevice(output.numel(), values_data, data);
}
}
void constant_fill_op_cpu_impl(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output_,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
int dtype,
caffe2::ops::ConstantFill::Value value,
BaseContext* context) {
- filler_init(inputs, output, shape, extra_shape, input_as_shape);
+ Tensor output(output_);
+
+ filler_init(inputs, output_, shape, extra_shape, input_as_shape);
- if (output->numel()) {
+ if (output.numel()) {
if (dtype == caffe2::TensorProto_DataType_FLOAT) {
caffe2::math::Set<float, CPUContext>(
- output->numel(),
+ output.numel(),
value.as_float,
- output->template mutable_data<float>(),
+ output.template mutable_data<float>(),
static_cast<CPUContext*>(context));
} else if (dtype == caffe2::TensorProto_DataType_INT32) {
caffe2::math::Set<int32_t, CPUContext>(
- output->numel(),
+ output.numel(),
value.as_int32,
- output->template mutable_data<int32_t>(),
+ output.template mutable_data<int32_t>(),
static_cast<CPUContext*>(context));
} else if (dtype == caffe2::TensorProto_DataType_INT64) {
caffe2::math::Set<int64_t, CPUContext>(
- output->numel(),
+ output.numel(),
value.as_int64,
- output->template mutable_data<int64_t>(),
+ output.template mutable_data<int64_t>(),
static_cast<CPUContext*>(context));
} else if (dtype == caffe2::TensorProto_DataType_BOOL) {
caffe2::math::Set<bool, CPUContext>(
- output->numel(),
+ output.numel(),
value.as_bool,
- output->template mutable_data<bool>(),
+ output.template mutable_data<bool>(),
static_cast<CPUContext*>(context));
} else {
throw std::logic_error(
}
void uniform_fill_op_cpu_impl(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output_,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
float min,
float max,
BaseContext* context) {
- filler_init(inputs, output, shape, extra_shape, input_as_shape);
+ Tensor output(output_);
+
+ filler_init(inputs, output_, shape, extra_shape, input_as_shape);
if (inputs.size() == 3) {
- CAFFE_ENFORCE_EQ(1, inputs[1]->numel(), "min blob must be scalar");
- CAFFE_ENFORCE_EQ(1, inputs[2]->numel(), "max blob must be scalar");
- min = *inputs[1]->template data<float>();
- max = *inputs[2]->template data<float>();
+ CAFFE_ENFORCE_EQ(1, Tensor(inputs[1]).numel(), "min blob must be scalar");
+ CAFFE_ENFORCE_EQ(1, Tensor(inputs[2]).numel(), "max blob must be scalar");
+ min = *Tensor(inputs[1]).template data<float>();
+ max = *Tensor(inputs[2]).template data<float>();
if (min > max) {
- auto shape = output->sizes().vec();
+ auto shape = output.sizes().vec();
shape[0] = 0;
- output->Resize(shape);
- output->template mutable_data<float>();
+ output.Resize(shape);
+ output.template mutable_data<float>();
return;
}
}
caffe2::math::RandUniform<float, CPUContext>(
- output->numel(),
+ output.numel(),
min,
max,
- output->template mutable_data<float>(),
+ output.template mutable_data<float>(),
static_cast<CPUContext*>(context));
}
} // namespace
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/flatten.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
namespace {
template <class DataType, class Context>
void flatten_op_cpu_impl(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input_,
+ const C10Tensor& output_,
int axis,
BaseContext* context) {
+ Tensor input(input_);
+ Tensor output(output_);
CAFFE_ENFORCE_GE(
input.sizes().size(), axis, "The rank of the tensor must be >= axis.");
- output->Resize(input.size_to_dim(axis), input.size_from_dim(axis));
+ output.Resize(input.size_to_dim(axis), input.size_from_dim(axis));
context->CopyItemsSameDevice(
input.dtype(),
input.numel(),
input.raw_data(),
- output->raw_mutable_data(input.dtype()));
+ output.raw_mutable_data(input.dtype()));
}
} // namespace
} // namespace caffe2
#include "caffe2/operators/elementwise_ops_utils.h"
#include "caffe2/operators/experimental/c10/schemas/mul.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
template <class DataType>
void mul_op_cpu_impl(
- const Tensor& A,
- const Tensor& B,
- Tensor* C,
+ const C10Tensor& A_,
+ const C10Tensor& B_,
+ const C10Tensor& C_,
bool legacy_broadcast,
int axis,
BaseContext* context) {
+ Tensor A(A_);
+ Tensor B(B_);
+ Tensor C(C_);
const DataType* A_data = A.template data<DataType>();
const DataType* B_data = B.template data<DataType>();
std::vector<int> A_dims;
if (legacy_broadcast) {
CAFFE_ENFORCE_NE(
- C,
- &B,
+ C.getIntrusivePtr(),
+ B.getIntrusivePtr(),
"In-place is allowed only with the first tensor when "
"legacy-broadcasting");
- C->ResizeLike(A);
+ C.ResizeLike(A);
if (B.numel() == 1) {
A_dims = {static_cast<int>(A.numel())};
B_dims = {1};
const std::vector<int> C_dims =
caffe2::elementwise_ops_utils::ComputeBinaryBroadcastForwardDims(
A_dims, B_dims);
- if (C == &A) {
+ if (C.getIntrusivePtr() == A.getIntrusivePtr()) {
CAFFE_ENFORCE_EQ(C_dims, A_dims);
- } else if (C == &B) {
+ } else if (C.getIntrusivePtr() == B.getIntrusivePtr()) {
CAFFE_ENFORCE_EQ(C_dims, B_dims);
} else {
- C->Resize(C_dims);
+ C.Resize(C_dims);
}
}
- auto* C_data = C->template mutable_data<DataType>();
+ auto* C_data = C.template mutable_data<DataType>();
caffe2::math::Mul(
A_dims.size(),
B_dims.data(),
A.data<DataType>(),
B.data<DataType>(),
- C->mutable_data<DataType>(),
+ C.mutable_data<DataType>(),
static_cast<CPUContext*>(context));
}
} // namespace
#include "caffe2/operators/experimental/c10/schemas/relu.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::Tensor;
namespace {
template <class DataType>
void relu_op_cpu_impl(
- const Tensor& input,
- Tensor* output) {
- output->ResizeLike(input);
+ const C10Tensor& input_,
+ const C10Tensor& output_) {
+ Tensor input(input_);
+ Tensor output(output_);
+
+ output.ResizeLike(input);
#ifdef CAFFE2_USE_ACCELERATE
const float zero = 0.0f;
input.data<float>(),
1,
&zero,
- output->mutable_data<float>(),
+ output.mutable_data<float>(),
1,
input.size());
#else
- caffe2::EigenVectorMap<float>(output->mutable_data<float>(), input.numel()) =
+ caffe2::EigenVectorMap<float>(output.mutable_data<float>(), input.numel()) =
caffe2::ConstEigenVectorMap<float>(input.data<float>(), input.numel())
.cwiseMax(0.f);
#endif
/* Naive implementation
const float* input_data = input.data<float>();
- float* output_data = output->mutable_data<float>();
+ float* output_data = output.mutable_data<float>();
for (int i = 0; i < input.size(); ++i) {
output_data[i] = std::max(input_data[i], 0.f);
}
#include "caffe2/operators/experimental/c10/schemas/sigmoid.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::Tensor;
namespace {
template <class DataType>
void sigmoid_op_cpu_impl(
- const Tensor& input,
- Tensor* output) {
- output->ResizeLike(input);
+ const C10Tensor& input_,
+ const C10Tensor& output_) {
+ Tensor input(input_);
+ Tensor output(output_);
+ output.ResizeLike(input);
caffe2::ConstEigenVectorArrayMap<DataType> xM(
input.data<DataType>(), input.numel());
caffe2::EigenVectorArrayMap<DataType>(
- output->mutable_data<DataType>(), input.numel()) =
+ output.mutable_data<DataType>(), input.numel()) =
1. / (1. + (-xM).exp());
}
} // namespace
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/sigmoid_cross_entropy_with_logits.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::Tensor;
}
void sigmoid_cross_entropy_with_logits_op_cpu_impl(
- const Tensor& logits,
- const Tensor& targets,
- Tensor* out,
+ const C10Tensor& logits_,
+ const C10Tensor& targets_,
+ const C10Tensor& out_,
bool log_D_trick,
bool unjoined_lr_loss) {
+ Tensor logits(logits_);
+ Tensor targets(targets_);
+ Tensor out(out_);
+
CAFFE_ENFORCE_EQ(logits.sizes(), targets.sizes());
const auto inner_size = logits.dim() > 0 ? logits.sizes().back() : 1;
const auto outer_size = logits.numel() / inner_size;
if (logits.dim() == 0) {
- out->Resize(std::vector<int64_t>{});
+ out.Resize(std::vector<int64_t>{});
} else {
std::vector<int64_t> dims(logits.sizes().begin(), logits.sizes().end() - 1);
- out->Resize(dims);
+ out.Resize(dims);
}
- auto* out_ptr = out->mutable_data<float>();
+ auto* out_ptr = out.mutable_data<float>();
auto* logits_ptr = logits.data<float>();
auto* targets_ptr = targets.data<float>();
#include "caffe2/operators/experimental/c10/schemas/sparse_lengths_sum.h"
#include "caffe2/perfkernels/embedding_lookup.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::Tensor;
template <typename InputType, typename IndexType>
void sparse_lengths_sum_op_cpu_impl(
- const Tensor& dataInput,
- const Tensor& indicesInput,
- const Tensor& lengthsInput,
- Tensor* output) {
+ const C10Tensor& dataInput_,
+ const C10Tensor& indicesInput_,
+ const C10Tensor& lengthsInput_,
+ const C10Tensor& output_) {
+ Tensor dataInput(dataInput_);
+ Tensor indicesInput(indicesInput_);
+ Tensor lengthsInput(lengthsInput_);
+ Tensor output(output_);
+
using T = float;
constexpr bool USE_MEAN = false;
constexpr bool USE_POSITIONAL_WEIGHT = false;
auto shape = dataInput.sizes().vec();
shape[0] = M;
- output->Resize(shape);
- T* out_data = output->template mutable_data<T>();
+ output.Resize(shape);
+ T* out_data = output.template mutable_data<T>();
const InputType* in_data = dataInput.template data<InputType>();
const IndexType* indices = indicesInput.template data<IndexType>();
#include <c10/core/dispatch/KernelRegistration.h>
#include "caffe2/operators/experimental/c10/schemas/stop_gradient.h"
#include "caffe2/utils/math.h"
+#include "caffe2/core/tensor.h"
using caffe2::BaseContext;
using caffe2::Tensor;
namespace {
+// StopGradient forward pass is an identity op: copy input to output unless
+// the two already alias the same storage.
template <class DataType>
void stop_gradient_op_cpu_impl(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input_,
+ const C10Tensor& output_,
 BaseContext* context) {
+ Tensor input(input_);
+ Tensor output(output_);
+ // The old code compared tensor addresses (output != &input); with handle
+ // types the equivalent "same tensor" test is comparing the underlying
+ // TensorImpl pointers, so an in-place call still skips the copy.
+ if (output.getIntrusivePtr() != input.getIntrusivePtr()) {
+ output.CopyFrom(input, context);
 }
}
} // namespace
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Add);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "add";
using Signature = void(
- const Tensor& input1,
- const Tensor& input2,
- Tensor* output,
+ const C10Tensor& input1,
+ const C10Tensor& input2,
+ const C10Tensor& output,
bool legacy_broadcast,
int axis,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 2;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 6> parameter_names = {
{"input1", "input2", "output", "legacy_broadcast", "axis", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::AveragedLoss);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
+#include "caffe2/core/tensor.h"
namespace caffe2 {
namespace ops {
struct AveragedLoss final {
+ // Per-operator scratch space carried across invocations by the wrapper.
 struct State final {
- Tensor scratch = Tensor{CPU};
+ C10Tensor scratch = C10Tensor(empty({}, CPU)); // NOTE(review): relies on caffe2::empty being in scope via caffe2/core/tensor.h -- confirm
 };
 static constexpr const char* name = "averaged_loss";
+ // Kernel signature registered with the c10 dispatcher; `output` is a
+ // preallocated C10Tensor the kernel writes into.
 using Signature = void(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input,
+ const C10Tensor& output,
 State* state,
 BaseContext* context);
+ // Number of leading tensor arguments used to compute the dispatch key.
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
 static constexpr c10::guts::array<const char*, 4> parameter_names = {
 {"input", "output", "state", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::BatchGather);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "batch_gather";
using Signature = void(
- const Tensor& data,
- const Tensor& indices,
- Tensor* output,
+ const C10Tensor& data,
+ const C10Tensor& indices,
+ const C10Tensor& output,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 2;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 4> parameter_names = {
{"data", "indices", "output", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::BatchMatmul);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
struct BatchMatmul final {
struct State final {
- std::shared_ptr<Tensor> scratch;
+ std::shared_ptr<C10Tensor> scratch;
};
static constexpr const char* name = "batch_matmul";
using Signature = void(
- const Tensor& A,
- const Tensor& B,
- Tensor* output,
+ const C10Tensor& A,
+ const C10Tensor& B,
+ const C10Tensor& output,
int trans_a,
int trans_b,
int broadcast,
State* state,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 2;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 8> parameter_names = {
{"A",
"B",
#include "caffe2/utils/cast.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Cast);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "cast";
using Signature = void(
- const Tensor& input1,
- Tensor* output,
+ const C10Tensor& input1,
+ const C10Tensor& output,
TensorProto_DataType to);
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 3> parameter_names = {
{"input", "output", "to"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Concat);
#pragma once
#include <c10/core/dispatch/DeviceId.h>
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
#include <c10/util/ArrayRef.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "concat";
using Signature = void(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
- Tensor* split_info,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
+ const C10Tensor& split_info,
int add,
int add_axis,
BaseContext* context);
+ static constexpr size_t num_outputs() {return 2;}
+
static constexpr c10::guts::array<const char*, 6> parameter_names = {
{"inputs", "output", "split_info_output", "add", "add_axis", "context"}};
static c10::DeviceTypeId dispatch_key(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
- Tensor* split_info,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
+ const C10Tensor& split_info,
int add,
int add_axis,
BaseContext* context) {
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::EnforceFinite);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
namespace caffe2 {
struct EnforceFinite final {
static constexpr const char* name = "enforce_finite";
- using Signature = void(const Tensor& input);
+ using Signature = void(const C10Tensor& input);
+
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 0;}
static constexpr c10::guts::array<const char*, 1> parameter_names = {
{"input"}};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::ExpandDims);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "expand_dims";
using Signature = void(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input,
+ const C10Tensor& output,
const std::vector<int>& dims,
State* state,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 5> parameter_names = {
{"input", "output", "dims", "state", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::FullyConnected);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/tensor.h"
namespace caffe2 {
namespace ops {
struct Cache final {
vector<int64_t> Y_shape_cache_;
- Tensor bias_multiplier_ = Tensor{CPU};
+ C10Tensor bias_multiplier_ = C10Tensor(Tensor{CPU});
};
using Signature = void(
- const Tensor& X,
- const Tensor& W,
- const Tensor& b,
- Tensor* output,
+ const C10Tensor& X,
+ const C10Tensor& W,
+ const C10Tensor& b,
+ const C10Tensor& output,
int axis,
int axis_w,
Cache* cache,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 3;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 8> parameter_names = {
{"X", "W", "b", "output", "axis", "axis_w", "cache", "context"}};
};
#include "caffe2/utils/cast.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
+using c10::C10Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::ConstantFill);
C10_DEFINE_OP_SCHEMA(caffe2::ops::UniformFill);
};
template <class T>
struct ValuesParameter final {
- using type = Tensor;
- static Tensor parse(const caffe2::ArgumentHelper& helper) {
+ using type = C10Tensor;
+ static C10Tensor parse(const caffe2::ArgumentHelper& helper) {
if (!std::is_same<T, float>::value || !helper.HasArgument("dtype")) {
return ExtractValues<T>(helper);
} else {
private:
template <typename Type>
- static Tensor ExtractValues(
+ static C10Tensor ExtractValues(
const caffe2::ArgumentHelper& helper) {
auto source_values = helper.GetRepeatedArgument<Type>("values");
- Tensor values{caffe2::CPU};
+ caffe2::Tensor values{caffe2::CPU};
values.Resize(source_values.size());
Type* values_data = values.template mutable_data<Type>();
for (int i = 0; i < source_values.size(); i++) {
values_data[i] = static_cast<Type>(source_values[i]);
}
// body_ = &GivenTensorFillOp::FillWithType<Type>;
- return values;
+ return C10Tensor(values);
}
};
} // namespace
#pragma once
#include <c10/core/dispatch/DeviceId.h>
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
#include <c10/util/ArrayRef.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "given_tensor_fill";
using Signature = void(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
- const Tensor& values,
+ const C10Tensor& values,
BaseContext* context);
static constexpr c10::guts::array<const char*, 7> parameter_names = {
"values",
"context"}};
- static c10::DeviceTypeId dispatch_key(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ static constexpr size_t num_outputs() {return 1;}
+
+ static c10::DeviceTypeId dispatch_key(
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
- const Tensor& values,
+ const C10Tensor& values,
BaseContext* context) {
return c10::DeviceTypeId::CPU;
}
static constexpr const char* name = "constant_fill";
using Signature = void(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
Value value,
BaseContext* context);
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 8> parameter_names = {
{"inputs",
"output",
"context"}};
static c10::DeviceTypeId dispatch_key(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
static constexpr const char* name = "uniform_fill";
using Signature = void(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
float max,
BaseContext* context);
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 8> parameter_names = {
{"inputs",
"output",
"context"}};
static c10::DeviceTypeId dispatch_key(
- at::ArrayRef<const Tensor*> inputs,
- Tensor* output,
+ at::ArrayRef<C10Tensor> inputs,
+ const C10Tensor& output,
const std::vector<int64_t>& shape,
const std::vector<int>& extra_shape,
bool input_as_shape,
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Flatten);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "flatten";
using Signature = void(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input,
+ const C10Tensor& output,
int axis,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 4> parameter_names = {
{"input", "output", "axis", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::LayerNorm);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
+#include "caffe2/core/tensor.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "LayerNorm";
struct Cache final {
- Tensor scale = empty({}, CPU);
- Tensor bias = empty({}, CPU);
+ at::optional<C10Tensor> scale = at::nullopt;
+ at::optional<C10Tensor> bias = at::nullopt;
};
using Signature = void(
- const Tensor& input,
- Tensor* output,
- Tensor* output_mean,
- Tensor* output_stddev,
+ const C10Tensor& input,
+ const C10Tensor& output,
+ const C10Tensor& output_mean,
+ const C10Tensor& output_stddev,
int axis,
float epsilon,
Cache* cache,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 3;}
+
static constexpr c10::guts::array<const char*, 8> parameter_names = {
{"input", "output", "output_mean", "output_stddev", "axis", "epsilon", "cache", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Mul);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "mul";
using Signature = void(
- const Tensor& input1,
- const Tensor& input2,
- Tensor* output,
+ const C10Tensor& input1,
+ const C10Tensor& input2,
+ const C10Tensor& output,
bool legacy_broadcast,
int axis,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 2;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 6> parameter_names = {
{"input1", "input2", "output", "legacy_broadcast", "axis", "context"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Relu);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
namespace caffe2 {
static constexpr const char* name = "relu";
using Signature =
- void(const Tensor& input, Tensor* output);
+ void(const C10Tensor& input, const C10Tensor& output);
+
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
static constexpr c10::guts::array<const char*, 2> parameter_names = {
{"input", "output"}};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::Sigmoid);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
namespace caffe2 {
static constexpr const char* name = "sigmoid";
using Signature =
- void(const Tensor& input, Tensor* output);
+ void(const C10Tensor& input, const C10Tensor& output);
+
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
static constexpr c10::guts::array<const char*, 2> parameter_names = {
{"input", "output"}};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::SigmoidCrossEntropyWithLogits);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
namespace caffe2 {
static constexpr const char* name = "sigmoid_cross_entropy_with_logits";
using Signature = void(
- const Tensor& input1,
- const Tensor& input2,
- Tensor* output,
+ const C10Tensor& input1,
+ const C10Tensor& input2,
+ const C10Tensor& output,
bool log_D_trick,
bool unjoined_lr_loss);
+ static constexpr size_t num_dispatch_args() {return 2;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 5> parameter_names = {
{"input1", "input2", "output", "log_d_trick", "unjoined_lr_loss"}};
};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::SparseLengthsSum);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
namespace caffe2 {
static constexpr const char* name = "sparse_lengths_sum";
using Signature = void(
- const Tensor& data,
- const Tensor& indices,
- const Tensor& lengths,
- Tensor* output);
+ const C10Tensor& data,
+ const C10Tensor& indices,
+ const C10Tensor& lengths,
+ const C10Tensor& output);
+
+ static constexpr size_t num_dispatch_args() {return 3;}
+
+ static constexpr size_t num_outputs() {return 1;}
static constexpr c10::guts::array<const char*, 4> parameter_names = {
{"data", "indices", "lengths", "output"}};
#include "caffe2/core/operator_c10wrapper.h"
using caffe2::CPUContext;
-using caffe2::Tensor;
C10_DEFINE_OP_SCHEMA(caffe2::ops::StopGradient);
#pragma once
-#include "caffe2/core/tensor.h"
+#include <c10/core/Tensor.h>
#include <c10/util/Array.h>
+#include "caffe2/core/context_base.h"
namespace caffe2 {
namespace ops {
static constexpr const char* name = "stop_gradient";
using Signature = void(
- const Tensor& input,
- Tensor* output,
+ const C10Tensor& input,
+ const C10Tensor& output,
BaseContext* context);
+ static constexpr size_t num_dispatch_args() {return 1;}
+
+ static constexpr size_t num_outputs() {return 1;}
+
static constexpr c10::guts::array<const char*, 3> parameter_names = {
{"input", "output", "context"}};
};
namespace {
+// c10 kernel wrapper around caffe2's LayerNormOp CPU implementation.
template <class DataType>
void layer_norm_c10(
- const caffe2::Tensor& X,
- caffe2::Tensor* Y,
- caffe2::Tensor* mean,
- caffe2::Tensor* sig,
+ const c10::C10Tensor& X_,
+ const c10::C10Tensor& Y_,
+ const c10::C10Tensor& mean_,
+ const c10::C10Tensor& sig_,
 int axis,
 float epsilon,
 caffe2::ops::LayerNorm::Cache* cache,
 caffe2::BaseContext* context) {
+ // caffe2::Tensor views sharing the C10Tensor's TensorImpl -- writes to
+ // Y/mean/sig below are visible to the caller.
+ caffe2::Tensor X(X_);
+ caffe2::Tensor Y(Y_);
+ caffe2::Tensor mean(mean_);
+ caffe2::Tensor sig(sig_);
+ // Cache now stores optional<C10Tensor>, so the scratch tensors are
+ // created lazily on first call instead of in the Cache constructor.
+ if (!cache->scale.has_value()) {
+ cache->scale = c10::C10Tensor(caffe2::Tensor{caffe2::CPU});
+ }
+ if (!cache->bias.has_value()) {
+ cache->bias = c10::C10Tensor(caffe2::Tensor{caffe2::CPU});
+ }
+ // These views alias the cached storage, so whatever runLayerNorm writes
+ // into scale/bias persists in the cache for the next invocation.
+ caffe2::Tensor scale(*cache->scale);
+ caffe2::Tensor bias(*cache->bias);
+
 const int canonical_axis = X.canonical_axis_index(axis);
+ // mean/sig keep the leading dims of X up to the canonical axis, plus a
+ // trailing 1 (reduction dimension).
 std::vector<int64_t> moments_dims(
 X.sizes().cbegin(), X.sizes().cbegin() + canonical_axis);
 moments_dims.push_back(1);
- mean->Resize(moments_dims);
- sig->Resize(moments_dims);
+ mean.Resize(moments_dims);
+ sig.Resize(moments_dims);
 caffe2::LayerNormOp<caffe2::CPUContext>::runLayerNorm<DataType>(
- X, Y, mean, sig, canonical_axis, epsilon, &cache->scale, &cache->bias, static_cast<caffe2::CPUContext*>(context)
+ X, &Y, &mean, &sig, canonical_axis, epsilon, &scale, &bias, static_cast<caffe2::CPUContext*>(context)
 );
}
}