The `input` will be split into `K` parts. Each part of length
`sum(lengths[i*k:i*k+k])`)DOC");
-namespace {
OpSchema::Cost CostInferenceForConcat(
const OperatorDef& def,
const vector<TensorShape>& in) {
return cost;
}
+namespace {
std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>>
concatOpDevInfer(const OperatorDef& def) {
auto op_device =
}
} // namespace
+// Shape inference for Concat. Resolves the concatenation axis (explicit
+// "axis" argument, else derived from the "order" string, default "NCHW"),
+// verifies all inputs agree on every non-concat dimension, and returns the
+// output shape. With add_axis != 0 a new dimension of size in.size() is
+// inserted at the canonical axis; otherwise the inputs' sizes along that
+// axis are summed. If the op declares a second output, a 1-D INT32
+// split-info shape of length in.size() is returned as well.
+vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.HasArgument("axis")
+      ? helper.GetSingleArgument<int>("axis", -1)
+      : GetDimFromOrderString(
+            helper.GetSingleArgument<string>("order", "NCHW"));
+  bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
+  // Guard against an empty input list BEFORE dereferencing in[0]; the
+  // original placed this check after in[0] was already accessed.
+  CAFFE_ENFORCE_GT(in.size(), 0);
+  int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
+  const int canonical_axis = canonical_axis_index_(axis, adj_size);
+  CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range.");
+  vector<int> split_shape(1, in.size());
+  vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
+  if (add_axis) {
+    // add_axis mode: every input must match in[0] exactly (same rank and
+    // same size on every dim); the concat axis becomes a brand-new dim.
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims when add_axis = 1. "
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims when add_axis = 1. "
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+    out_shape.insert(out_shape.begin() + canonical_axis, in.size());
+  } else {
+    // Standard mode: inputs must match on every dim except the concat axis.
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims except "
+          "canonical_axis dim that is equal to ",
+          canonical_axis,
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        if (j == canonical_axis) {
+          continue;
+        }
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims except "
+            "canonical_axis dim that is equal to ",
+            canonical_axis,
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+
+    // Output size along the concat axis is the sum over all inputs.
+    for (int i = 1; i < in.size(); ++i) {
+      out_shape[canonical_axis] += in[i].dims(canonical_axis);
+    }
+  }
+  if (def.output_size() == 1) {
+    return vector<TensorShape>{CreateTensorShape(out_shape, in[0].data_type())};
+  }
+  return vector<TensorShape>{
+      CreateTensorShape(out_shape, in[0].data_type()),
+      CreateTensorShape(split_shape, TensorProto::INT32)};
+}
+
REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>);
OPERATOR_SCHEMA(Concat)
.NumInputs(1, INT_MAX)
.Arg(
"add_axis",
"*(type: int)* Pass non-zero integer to add the axis specified in `axis` to all input tensors.")
- .TensorInferenceFunction(OpSchema::NeedsAllInputShapes([](const OperatorDef&
- def,
- const vector<
- TensorShape>&
- in) {
- ArgumentHelper helper(def);
- const int axis = helper.HasArgument("axis")
- ? helper.GetSingleArgument<int>("axis", -1)
- : GetDimFromOrderString(
- helper.GetSingleArgument<string>("order", "NCHW"));
- bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
- int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
- const int canonical_axis = canonical_axis_index_(axis, adj_size);
- CAFFE_ENFORCE_LT(
- canonical_axis, adj_size, "Axis not in input ndim range.");
- CAFFE_ENFORCE_GT(in.size(), 0);
- vector<int> split_shape(1, in.size());
- vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
- if (add_axis) {
- for (int i = 1; i < in.size(); ++i) {
- CAFFE_ENFORCE_EQ(
- in[0].dims().size(),
- in[i].dims().size(),
- "All inputs of Concat should have same dims when add_axis = 1. "
- "Got different sizes for inputs 0 and ",
- i);
- for (int j = 0; j < in[0].dims().size(); ++j) {
- CAFFE_ENFORCE_EQ(
- in[0].dims(j),
- in[i].dims(j),
- "All inputs of Concat should have same dims when add_axis = 1. "
- "Got different dims for inputs 0 and ",
- i,
- ". At dim: ",
- j);
- }
- }
- out_shape.insert(out_shape.begin() + canonical_axis, in.size());
- } else {
- for (int i = 1; i < in.size(); ++i) {
- CAFFE_ENFORCE_EQ(
- in[0].dims().size(),
- in[i].dims().size(),
- "All inputs of Concat should have same dims except "
- "canonical_axis dim that is equal to ",
- canonical_axis,
- "Got different sizes for inputs 0 and ",
- i);
- for (int j = 0; j < in[0].dims().size(); ++j) {
- if (j == canonical_axis) {
- continue;
- }
- CAFFE_ENFORCE_EQ(
- in[0].dims(j),
- in[i].dims(j),
- "All inputs of Concat should have same dims except "
- "canonical_axis dim that is equal to ",
- canonical_axis,
- "Got different dims for inputs 0 and ",
- i,
- ". At dim: ",
- j);
- }
- }
-
- for (int i = 1; i < in.size(); ++i) {
- out_shape[canonical_axis] += in[i].dims(canonical_axis);
- }
- }
- if (def.output_size() == 1) {
- return vector<TensorShape>{
- CreateTensorShape(out_shape, in[0].data_type())};
- }
- return vector<TensorShape>{
- CreateTensorShape(out_shape, in[0].data_type()),
- CreateTensorShape(split_shape, TensorProto::INT32)};
- }))
+ .TensorInferenceFunction(
+ OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
.CostInferenceFunction(CostInferenceForConcat)
.DeviceInferenceFunction(concatOpDevInfer)
.SetDoc(R"DOC(
return true;
}
+// Shared Concat inference helpers, declared here so other operator schemas
+// (e.g. the Int8Concat schema, which includes this header) can reuse the
+// same shape and cost inference as the float Concat operator.
+OpSchema::Cost CostInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
+std::vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
} // namespace caffe2
#endif // CAFFE2_OPERATORS_CONCAT_SPLIT_OP_H_
namespace caffe2 {
-namespace {
-OpSchema::Cost CostInferenceForSum(
- const OperatorDef& def,
- const vector<TensorShape>& in) {
- struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
- cost.flops *= (in.size() - 1);
- cost.params_bytes = 0;
- return cost;
-}
-} // namespace
-
REGISTER_CPU_OPERATOR(Sum, SumOp<CPUContext>);
OPERATOR_SCHEMA(Sum)
OPERATOR_SCHEMA(Flatten)
.NumInputs(1)
.NumOutputs(1)
- .TensorInferenceFunction([](const OperatorDef& def,
- const vector<TensorShape>& in) {
- ArgumentHelper helper(def);
- const int axis = helper.GetSingleArgument<int>("axis", 1);
- vector<TensorShape> out(1);
- int64_t outer = 1;
- int64_t inner = 1;
- std::size_t index = 0;
- for (auto d : in[0].dims()) {
- if (index < axis) {
- outer *= d;
- } else {
- inner *= d;
- }
- ++index;
- }
- out[0].set_data_type(in[0].data_type());
- out[0].add_dims(outer);
- out[0].add_dims(inner);
- return out;
- })
+ .TensorInferenceFunction(TensorInferenceForFlatten)
.SetDoc(R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
$(d_0, d_1, ..., d_n)$ then the output will have shape
int axis_;
};
+// Shape inference for Flatten: collapses the input shape into a 2-D
+// [outer, inner] shape, where dims before `axis` (default 1) are multiplied
+// into `outer` and the remaining dims are multiplied into `inner`. The
+// output keeps the input's data type.
+inline std::vector<TensorShape> TensorInferenceForFlatten(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.GetSingleArgument<int>("axis", 1);
+  std::vector<TensorShape> out(1);
+  int64_t outer = 1;
+  int64_t inner = 1;
+  std::size_t index = 0;
+  // NOTE(review): `index < axis` compares std::size_t against int; a
+  // negative axis would convert to a huge unsigned value and send every dim
+  // to `outer`. Presumably axis is expected to be non-negative here —
+  // confirm against callers.
+  for (auto d : in[0].dims()) {
+    if (index < axis) {
+      outer *= d;
+    } else {
+      inner *= d;
+    }
+    ++index;
+  }
+  out[0].set_data_type(in[0].data_type());
+  out[0].add_dims(outer);
+  out[0].add_dims(inner);
+  return out;
+}
+
} // namespace caffe2
#endif // CAFFE2_OPERATORS_FLATTEN_OP_H_
+#include "caffe2/operators/quantized/int8_add_op.h"
+
#include <climits>
-#include "caffe2/operators/quantized/int8_add_op.h"
+#include "caffe2/operators/utility_ops.h"
namespace caffe2 {
.NumInputs(1, std::numeric_limits<int>::max())
.NumOutputs(1)
.AllowInplace({{0, 0}, {1, 0}})
+ .CostInferenceFunction(CostInferenceForSum)
+ .IdenticalTypeAndShapeOfInput(0)
.Arg("Y_scale", "Output tensor quantization scale")
.Arg("Y_zero_point", "Output tensor quantization offset");
.NumInputs(1, std::numeric_limits<int>::max())
.NumOutputs(1)
.AllowInplace({{0, 0}, {1, 0}})
+ .CostInferenceFunction(CostInferenceForSum)
+ .IdenticalTypeAndShapeOfInput(0)
.Arg("Y_scale", "Output tensor quantization scale")
.Arg("Y_zero_point", "Output tensor quantization offset");
#include "caffe2/operators/quantized/int8_concat_op.h"
+#include "caffe2/operators/concat_split_op.h"
+
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8Concat, int8::Int8ConcatOp);
"add_axis",
"Pass 1 to add the axis specified in arg 'axis' to all "
"input tensors")
+ .TensorInferenceFunction(
+ OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
+ .CostInferenceFunction(CostInferenceForConcat)
.SetDoc("Concatenate a list of tensors into a single tensor")
.Output(0, "concat_result", "Concatenated tensor")
.Output(1, "split_info", "The dimensions of the inputs.")
#include "caffe2/operators/quantized/int8_fc_op.h"
+#include <functional>
+
+#include "caffe2/operators/fc_inference.h"
+
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8FC, int8::Int8FCOp);
+using namespace std::placeholders;
OPERATOR_SCHEMA(Int8FC)
.NumInputs(3)
.NumOutputs(1)
+ .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
+ .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
.SetDoc(R"DOC(
Computes the result of passing an input vector X into a fully
connected layer with 2D weight matrix W and 1D bias vector b. That is,
#include "caffe2/operators/quantized/int8_flatten_op.h"
+#include "caffe2/operators/flatten_op.h"
+
namespace caffe2 {
REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
OPERATOR_SCHEMA(Int8Flatten)
.NumInputs(1)
.NumOutputs(1)
+ .TensorInferenceFunction(TensorInferenceForFlatten)
.SetDoc(R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
.SetDoc(R"DOC(
Creates quantized tensor of type char(byte) with scale and zero point info.
)DOC")
- .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+ .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+ .TensorInferenceFunction(FillerTensorInference<>);
OPERATOR_SCHEMA(Int8GivenIntTensorFill)
.NumInputs(0)
.SetDoc(R"DOC(
Creates quantized tensor of type int32 with scale and zero point info.
)DOC")
- .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+ .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+ .TensorInferenceFunction(FillerTensorInference<>);
REGISTER_CPU_OPERATOR(Int8GivenTensorFill, int8::Int8GivenTensorFillOp);
REGISTER_CPU_OPERATOR(Int8GivenIntTensorFill, int8::Int8GivenIntTensorFillOp);
}
};
+// Cost inference for Sum: an n-input elementwise sum performs (n - 1) adds
+// per element, so the single-op pointwise FLOP estimate is scaled by
+// (in.size() - 1). Sum carries no parameters, so params_bytes is zeroed.
+inline OpSchema::Cost CostInferenceForSum(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
+  cost.flops *= (in.size() - 1);
+  cost.params_bytes = 0;
+  return cost;
+}
+
// WeightedSumOp computes the weighted sum of several tensors. The input should
// be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
// shape, and weight_i are size 1 tensors that specifies the weight of each