From: Jongsoo Park
Date: Thu, 7 Mar 2019 07:26:27 +0000 (-0800)
Subject: add tensor and cost inference functions (#17684)
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~951
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=39423fbdd41b5237a70afe711deaa742bca252fe;p=platform%2Fupstream%2Fpytorch.git

add tensor and cost inference functions (#17684)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17684

Adding tensor and cost inference functions to more int8 operators.

Reviewed By: yinghai

Differential Revision: D14174746

fbshipit-source-id: dfad975fa75899565c8fb61f1b7747a9206ebd22
---

diff --git a/caffe2/operators/concat_split_op.cc b/caffe2/operators/concat_split_op.cc
index 57ef45c..ff66578 100644
--- a/caffe2/operators/concat_split_op.cc
+++ b/caffe2/operators/concat_split_op.cc
@@ -106,7 +106,6 @@ Split a tensor into a list of tensors, given a lengths input, along the specifie
 The `input` will be split into `K` parts. Each part of length
 `sum(lengths[i*k:i*k+k))`)DOC");
 
-namespace {
 OpSchema::Cost CostInferenceForConcat(
     const OperatorDef& def,
     const vector<TensorShape>& in) {
@@ -143,6 +142,7 @@ OpSchema::Cost CostInferenceForConcat(
   return cost;
 }
 
+namespace {
 std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>>
 concatOpDevInfer(const OperatorDef& def) {
   auto op_device =
@@ -157,6 +157,80 @@ concatOpDevInfer(const OperatorDef& def) {
 }
 } // namespace
 
+vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.HasArgument("axis")
+      ? helper.GetSingleArgument<int>("axis", -1)
+      : GetDimFromOrderString(
+            helper.GetSingleArgument<string>("order", "NCHW"));
+  bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
+  int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
+  const int canonical_axis = canonical_axis_index_(axis, adj_size);
+  CAFFE_ENFORCE_LT(canonical_axis, adj_size, "Axis not in input ndim range.");
+  CAFFE_ENFORCE_GT(in.size(), 0);
+  vector<int> split_shape(1, in.size());
+  vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
+  if (add_axis) {
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims when add_axis = 1. "
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims when add_axis = 1. "
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+    out_shape.insert(out_shape.begin() + canonical_axis, in.size());
+  } else {
+    for (int i = 1; i < in.size(); ++i) {
+      CAFFE_ENFORCE_EQ(
+          in[0].dims().size(),
+          in[i].dims().size(),
+          "All inputs of Concat should have same dims except "
+          "canonical_axis dim that is equal to ",
+          canonical_axis,
+          "Got different sizes for inputs 0 and ",
+          i);
+      for (int j = 0; j < in[0].dims().size(); ++j) {
+        if (j == canonical_axis) {
+          continue;
+        }
+        CAFFE_ENFORCE_EQ(
+            in[0].dims(j),
+            in[i].dims(j),
+            "All inputs of Concat should have same dims except "
+            "canonical_axis dim that is equal to ",
+            canonical_axis,
+            "Got different dims for inputs 0 and ",
+            i,
+            ". At dim: ",
+            j);
+      }
+    }
+
+    for (int i = 1; i < in.size(); ++i) {
+      out_shape[canonical_axis] += in[i].dims(canonical_axis);
+    }
+  }
+  if (def.output_size() == 1) {
+    return vector<TensorShape>{CreateTensorShape(out_shape, in[0].data_type())};
+  }
+  return vector<TensorShape>{
+      CreateTensorShape(out_shape, in[0].data_type()),
+      CreateTensorShape(split_shape, TensorProto::INT32)};
+}
+
 REGISTER_CPU_OPERATOR(Concat, ConcatOp<CPUContext>);
 OPERATOR_SCHEMA(Concat)
     .NumInputs(1, INT_MAX)
@@ -168,83 +242,8 @@ OPERATOR_SCHEMA(Concat)
     .Arg(
         "add_axis",
         "*(type: int)* Pass non-zero integer to add the axis specified in `axis` to all input tensors.")
-    .TensorInferenceFunction(OpSchema::NeedsAllInputShapes([](const OperatorDef&
-                                                                  def,
-                                                              const vector<
-                                                                  TensorShape>&
-                                                                  in) {
-      ArgumentHelper helper(def);
-      const int axis = helper.HasArgument("axis")
-          ? helper.GetSingleArgument<int>("axis", -1)
-          : GetDimFromOrderString(
-                helper.GetSingleArgument<string>("order", "NCHW"));
-      bool add_axis = helper.GetSingleArgument<int>("add_axis", 0) != 0;
-      int adj_size = in[0].dims_size() + (add_axis ? 1 : 0);
-      const int canonical_axis = canonical_axis_index_(axis, adj_size);
-      CAFFE_ENFORCE_LT(
-          canonical_axis, adj_size, "Axis not in input ndim range.");
-      CAFFE_ENFORCE_GT(in.size(), 0);
-      vector<int> split_shape(1, in.size());
-      vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
-      if (add_axis) {
-        for (int i = 1; i < in.size(); ++i) {
-          CAFFE_ENFORCE_EQ(
-              in[0].dims().size(),
-              in[i].dims().size(),
-              "All inputs of Concat should have same dims when add_axis = 1. "
-              "Got different sizes for inputs 0 and ",
-              i);
-          for (int j = 0; j < in[0].dims().size(); ++j) {
-            CAFFE_ENFORCE_EQ(
-                in[0].dims(j),
-                in[i].dims(j),
-                "All inputs of Concat should have same dims when add_axis = 1. "
-                "Got different dims for inputs 0 and ",
-                i,
-                ". At dim: ",
-                j);
-          }
-        }
-        out_shape.insert(out_shape.begin() + canonical_axis, in.size());
-      } else {
-        for (int i = 1; i < in.size(); ++i) {
-          CAFFE_ENFORCE_EQ(
-              in[0].dims().size(),
-              in[i].dims().size(),
-              "All inputs of Concat should have same dims except "
-              "canonical_axis dim that is equal to ",
-              canonical_axis,
-              "Got different sizes for inputs 0 and ",
-              i);
-          for (int j = 0; j < in[0].dims().size(); ++j) {
-            if (j == canonical_axis) {
-              continue;
-            }
-            CAFFE_ENFORCE_EQ(
-                in[0].dims(j),
-                in[i].dims(j),
-                "All inputs of Concat should have same dims except "
-                "canonical_axis dim that is equal to ",
-                canonical_axis,
-                "Got different dims for inputs 0 and ",
-                i,
-                ". At dim: ",
-                j);
-          }
-        }
-
-        for (int i = 1; i < in.size(); ++i) {
-          out_shape[canonical_axis] += in[i].dims(canonical_axis);
-        }
-      }
-      if (def.output_size() == 1) {
-        return vector<TensorShape>{
-            CreateTensorShape(out_shape, in[0].data_type())};
-      }
-      return vector<TensorShape>{
-          CreateTensorShape(out_shape, in[0].data_type()),
-          CreateTensorShape(split_shape, TensorProto::INT32)};
-    }))
+    .TensorInferenceFunction(
+        OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
     .CostInferenceFunction(CostInferenceForConcat)
     .DeviceInferenceFunction(concatOpDevInfer)
     .SetDoc(R"DOC(
diff --git a/caffe2/operators/concat_split_op.h b/caffe2/operators/concat_split_op.h
index 53821ed..47ed663 100644
--- a/caffe2/operators/concat_split_op.h
+++ b/caffe2/operators/concat_split_op.h
@@ -335,6 +335,14 @@ bool ConcatOp<Context>::RunOnDevice() {
   return true;
 }
 
+OpSchema::Cost CostInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
+std::vector<TensorShape> TensorInferenceForConcat(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in);
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_CONCAT_SPLIT_OP_H_
diff --git a/caffe2/operators/elementwise_sum_op.cc b/caffe2/operators/elementwise_sum_op.cc
index 2668f24..8371541 100644
--- a/caffe2/operators/elementwise_sum_op.cc
+++ b/caffe2/operators/elementwise_sum_op.cc
@@ -2,17 +2,6 @@
 
 namespace caffe2 {
 
-namespace {
-OpSchema::Cost CostInferenceForSum(
-    const OperatorDef& def,
-    const vector<TensorShape>& in) {
-  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
-  cost.flops *= (in.size() - 1);
-  cost.params_bytes = 0;
-  return cost;
-}
-} // namespace
-
 REGISTER_CPU_OPERATOR(Sum, SumOp<CPUContext>);
 
 OPERATOR_SCHEMA(Sum)
diff --git a/caffe2/operators/flatten_op.cc b/caffe2/operators/flatten_op.cc
index 3086b9a..a11867d 100644
--- a/caffe2/operators/flatten_op.cc
+++ b/caffe2/operators/flatten_op.cc
@@ -7,27 +7,7 @@ REGISTER_CPU_OPERATOR(Flatten, FlattenOp<CPUContext>);
 OPERATOR_SCHEMA(Flatten)
     .NumInputs(1)
     .NumOutputs(1)
-    .TensorInferenceFunction([](const OperatorDef& def,
-                                const vector<TensorShape>& in) {
-      ArgumentHelper helper(def);
-      const int axis = helper.GetSingleArgument<int>("axis", 1);
-      vector<TensorShape> out(1);
-      int64_t outer = 1;
-      int64_t inner = 1;
-      std::size_t index = 0;
-      for (auto d : in[0].dims()) {
-        if (index < axis) {
-          outer *= d;
-        } else {
-          inner *= d;
-        }
-        ++index;
-      }
-      out[0].set_data_type(in[0].data_type());
-      out[0].add_dims(outer);
-      out[0].add_dims(inner);
-      return out;
-    })
+    .TensorInferenceFunction(TensorInferenceForFlatten)
     .SetDoc(R"DOC(
 Flattens the input tensor into a 2D matrix. If input tensor has shape
 $(d_0, d_1, ..., d_n)$ then the output will have shape
diff --git a/caffe2/operators/flatten_op.h b/caffe2/operators/flatten_op.h
index cdc8d83..401e6fb 100644
--- a/caffe2/operators/flatten_op.h
+++ b/caffe2/operators/flatten_op.h
@@ -33,6 +33,29 @@ class FlattenOp : public Operator<Context> {
   int axis_;
 };
 
+inline std::vector<TensorShape> TensorInferenceForFlatten(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  ArgumentHelper helper(def);
+  const int axis = helper.GetSingleArgument<int>("axis", 1);
+  std::vector<TensorShape> out(1);
+  int64_t outer = 1;
+  int64_t inner = 1;
+  std::size_t index = 0;
+  for (auto d : in[0].dims()) {
+    if (index < axis) {
+      outer *= d;
+    } else {
+      inner *= d;
+    }
+    ++index;
+  }
+  out[0].set_data_type(in[0].data_type());
+  out[0].add_dims(outer);
+  out[0].add_dims(inner);
+  return out;
+}
+
 } // namespace caffe2
 
 #endif // CAFFE2_OPERATORS_FLATTEN_OP_H_
diff --git a/caffe2/operators/quantized/int8_add_op.cc b/caffe2/operators/quantized/int8_add_op.cc
index 225a7a1..b1b7be2 100644
--- a/caffe2/operators/quantized/int8_add_op.cc
+++ b/caffe2/operators/quantized/int8_add_op.cc
@@ -1,6 +1,8 @@
+#include "caffe2/operators/quantized/int8_add_op.h"
+
 #include <algorithm>
 
-#include "caffe2/operators/quantized/int8_add_op.h"
+#include "caffe2/operators/utility_ops.h"
 
 namespace caffe2 {
 
@@ -55,6 +57,8 @@ OPERATOR_SCHEMA(Int8Sum)
     .NumInputs(1, std::numeric_limits<int>::max())
     .NumOutputs(1)
     .AllowInplace({{0, 0}, {1, 0}})
+    .CostInferenceFunction(CostInferenceForSum)
+    .IdenticalTypeAndShapeOfInput(0)
     .Arg("Y_scale", "Output tensor quantization scale")
     .Arg("Y_zero_point", "Output tensor quantization offset");
 
@@ -62,6 +66,8 @@ OPERATOR_SCHEMA(Int8SumRelu)
     .NumInputs(1, std::numeric_limits<int>::max())
     .NumOutputs(1)
     .AllowInplace({{0, 0}, {1, 0}})
+    .CostInferenceFunction(CostInferenceForSum)
+    .IdenticalTypeAndShapeOfInput(0)
     .Arg("Y_scale", "Output tensor quantization scale")
     .Arg("Y_zero_point", "Output tensor quantization offset");
 
diff --git a/caffe2/operators/quantized/int8_concat_op.cc b/caffe2/operators/quantized/int8_concat_op.cc
index 8950d41..ae601a3 100644
--- a/caffe2/operators/quantized/int8_concat_op.cc
+++ b/caffe2/operators/quantized/int8_concat_op.cc
@@ -1,5 +1,7 @@
 #include "caffe2/operators/quantized/int8_concat_op.h"
 
+#include "caffe2/operators/concat_split_op.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8Concat, int8::Int8ConcatOp);
@@ -14,6 +16,9 @@ OPERATOR_SCHEMA(Int8Concat)
         "add_axis",
         "Pass 1 to add the axis specified in arg 'axis' to all "
         "input tensors")
+    .TensorInferenceFunction(
+        OpSchema::NeedsAllInputShapes(TensorInferenceForConcat))
+    .CostInferenceFunction(CostInferenceForConcat)
     .SetDoc("Concatenate a list of tensors into a single tensor")
     .Output(0, "concat_result", "Concatenated tensor")
     .Output(1, "split_info", "The dimensions of the inputs.")
diff --git a/caffe2/operators/quantized/int8_fc_op.cc b/caffe2/operators/quantized/int8_fc_op.cc
index ee7d605..0a8e504 100644
--- a/caffe2/operators/quantized/int8_fc_op.cc
+++ b/caffe2/operators/quantized/int8_fc_op.cc
@@ -1,12 +1,19 @@
 #include "caffe2/operators/quantized/int8_fc_op.h"
 
+#include <functional>
+
+#include "caffe2/operators/fc_inference.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8FC, int8::Int8FCOp);
 
+using namespace std::placeholders;
 OPERATOR_SCHEMA(Int8FC)
     .NumInputs(3)
     .NumOutputs(1)
+    .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
+    .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
     .SetDoc(R"DOC(
 Computes the result of passing an input vector X into a fully
 connected layer with 2D weight matrix W and 1D bias vector b. That is,
diff --git a/caffe2/operators/quantized/int8_flatten_op.cc b/caffe2/operators/quantized/int8_flatten_op.cc
index a9d5a5e..14e1381 100644
--- a/caffe2/operators/quantized/int8_flatten_op.cc
+++ b/caffe2/operators/quantized/int8_flatten_op.cc
@@ -1,5 +1,7 @@
 #include "caffe2/operators/quantized/int8_flatten_op.h"
 
+#include "caffe2/operators/flatten_op.h"
+
 namespace caffe2 {
 
 REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
@@ -7,6 +9,7 @@ REGISTER_CPU_OPERATOR(Int8Flatten, int8::Int8FlattenOp);
 OPERATOR_SCHEMA(Int8Flatten)
     .NumInputs(1)
     .NumOutputs(1)
+    .TensorInferenceFunction(TensorInferenceForFlatten)
     .SetDoc(R"DOC(
 Flattens the input tensor into a 2D matrix. If input tensor has shape
 (d_0, d_1, ... d_n) then the output will have shape
diff --git a/caffe2/operators/quantized/int8_given_tensor_fill_op.cc b/caffe2/operators/quantized/int8_given_tensor_fill_op.cc
index dc108d1..709c222 100644
--- a/caffe2/operators/quantized/int8_given_tensor_fill_op.cc
+++ b/caffe2/operators/quantized/int8_given_tensor_fill_op.cc
@@ -12,7 +12,8 @@ OPERATOR_SCHEMA(Int8GivenTensorFill)
     .SetDoc(R"DOC(
 Creates quantized tensor of type char(byte) with scale and zero point info.
 )DOC")
-    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+    .TensorInferenceFunction(FillerTensorInference<>);
 
 OPERATOR_SCHEMA(Int8GivenIntTensorFill)
     .NumInputs(0)
@@ -24,7 +25,8 @@ OPERATOR_SCHEMA(Int8GivenIntTensorFill)
     .SetDoc(R"DOC(
 Creates quantized tensor of type int32 with scale and zero point info.
 )DOC")
-    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info");
+    .Output(0, "Tensor", "An Int8TensorCPU with scale and zero point info")
+    .TensorInferenceFunction(FillerTensorInference<>);
 
 REGISTER_CPU_OPERATOR(Int8GivenTensorFill, int8::Int8GivenTensorFillOp);
 REGISTER_CPU_OPERATOR(Int8GivenIntTensorFill, int8::Int8GivenIntTensorFillOp);
diff --git a/caffe2/operators/utility_ops.h b/caffe2/operators/utility_ops.h
index a77ee9a..7b92b2c 100644
--- a/caffe2/operators/utility_ops.h
+++ b/caffe2/operators/utility_ops.h
@@ -317,6 +317,15 @@ class SumOp : public Operator<Context> {
   }
 };
 
+inline OpSchema::Cost CostInferenceForSum(
+    const OperatorDef& def,
+    const std::vector<TensorShape>& in) {
+  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
+  cost.flops *= (in.size() - 1);
+  cost.params_bytes = 0;
+  return cost;
+}
+
 // WeightedSumOp computes the weighted sum of several tensors. The input should
 // be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
 // shape, and weight_i are size 1 tensors that specifies the weight of each
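
Note on the shape rules above: the arithmetic that `TensorInferenceForFlatten` and `TensorInferenceForConcat` implement is small enough to check in isolation. The standalone C++ sketch below is not part of the commit; it uses bare `std::vector<int64_t>` shapes in place of caffe2's `TensorShape` protos, hypothetical helper names `FlattenShape` and `ConcatShape`, and covers only the default `add_axis = 0` concat case.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

using Shape = std::vector<int64_t>;

// Flatten rule: dims [0, axis) multiply into the outer extent,
// dims [axis, n) into the inner extent; the result is always 2-D.
Shape FlattenShape(const Shape& in, size_t axis) {
  int64_t outer = 1;
  int64_t inner = 1;
  for (size_t i = 0; i < in.size(); ++i) {
    if (i < axis) {
      outer *= in[i];
    } else {
      inner *= in[i];
    }
  }
  return {outer, inner};
}

// Concat rule (add_axis = 0): every input must match the first input on all
// dims except `axis`; the output extent along `axis` is the sum of extents.
Shape ConcatShape(const std::vector<Shape>& in, size_t axis) {
  assert(!in.empty() && axis < in[0].size());
  Shape out = in[0];
  for (size_t i = 1; i < in.size(); ++i) {
    assert(in[i].size() == out.size());
    for (size_t j = 0; j < out.size(); ++j) {
      if (j == axis) {
        out[j] += in[i][j];
      } else {
        assert(in[i][j] == out[j]);
      }
    }
  }
  return out;
}

int main() {
  const Shape f = FlattenShape({2, 3, 4, 5}, 1);                   // {2, 60}
  const Shape c = ConcatShape({{8, 16, 7, 7}, {8, 32, 7, 7}}, 1);  // {8, 48, 7, 7}
  std::cout << f[0] << " x " << f[1] << "\n";  // prints "2 x 60"
  std::cout << c[1] << "\n";                   // prints "48"
  return 0;
}

Compiled as-is this prints "2 x 60" and "48", which is what the schemas above would report for a Flatten with axis = 1 and a channel-wise Concat of 16- and 32-channel feature maps; the real functions additionally carry the dtype through and emit the optional `split_info` output.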
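The Sum cost rule moved into utility_ops.h admits the same treatment: summing N same-shaped tensors of E elements performs (N - 1) * E additions and carries no parameter bytes, which is why the diff multiplies `cost.flops` by `in.size() - 1` after the `PointwiseCostInference<1>` base estimate and zeroes `params_bytes`. A minimal sketch of just that arithmetic, with an illustrative `Cost` struct and `SumCost` helper that are stand-ins rather than caffe2 API:

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for the flops/params_bytes fields of OpSchema::Cost.
struct Cost {
  uint64_t flops = 0;
  uint64_t params_bytes = 0;
};

// Elementwise sum of num_inputs tensors of the given shape:
// each of the E output elements is touched num_inputs - 1 times.
Cost SumCost(size_t num_inputs, const std::vector<int64_t>& shape) {
  uint64_t elems = 1;
  for (int64_t d : shape) {
    elems *= static_cast<uint64_t>(d);
  }
  Cost cost;
  cost.flops = (num_inputs - 1) * elems;
  cost.params_bytes = 0;  // a sum has no trainable parameters
  return cost;
}

int main() {
  const Cost c = SumCost(3, {8, 128});  // three 8x128 tensors
  std::cout << c.flops << "\n";         // prints "2048"
  return 0;
}

Because the estimate depends only on input count and shape, Int8Sum and Int8SumRelu can register the very same `CostInferenceForSum` that the float Sum operator uses.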