1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #include "../precomp.hpp"
6 #include "layers_common.hpp"
// Concrete implementation of ReduceLayerInt8 (marked CV_FINAL).
// The visible code accepts the reduce types "max" and "min" and operates on CV_8S data.
17 class ReduceLayerInt8Impl CV_FINAL : public ReduceLayerInt8
// Builds the layer from its parameters. Two entries are mandatory (both are asserted):
//   "reduce"       - reduction kind, compared case-insensitively against "max" and "min";
//   "deleted_dims" - list of integers copied element by element into reduceDims.
20 ReduceLayerInt8Impl(const LayerParams& params)
23 CV_Assert(params.has("reduce"));
// Lower-cased first so that the comparisons below ignore the caller's letter case.
24 String typeString = toLowerCase(params.get<String>("reduce"));
// NOTE(review): no statement is visible after either comparison below; presumably each
// branch records the selected reduction kind - confirm against the complete file.
25 if (typeString == "max")
27 else if (typeString == "min")
// Reports the offending value when the reduce type is not recognised.
30 CV_Error(Error::StsBadArg, "Unknown reduce type \"" + typeString + "\"");
33 CV_Assert(params.has("deleted_dims"));
34 DictValue tempDims = params.get("deleted_dims");
35 int i, n = tempDims.size();
// NOTE(review): entries are read as int and stored without a sign or zero check. reduceDims is
// later handed to a std::vector<size_t> parameter, so a negative entry would wrap to a huge
// value - consider validating that every entry is positive.
37 for (i = 0; i < n; i++)
39 reduceDims[i] = tempDims.get<int>(i);
// Backend capability query. The only backend tested for in the visible code is
// DNN_BACKEND_OPENCV.
// NOTE(review): the return statements are not visible here; presumably true for that
// backend and false otherwise - confirm against the complete file.
43 virtual bool supportBackend(int backendId) CV_OVERRIDE
45 if (backendId == DNN_BACKEND_OPENCV)
/**
 * Returns the smallest value in the half-open range [first, last).
 *
 * @param first pointer to the first element of the range
 * @param last  pointer one past the final element of the range
 * @return the minimum element, or INT8_MAX (the identity of min) for an empty range
 */
int8_t apply(const int8_t* first, const int8_t* last)
{
    // An empty range has no element to dereference; fall back to the identity value.
    if (first == last)
        return INT8_MAX;
    return *std::min_element(first, last);
}
/**
 * Returns the largest value in the half-open range [first, last).
 *
 * @param first pointer to the first element of the range
 * @param last  pointer one past the final element of the range
 * @return the maximum element, or INT8_MIN (the identity of max) for an empty range
 */
int8_t apply(const int8_t* first, const int8_t* last)
{
    // An empty range has no element to dereference; fall back to the identity value.
    if (first == last)
        return INT8_MIN;
    return *std::max_element(first, last);
}
78 template<typename Func>
79 class ReduceInvoker : public ParallelLoopBody
84 std::vector<size_t> reduceDims;
89 ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
91 static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
93 CV_Assert_N(src.isContinuous(), dst.isContinuous(), src.type() == CV_8S, src.type() == dst.type());
95 ReduceInvoker<Func> p;
100 p.reduceDims = reduceDims;
101 p.nstripes = nstripes;
102 p.reduceType = reduceType;
104 parallel_for_(Range(0, nstripes), p, nstripes);
107 void operator()(const Range& r) const CV_OVERRIDE
109 size_t total = dst->total();
110 size_t stripeSize = (total + nstripes - 1)/nstripes;
111 size_t stripeStart = r.start*stripeSize;
112 size_t stripeEnd = std::min(r.end*stripeSize, total);
113 size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
115 int8_t *dstData = (int8_t *)dst->data;
116 int8_t *srcData = (int8_t *)src->data;
118 for (size_t ofs = stripeStart; ofs < stripeEnd;)
120 const int8_t* first = srcData + ofs * totalDeleted;
121 const int8_t* last = srcData + (ofs + 1) * totalDeleted;
123 dstData[ofs] = func->apply(first, last);
// Runs the reduction. inputs_arr and outputs_arr are unpacked into Mat vectors, exactly one
// input is required, and the work is split into getNumThreads() stripes.
// internals_arr is not referenced by the visible code.
129 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
132 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
134 std::vector<Mat> inputs, outputs;
135 inputs_arr.getMatVector(inputs);
136 outputs_arr.getMatVector(outputs);
// NOTE(review): only the input count is asserted; outputs[0] is used below without a
// matching check on outputs.size().
137 CV_Assert(inputs.size() == 1);
138 const int nstripes = getNumThreads();
// NOTE(review): the lines that choose between the calls below are not visible here;
// presumably a dispatch on reduceType - confirm against the complete file.
// Minimum reduction.
144 ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
// Maximum reduction.
149 ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
// Raised for a reduce type that neither call above handles.
153 CV_Error(Error::StsNotImplemented, "Not implemented");
// Derives the output shape from the first input shape: the trailing reduceDims.size()
// dimensions are dropped and the leading ones are kept. When every dimension is reduced
// the output shape is {1}. Exactly one output shape is written to `outputs`;
// requiredOutputs and internals are not referenced by the visible code.
158 bool getMemoryShapes(const std::vector<MatShape> &inputs,
159 const int requiredOutputs,
160 std::vector<MatShape> &outputs,
161 std::vector<MatShape> &internals) const CV_OVERRIDE
163 CV_Assert(inputs.size() > 0);
// Guarantees the subtraction in the loop bound below cannot go negative.
164 CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size());
166 std::vector<int> outShape;
// Full reduction: every input dimension is collapsed.
167 if (inputs[0].size() == reduceDims.size())
168 outShape.push_back(1);
// Copies the leading dimensions that are not reduced.
// NOTE(review): `int i` is compared with the result of a size() subtraction, which is a
// signed/unsigned mix if size() is unsigned (MatShape is declared elsewhere - confirm).
171 for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++)
173 outShape.push_back(inputs[0][i]);
176 outputs.assign(1, outShape);
// Quantization hook taking per-blob scales and zero points plus the layer parameters.
// NOTE(review): the body is not visible here, so the returned value and any use of the
// arguments cannot be described - confirm against the complete file.
181 virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
182 const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
187 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
188 const std::vector<MatShape> &outputs) const CV_OVERRIDE
190 CV_UNUSED(inputs); // suppress unused variable warning
192 size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
193 for (int i = 0; i < outputs.size(); i++)
195 flops += total(outputs[i])*(totalDeleted);
207 Ptr<ReduceLayerInt8> ReduceLayerInt8::create(const LayerParams& params)
209 return Ptr<ReduceLayerInt8>(new ReduceLayerInt8Impl(params));