1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #include "../precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7 #include "../op_cuda.hpp"
8 #include "../op_webnn.hpp"
16 #include <opencv2/core/utils/logger.hpp>
23 class ReduceLayerImpl CV_FINAL : public ReduceLayer
// Constructor: maps the mandatory "reduce" parameter string to a reduceType
// enum value, then reads the "deleted_dims" (sizes of the axes reduced away)
// and "target_dims" (expected output shape) integer lists.
// NOTE(review): this is an elided excerpt — braces, several enum assignments
// and the vector resizes between the visible lines are not shown here.
26 ReduceLayerImpl(const LayerParams& params)
28 setParamsFrom(params);
// The reduce mode is mandatory; fail fast if absent.
30 CV_Assert(params.has("reduce"));
// Comparison is case-insensitive via toLowerCase.
31 String typeString = toLowerCase(params.get<String>("reduce"));
32 if (typeString == "max")
34 else if (typeString == "min")
36 else if (typeString == "ave")
38 else if (typeString == "sum")
40 else if (typeString == "sum_square")
41 reduceType= SUM_SQUARE;
42 else if (typeString == "l1")
44 else if (typeString == "l2")
46 else if (typeString == "log_sum")
48 else if (typeString == "log_sum_exp")
49 reduceType= LOG_SUM_EXP;
50 else if (typeString == "prod")
// Any unrecognized mode string is a hard error.
53 CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\"");
// "deleted_dims" lists the sizes of the dimensions being reduced away;
// their product is the per-output reduction extent used by ReduceInvoker.
56 CV_Assert(params.has("deleted_dims"));
57 DictValue tempDims = params.get("deleted_dims");
58 int i, n = tempDims.size();
60 for (i = 0; i < n; i++)
62 reduceDims[i] = tempDims.get<int>(i);
// "target_dims" describes the layer's intended output shape; it is
// validated against the input shape in getMemoryShapes.
65 CV_Assert(params.has("target_dims"));
66 tempDims = params.get("target_dims");
69 for (i = 0; i < n; i++)
71 targetDims[i] = tempDims.get<int>(i);
// Reports which inference backends can run this layer. Only the check for
// the plain OpenCV backend is visible in this excerpt; checks for other
// backends (CUDA/WebNN headers are included above) are elided from view.
75 virtual bool supportBackend(int backendId) CV_OVERRIDE
77 if (backendId == DNN_BACKEND_OPENCV)
// reduceType == MIN: fold [first, last) with std::min starting from FLT_MAX,
// yielding the smallest element. ikarea (inverse reduced area) is unused.
87 float apply(const float* first, const float* last, const float ikarea = 1.0f)
89 return std::accumulate(first, last, FLT_MAX,
92 return std::min(a, b);
// reduceType == MAX: fold with std::max starting from -FLT_MAX, yielding the
// largest element in [first, last). ikarea is unused.
100 float apply(const float* first, const float* last, const float ikarea = 1.0f)
102 return std::accumulate(first, last, -FLT_MAX,
105 return std::max(a, b);
// reduceType == SUM: plain sum of the range; ikarea is unused.
113 float apply(const float* first, const float* last, const float ikarea = 1.0f)
115 return std::accumulate(first, last, 0.f);
// reduceType == AVE: arithmetic mean — sum scaled by ikarea, the reciprocal
// of the reduction extent (ReduceInvoker passes 1.0 / stride_w).
122 float apply(const float* first, const float* last, const float ikarea = 1.0f)
124 float output = std::accumulate(first, last, 0.f);
125 return output * ikarea;
129 // reduceType == SUM_SQUARE
130 struct ReduceOpSUM_SQUARE
// Folds the range with a binary lambda (its body is elided from this
// excerpt; presumably a + b*b for a sum of squares — TODO confirm).
132 float apply(const float* first, const float* last, const float ikarea = 1.0f)
134 return std::accumulate(first, last, 0.f,
// reduceType == L1: sum of absolute values of the range; ikarea is unused.
145 float apply(const float* first, const float* last, const float ikarea = 1.0f)
147 return std::accumulate(first, last, 0.f,
150 return a + std::abs(b);
// reduceType == L2: sqrt of an accumulated value. The accumulation lambda's
// body is elided here; presumably a + b*b so the result is the Euclidean
// norm — TODO confirm against the full source.
158 float apply(const float* first, const float* last, const float ikarea = 1.0f)
160 float output = std::accumulate(first, last, 0.f,
165 return std::sqrt(output);
169 // reduceType == PROD
// Product of all elements, folded with std::multiplies from identity 1.0f.
172 float apply(const float* first, const float* last, const float ikarea = 1.0f)
174 return std::accumulate(first, last, 1.0f, std::multiplies<float>());
178 // reduceType == LOG_SUM
179 struct ReduceOpLOG_SUM
// log(sum(x)): plain sum first, then a single log. Returns -inf/NaN for
// non-positive sums, matching the mathematical definition's domain.
181 float apply(const float* first, const float* last, const float ikarea = 1.0f)
183 float output = std::accumulate(first, last, 0.0f);
184 return std::log(output);
188 // reduceType == LOG_SUM_EXP
189 struct ReduceOpLOG_SUM_EXP
// log(sum(exp(x))). Direct formulation — no max-subtraction stabilization,
// so std::exp(b) can overflow for large inputs; kept as-is to match the
// reference behavior.
191 float apply(const float* first, const float* last, const float ikarea = 1.0f)
193 float output = std::accumulate(first, last, 0.0f,
196 return a + std::exp(b);
198 return std::log(output);
// Parallel reduction driver. Each output element `ofs` of dst is produced by
// applying Func to the contiguous slab of `stride_w` input floats beginning
// at srcData + ofs * stride_w, where stride_w is the product of the reduced
// axis sizes. Work is split over `nstripes` stripes via parallel_for_.
// NOTE(review): elided excerpt — member declarations, braces and some of
// run()'s field assignments are missing between the visible lines.
202 template<typename Func>
203 class ReduceInvoker : public ParallelLoopBody
208 std::vector<size_t> reduceDims;
213 ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
215 static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
// Continuity + CV_32F are required because operator() walks raw float
// pointers computed from dst->total() and stride_w.
217 CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type());
219 ReduceInvoker<Func> p;
224 p.reduceDims = reduceDims;
225 p.nstripes = nstripes;
226 p.reduceType = reduceType;
228 parallel_for_(Range(0, nstripes), p, nstripes);
231 void operator()(const Range& r) const CV_OVERRIDE
233 size_t total = dst->total();
// Ceiling division so every output element lands in exactly one stripe.
234 size_t stripeSize = (total + nstripes - 1)/nstripes;
235 size_t stripeStart = r.start*stripeSize;
// Clamp the last stripe to the real element count.
236 size_t stripeEnd = std::min(r.end*stripeSize, total);
// stride_w = product of reduced-axis sizes = inputs consumed per output.
// NOTE(review): the init literal `1` is an int; accumulate deduces its
// result type from the init — verify against the full source whether a
// size_t init was intended.
237 size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
239 float *dstData = (float *)dst->data;
240 float *srcData = (float *)src->data;
242 for (size_t ofs = stripeStart; ofs < stripeEnd;)
244 const float* first = srcData + ofs * stride_w;
245 const float* last = srcData + (ofs + 1) * stride_w;
// 1.0 / stride_w is the reciprocal reduction area; only AVE uses it.
249 dstData[ofs] = func->apply(first, last, 1.0 / stride_w);
// Runs the reduction on the CPU path: dispatches on reduceType to the
// matching ReduceInvoker<Op>::run specialization. CV_16S inputs fall back
// to the generic fallback (which converts and re-enters). NOTE(review):
// elided excerpt — the switch/if structure around the dispatch lines and
// the closing braces are not visible here.
256 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
259 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Half-precision (stored as CV_16S) is not handled natively below.
261 if (inputs_arr.depth() == CV_16S)
263 forward_fallback(inputs_arr, outputs_arr, internals_arr);
267 std::vector<Mat> inputs, outputs;
268 inputs_arr.getMatVector(inputs);
269 outputs_arr.getMatVector(outputs);
// A second input is only accepted for SUM (per this assertion).
270 CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM));
271 const int nstripes = getNumThreads();
277 ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
282 ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
287 ReduceInvoker<ReduceOpAVE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
292 ReduceInvoker<ReduceOpSUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
297 ReduceInvoker<ReduceOpL1>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
302 ReduceInvoker<ReduceOpL2>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
307 ReduceInvoker<ReduceOpSUM_SQUARE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
312 ReduceInvoker<ReduceOpPROD>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
317 ReduceInvoker<ReduceOpLOG_SUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
322 ReduceInvoker<ReduceOpLOG_SUM_EXP>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
// Any reduceType not matched above is a programming error.
326 CV_Error(Error::StsNotImplemented, "Not implemented");
// Computes the output shape. targetDims is taken as the desired output
// shape; outShapeTmp reconstructs the shape implied by the input (kept axes
// only) to cross-check the element count. If the totals disagree only in
// the batch axis, the batch size is taken from the input (dynamic batch).
// NOTE(review): elided excerpt — braces and else-branches are not visible.
331 bool getMemoryShapes(const std::vector<MatShape> &inputs,
332 const int requiredOutputs,
333 std::vector<MatShape> &outputs,
334 std::vector<MatShape> &internals) const CV_OVERRIDE
336 CV_Assert(inputs.size() > 0);
337 CV_Assert( reduceDims.size() !=0 && targetDims.size() != 0 && inputs[0].size() >= reduceDims.size());
339 // outShapeTmp can save the right number of `total(outShapeTmp)`. And the outShape is used as the final output shape.
340 std::vector<int> outShapeTmp, outShape;
341 outShape.assign(targetDims.begin(), targetDims.end());
// Reducing over every input axis collapses to a single element.
342 if (inputs[0].size() == reduceDims.size())
343 outShapeTmp.push_back(1);
// Otherwise keep the leading (non-reduced) input axes.
346 for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++)
348 outShapeTmp.push_back(inputs[0][i]);
352 // Support dynamic shape of Batch size.
353 // Note that: when there are multiple dynamic inputs, we will give an error.
354 if (total(outShape) != total(outShapeTmp) && outShape[0] != outShapeTmp[0])
356 outShape[0] = outShapeTmp[0];
// After the optional batch fix-up the element counts must match exactly.
359 CV_Assert(total(outShape) == total(outShapeTmp));
360 outputs.assign(1, outShape);
// Quantization support hook. Only the MAX/MIN branch head is visible in
// this excerpt (those reductions commute with an affine quantization map);
// the branch bodies and the return for other types are elided.
365 virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
366 const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
368 if (reduceType== MAX || reduceType== MIN)
// FLOPS estimate: each output element costs ~stride_w operations, where
// stride_w is the product of the reduced axis sizes.
375 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
376 const std::vector<MatShape> &outputs) const CV_OVERRIDE
378 CV_UNUSED(inputs); // suppress unused variable warning
380 size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
381 for (int i = 0; i < outputs.size(); i++)
383 flops += total(outputs[i])*(stride_w);
// Public factory: builds the concrete ReduceLayerImpl from layer params.
403 Ptr<ReduceLayer> ReduceLayer::create(const LayerParams& params)
405 return Ptr<ReduceLayer>(new ReduceLayerImpl(params));