47aec237c7bf7c07c8071e0b24c2d78be80300a7
[platform/upstream/opencv.git] / modules / dnn / src / layers / reduce_layer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #include "../precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7 #include "../op_cuda.hpp"
8 #include "../op_webnn.hpp"
9
10 #include <float.h>
11 #include <algorithm>
12 #include <numeric>
13 using std::max;
14 using std::min;
15
16 #include <opencv2/core/utils/logger.hpp>
17
18 namespace cv
19 {
20 namespace dnn
21 {
22
23 class ReduceLayerImpl CV_FINAL : public ReduceLayer
24 {
25 public:
26     ReduceLayerImpl(const LayerParams& params)
27     {
28         setParamsFrom(params);
29         // set reduce type
30         CV_Assert(params.has("reduce"));
31         String typeString = toLowerCase(params.get<String>("reduce"));
32         if (typeString == "max")
33             reduceType= MAX;
34         else if (typeString == "min")
35             reduceType= MIN;
36         else if (typeString == "ave")
37             reduceType= AVE;
38         else if (typeString == "sum")
39             reduceType= SUM;
40         else if (typeString == "sum_square")
41             reduceType= SUM_SQUARE;
42         else if (typeString == "l1")
43             reduceType= L1;
44         else if (typeString == "l2")
45             reduceType= L2;
46         else if (typeString == "log_sum")
47             reduceType= LOG_SUM;
48         else if (typeString == "log_sum_exp")
49             reduceType= LOG_SUM_EXP;
50         else if (typeString == "prod")
51             reduceType= PROD;
52         else
53             CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\"");
54
55         // set deleted dims
56         CV_Assert(params.has("deleted_dims"));
57         DictValue tempDims = params.get("deleted_dims");
58         int i, n = tempDims.size();
59         reduceDims.resize(n);
60         for (i = 0; i < n; i++)
61         {
62             reduceDims[i] = tempDims.get<int>(i);
63         }
64     }
65
66     virtual bool supportBackend(int backendId) CV_OVERRIDE
67     {
68         if (backendId == DNN_BACKEND_OPENCV)
69         {
70             return true;
71         }
72         return false;
73     }
74
75     // reduceType == MIN
76     struct ReduceOpMIN
77     {
78         float apply(const float* first, const float* last, const float ikarea = 1.0f)
79         {
80             return std::accumulate(first, last, FLT_MAX,
81                                    [](float a, float b)
82                                    {
83                                        return std::min(a, b);
84                                    });
85         }
86     };
87
88     // reduceType == MAX
89     struct ReduceOpMAX
90     {
91         float apply(const float* first, const float* last, const float ikarea = 1.0f)
92         {
93             return std::accumulate(first, last, -FLT_MAX,
94                                    [](float a, float b)
95                                    {
96                                        return std::max(a, b);
97                                    });
98         }
99     };
100
101     // reduceType == SUM
102     struct ReduceOpSUM
103     {
104         float apply(const float* first, const float* last, const float ikarea = 1.0f)
105         {
106             return std::accumulate(first, last, 0.f);
107         }
108     };
109
110     // reduceType == AVE
111     struct ReduceOpAVE
112     {
113         float apply(const float* first, const float* last, const float ikarea = 1.0f)
114         {
115             float output = std::accumulate(first, last, 0.f);
116             return output * ikarea;
117         }
118     };
119
120     // reduceType == SUM_SQUARE
121     struct ReduceOpSUM_SQUARE
122     {
123         float apply(const float* first, const float* last, const float ikarea = 1.0f)
124         {
125             return std::accumulate(first, last, 0.f,
126                                    [](float a, float b)
127                                    {
128                                        return a + b * b;
129                                    });
130         }
131     };
132
133     // reduceType == L1
134     struct ReduceOpL1
135     {
136         float apply(const float* first, const float* last, const float ikarea = 1.0f)
137         {
138             return std::accumulate(first, last, 0.f,
139                                    [](float a, float b)
140                                    {
141                                        return a + std::abs(b);
142                                    });
143         }
144     };
145
146     // reduceType == L2
147     struct ReduceOpL2
148     {
149         float apply(const float* first, const float* last, const float ikarea = 1.0f)
150         {
151             float output = std::accumulate(first, last, 0.f,
152                                            [](float a, float b)
153                                            {
154                                                return a + b * b;
155                                            });
156             return std::sqrt(output);
157         }
158     };
159
160     // reduceType == PROD
161     struct ReduceOpPROD
162     {
163         float apply(const float* first, const float* last, const float ikarea = 1.0f)
164         {
165             return std::accumulate(first, last, 1.0f, std::multiplies<float>());
166         }
167     };
168
169     // reduceType == LOG_SUM
170     struct ReduceOpLOG_SUM
171     {
172         float apply(const float* first, const float* last, const float ikarea = 1.0f)
173         {
174             float output = std::accumulate(first, last, 0.0f);
175             return std::log(output);
176         }
177     };
178
179     // reduceType == LOG_SUM_EXP
180     struct ReduceOpLOG_SUM_EXP
181     {
182         float apply(const float* first, const float* last, const float ikarea = 1.0f)
183         {
184             float output = std::accumulate(first, last, 0.0f,
185                                            [](float a, float b)
186                                            {
187                                                return a + std::exp(b);
188                                            });
189             return std::log(output);
190         }
191     };
192
193     template<typename Func>
194     class ReduceInvoker : public ParallelLoopBody
195     {
196     public:
197         const Mat* src;
198         Mat *dst;
199         std::vector<size_t> reduceDims;
200         int nstripes;
201         int reduceType;
202         Ptr<Func> func;
203
204         ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
205
206         static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
207         {
208             CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type());
209
210             ReduceInvoker<Func> p;
211
212             p.src = &src;
213             p.dst = &dst;
214
215             p.reduceDims = reduceDims;
216             p.nstripes = nstripes;
217             p.reduceType = reduceType;
218
219             parallel_for_(Range(0, nstripes), p, nstripes);
220         }
221
222         void operator()(const Range& r) const CV_OVERRIDE
223         {
224             size_t total = dst->total();
225             size_t stripeSize = (total + nstripes - 1)/nstripes;
226             size_t stripeStart = r.start*stripeSize;
227             size_t stripeEnd = std::min(r.end*stripeSize, total);
228             size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
229
230             float *dstData = (float *)dst->data;
231             float *srcData = (float *)src->data;
232
233             for (size_t ofs = stripeStart; ofs < stripeEnd;)
234             {
235                 const float* first = srcData + ofs * stride_w;
236                 const float* last = srcData + (ofs + 1) * stride_w;
237
238                 if (ofs < stripeEnd)
239                 {
240                     dstData[ofs] = func->apply(first, last, 1.0 / stride_w);
241                     ofs += 1;
242                 }
243             }
244         }
245     };
246
247     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
248     {
249         CV_TRACE_FUNCTION();
250         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
251
252         if (inputs_arr.depth() == CV_16S)
253         {
254             forward_fallback(inputs_arr, outputs_arr, internals_arr);
255             return;
256         }
257
258         std::vector<Mat> inputs, outputs;
259         inputs_arr.getMatVector(inputs);
260         outputs_arr.getMatVector(outputs);
261         CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM));
262         const int nstripes = getNumThreads();
263
264         switch (reduceType)
265         {
266             case MIN:
267             {
268                 ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
269                 break;
270             }
271             case MAX:
272             {
273                 ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
274                 break;
275             }
276             case AVE:
277             {
278                 ReduceInvoker<ReduceOpAVE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
279                 break;
280             }
281             case SUM:
282             {
283                 ReduceInvoker<ReduceOpSUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
284                 break;
285             }
286             case L1:
287             {
288                 ReduceInvoker<ReduceOpL1>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
289                 break;
290             }
291             case L2:
292             {
293                 ReduceInvoker<ReduceOpL2>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
294                 break;
295             }
296             case SUM_SQUARE:
297             {
298                 ReduceInvoker<ReduceOpSUM_SQUARE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
299                 break;
300             }
301             case PROD:
302             {
303                 ReduceInvoker<ReduceOpPROD>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
304                 break;
305             }
306             case LOG_SUM:
307             {
308                 ReduceInvoker<ReduceOpLOG_SUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
309                 break;
310             }
311             case LOG_SUM_EXP:
312             {
313                 ReduceInvoker<ReduceOpLOG_SUM_EXP>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
314                 break;
315             }
316             default:
317                 CV_Error(Error::StsNotImplemented, "Not implemented");
318                 break;
319         }
320     }
321
322     bool getMemoryShapes(const std::vector<MatShape> &inputs,
323                          const int requiredOutputs,
324                          std::vector<MatShape> &outputs,
325                          std::vector<MatShape> &internals) const CV_OVERRIDE
326     {
327         CV_Assert(inputs.size() > 0);
328         CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size());
329
330         std::vector<int> outShape;
331         if (inputs[0].size() == reduceDims.size())
332             outShape.push_back(1);
333         else
334         {
335             for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++)
336             {
337                 outShape.push_back(inputs[0][i]);
338             }
339         }
340         outputs.assign(1, outShape);
341
342         return false;
343     }
344
345     virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
346                              const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
347     {
348         if (reduceType== MAX || reduceType== MIN)
349         {
350             return true;
351         }
352         return false;
353     }
354
355     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
356                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
357     {
358         CV_UNUSED(inputs); // suppress unused variable warning
359         long flops = 0;
360         size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
361         for (int i = 0; i < outputs.size(); i++)
362         {
363             flops += total(outputs[i])*(stride_w);
364         }
365         return flops;
366     }
367 private:
368     enum ReduceType
369     {
370         MAX,
371         MIN,
372         AVE,
373         SUM,
374         L1,
375         L2,
376         PROD,
377         SUM_SQUARE,
378         LOG_SUM,
379         LOG_SUM_EXP
380     };
381 };
382
383 Ptr<ReduceLayer> ReduceLayer::create(const LayerParams& params)
384 {
385     return Ptr<ReduceLayer>(new ReduceLayerImpl(params));
386 }
387
388 }
389 }