support ReduceLayer without reshape layer.
[platform/upstream/opencv.git] / modules / dnn / src / layers / reduce_layer.cpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #include "../precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7 #include "../op_cuda.hpp"
8 #include "../op_webnn.hpp"
9
10 #include <float.h>
11 #include <algorithm>
12 #include <numeric>
13 using std::max;
14 using std::min;
15
16 #include <opencv2/core/utils/logger.hpp>
17
18 namespace cv
19 {
20 namespace dnn
21 {
22
23 class ReduceLayerImpl CV_FINAL : public ReduceLayer
24 {
25 public:
26     ReduceLayerImpl(const LayerParams& params)
27     {
28         setParamsFrom(params);
29         // set reduce type
30         CV_Assert(params.has("reduce"));
31         String typeString = toLowerCase(params.get<String>("reduce"));
32         if (typeString == "max")
33             reduceType= MAX;
34         else if (typeString == "min")
35             reduceType= MIN;
36         else if (typeString == "ave")
37             reduceType= AVE;
38         else if (typeString == "sum")
39             reduceType= SUM;
40         else if (typeString == "sum_square")
41             reduceType= SUM_SQUARE;
42         else if (typeString == "l1")
43             reduceType= L1;
44         else if (typeString == "l2")
45             reduceType= L2;
46         else if (typeString == "log_sum")
47             reduceType= LOG_SUM;
48         else if (typeString == "log_sum_exp")
49             reduceType= LOG_SUM_EXP;
50         else if (typeString == "prod")
51             reduceType= PROD;
52         else
53             CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\"");
54
55         // set deleted dims
56         CV_Assert(params.has("deleted_dims"));
57         DictValue tempDims = params.get("deleted_dims");
58         int i, n = tempDims.size();
59         reduceDims.resize(n);
60         for (i = 0; i < n; i++)
61         {
62             reduceDims[i] = tempDims.get<int>(i);
63         }
64
65         CV_Assert(params.has("target_dims"));
66         tempDims = params.get("target_dims");
67         n = tempDims.size();
68         targetDims.resize(n);
69         for (i = 0; i < n; i++)
70         {
71             targetDims[i] = tempDims.get<int>(i);
72         }
73     }
74
75     virtual bool supportBackend(int backendId) CV_OVERRIDE
76     {
77         if (backendId == DNN_BACKEND_OPENCV)
78         {
79             return true;
80         }
81         return false;
82     }
83
84     // reduceType == MIN
85     struct ReduceOpMIN
86     {
87         float apply(const float* first, const float* last, const float ikarea = 1.0f)
88         {
89             return std::accumulate(first, last, FLT_MAX,
90                                    [](float a, float b)
91                                    {
92                                        return std::min(a, b);
93                                    });
94         }
95     };
96
97     // reduceType == MAX
98     struct ReduceOpMAX
99     {
100         float apply(const float* first, const float* last, const float ikarea = 1.0f)
101         {
102             return std::accumulate(first, last, -FLT_MAX,
103                                    [](float a, float b)
104                                    {
105                                        return std::max(a, b);
106                                    });
107         }
108     };
109
110     // reduceType == SUM
111     struct ReduceOpSUM
112     {
113         float apply(const float* first, const float* last, const float ikarea = 1.0f)
114         {
115             return std::accumulate(first, last, 0.f);
116         }
117     };
118
119     // reduceType == AVE
120     struct ReduceOpAVE
121     {
122         float apply(const float* first, const float* last, const float ikarea = 1.0f)
123         {
124             float output = std::accumulate(first, last, 0.f);
125             return output * ikarea;
126         }
127     };
128
129     // reduceType == SUM_SQUARE
130     struct ReduceOpSUM_SQUARE
131     {
132         float apply(const float* first, const float* last, const float ikarea = 1.0f)
133         {
134             return std::accumulate(first, last, 0.f,
135                                    [](float a, float b)
136                                    {
137                                        return a + b * b;
138                                    });
139         }
140     };
141
142     // reduceType == L1
143     struct ReduceOpL1
144     {
145         float apply(const float* first, const float* last, const float ikarea = 1.0f)
146         {
147             return std::accumulate(first, last, 0.f,
148                                    [](float a, float b)
149                                    {
150                                        return a + std::abs(b);
151                                    });
152         }
153     };
154
155     // reduceType == L2
156     struct ReduceOpL2
157     {
158         float apply(const float* first, const float* last, const float ikarea = 1.0f)
159         {
160             float output = std::accumulate(first, last, 0.f,
161                                            [](float a, float b)
162                                            {
163                                                return a + b * b;
164                                            });
165             return std::sqrt(output);
166         }
167     };
168
169     // reduceType == PROD
170     struct ReduceOpPROD
171     {
172         float apply(const float* first, const float* last, const float ikarea = 1.0f)
173         {
174             return std::accumulate(first, last, 1.0f, std::multiplies<float>());
175         }
176     };
177
178     // reduceType == LOG_SUM
179     struct ReduceOpLOG_SUM
180     {
181         float apply(const float* first, const float* last, const float ikarea = 1.0f)
182         {
183             float output = std::accumulate(first, last, 0.0f);
184             return std::log(output);
185         }
186     };
187
188     // reduceType == LOG_SUM_EXP
189     struct ReduceOpLOG_SUM_EXP
190     {
191         float apply(const float* first, const float* last, const float ikarea = 1.0f)
192         {
193             float output = std::accumulate(first, last, 0.0f,
194                                            [](float a, float b)
195                                            {
196                                                return a + std::exp(b);
197                                            });
198             return std::log(output);
199         }
200     };
201
202     template<typename Func>
203     class ReduceInvoker : public ParallelLoopBody
204     {
205     public:
206         const Mat* src;
207         Mat *dst;
208         std::vector<size_t> reduceDims;
209         int nstripes;
210         int reduceType;
211         Ptr<Func> func;
212
213         ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
214
215         static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
216         {
217             CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type());
218
219             ReduceInvoker<Func> p;
220
221             p.src = &src;
222             p.dst = &dst;
223
224             p.reduceDims = reduceDims;
225             p.nstripes = nstripes;
226             p.reduceType = reduceType;
227
228             parallel_for_(Range(0, nstripes), p, nstripes);
229         }
230
231         void operator()(const Range& r) const CV_OVERRIDE
232         {
233             size_t total = dst->total();
234             size_t stripeSize = (total + nstripes - 1)/nstripes;
235             size_t stripeStart = r.start*stripeSize;
236             size_t stripeEnd = std::min(r.end*stripeSize, total);
237             size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
238
239             float *dstData = (float *)dst->data;
240             float *srcData = (float *)src->data;
241
242             for (size_t ofs = stripeStart; ofs < stripeEnd;)
243             {
244                 const float* first = srcData + ofs * stride_w;
245                 const float* last = srcData + (ofs + 1) * stride_w;
246
247                 if (ofs < stripeEnd)
248                 {
249                     dstData[ofs] = func->apply(first, last, 1.0 / stride_w);
250                     ofs += 1;
251                 }
252             }
253         }
254     };
255
256     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
257     {
258         CV_TRACE_FUNCTION();
259         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
260
261         if (inputs_arr.depth() == CV_16S)
262         {
263             forward_fallback(inputs_arr, outputs_arr, internals_arr);
264             return;
265         }
266
267         std::vector<Mat> inputs, outputs;
268         inputs_arr.getMatVector(inputs);
269         outputs_arr.getMatVector(outputs);
270         CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM));
271         const int nstripes = getNumThreads();
272
273         switch (reduceType)
274         {
275             case MIN:
276             {
277                 ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
278                 break;
279             }
280             case MAX:
281             {
282                 ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
283                 break;
284             }
285             case AVE:
286             {
287                 ReduceInvoker<ReduceOpAVE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
288                 break;
289             }
290             case SUM:
291             {
292                 ReduceInvoker<ReduceOpSUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
293                 break;
294             }
295             case L1:
296             {
297                 ReduceInvoker<ReduceOpL1>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
298                 break;
299             }
300             case L2:
301             {
302                 ReduceInvoker<ReduceOpL2>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
303                 break;
304             }
305             case SUM_SQUARE:
306             {
307                 ReduceInvoker<ReduceOpSUM_SQUARE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
308                 break;
309             }
310             case PROD:
311             {
312                 ReduceInvoker<ReduceOpPROD>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
313                 break;
314             }
315             case LOG_SUM:
316             {
317                 ReduceInvoker<ReduceOpLOG_SUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
318                 break;
319             }
320             case LOG_SUM_EXP:
321             {
322                 ReduceInvoker<ReduceOpLOG_SUM_EXP>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
323                 break;
324             }
325             default:
326                 CV_Error(Error::StsNotImplemented, "Not implemented");
327                 break;
328         }
329     }
330
331     bool getMemoryShapes(const std::vector<MatShape> &inputs,
332                          const int requiredOutputs,
333                          std::vector<MatShape> &outputs,
334                          std::vector<MatShape> &internals) const CV_OVERRIDE
335     {
336         CV_Assert(inputs.size() > 0);
337         CV_Assert( reduceDims.size() !=0 && targetDims.size() != 0 && inputs[0].size() >= reduceDims.size());
338
339         // outShapeTmp can save the right number of `total(outShapeTmp)`. And the outShape is used as the final output shape.
340         std::vector<int> outShapeTmp, outShape;
341         outShape.assign(targetDims.begin(), targetDims.end());
342         if (inputs[0].size() == reduceDims.size())
343             outShapeTmp.push_back(1);
344         else
345         {
346             for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++)
347             {
348                 outShapeTmp.push_back(inputs[0][i]);
349             }
350         }
351
352         // Support dynamic shape of Batch size.
353         // Note that: when there are multiple dynamic inputs, we will give an error.
354         if (total(outShape) != total(outShapeTmp) && outShape[0] != outShapeTmp[0])
355         {
356                 outShape[0] = outShapeTmp[0];
357         }
358
359         CV_Assert(total(outShape) == total(outShapeTmp));
360         outputs.assign(1, outShape);
361
362         return false;
363     }
364
365     virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
366                              const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
367     {
368         if (reduceType== MAX || reduceType== MIN)
369         {
370             return true;
371         }
372         return false;
373     }
374
375     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
376                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
377     {
378         CV_UNUSED(inputs); // suppress unused variable warning
379         long flops = 0;
380         size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
381         for (int i = 0; i < outputs.size(); i++)
382         {
383             flops += total(outputs[i])*(stride_w);
384         }
385         return flops;
386     }
387 private:
388     enum ReduceType
389     {
390         MAX,
391         MIN,
392         AVE,
393         SUM,
394         L1,
395         L2,
396         PROD,
397         SUM_SQUARE,
398         LOG_SUM,
399         LOG_SUM_EXP
400     };
401 };
402
403 Ptr<ReduceLayer> ReduceLayer::create(const LayerParams& params)
404 {
405     return Ptr<ReduceLayer>(new ReduceLayerImpl(params));
406 }
407
408 }
409 }