1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #include "../precomp.hpp"
6 #include "opencv2/core/hal/intrin.hpp"
7 #include "../op_cuda.hpp"
8 #include "../op_webnn.hpp"
16 #include <opencv2/core/utils/logger.hpp>
23 class ReduceLayerImpl CV_FINAL : public ReduceLayer
// Constructor: maps the mandatory "reduce" parameter string to a reduceType
// enum value, then reads the "deleted_dims" (sizes of the axes reduced away)
// and "target_dims" (expected output shape) integer lists.
// NOTE(review): this is an elided excerpt — braces, several enum assignments
// and the vector resizes between the visible lines are not shown here.
26 ReduceLayerImpl(const LayerParams& params)
28 setParamsFrom(params);
// The reduce mode is mandatory; fail fast if absent.
30 CV_Assert(params.has("reduce"));
// Comparison is case-insensitive via toLowerCase.
31 String typeString = toLowerCase(params.get<String>("reduce"));
32 if (typeString == "max")
34 else if (typeString == "min")
36 else if (typeString == "ave")
38 else if (typeString == "sum")
40 else if (typeString == "sum_square")
41 reduceType= SUM_SQUARE;
42 else if (typeString == "l1")
44 else if (typeString == "l2")
46 else if (typeString == "log_sum")
48 else if (typeString == "log_sum_exp")
49 reduceType= LOG_SUM_EXP;
50 else if (typeString == "prod")
// Any unrecognized mode string is a hard error.
53 CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\"");
// "deleted_dims" lists the sizes of the dimensions being reduced away;
// their product is the per-output reduction extent used by ReduceInvoker.
56 CV_Assert(params.has("deleted_dims"));
57 DictValue tempDims = params.get("deleted_dims");
58 int i, n = tempDims.size();
60 for (i = 0; i < n; i++)
62 reduceDims[i] = tempDims.get<int>(i);
// "target_dims" describes the layer's intended output shape; it is
// validated against the input shape in getMemoryShapes.
65 CV_Assert(params.has("target_dims"));
66 tempDims = params.get("target_dims");
69 for (i = 0; i < n; i++)
71 targetDims[i] = tempDims.get<int>(i);
// Reports which inference backends can run this layer. Only the check for
// the plain OpenCV backend is visible in this excerpt; checks for other
// backends (CUDA/WebNN headers are included above) are elided from view.
75 virtual bool supportBackend(int backendId) CV_OVERRIDE
77 if (backendId == DNN_BACKEND_OPENCV)
// reduceType == MIN: fold [first, last) with std::min starting from FLT_MAX,
// yielding the smallest element. ikarea (inverse reduced area) is unused.
87 float apply(const float* first, const float* last, const float ikarea = 1.0f)
89 return std::accumulate(first, last, FLT_MAX,
92 return std::min(a, b);
// reduceType == MAX: fold with std::max starting from -FLT_MAX, yielding the
// largest element in [first, last). ikarea is unused.
100 float apply(const float* first, const float* last, const float ikarea = 1.0f)
102 return std::accumulate(first, last, -FLT_MAX,
105 return std::max(a, b);
// reduceType == SUM: plain sum of the range; ikarea is unused.
113 float apply(const float* first, const float* last, const float ikarea = 1.0f)
115 return std::accumulate(first, last, 0.f);
// reduceType == AVE: arithmetic mean — sum scaled by ikarea, the reciprocal
// of the reduction extent (ReduceInvoker passes 1.0 / stride_w).
122 float apply(const float* first, const float* last, const float ikarea = 1.0f)
124 float output = std::accumulate(first, last, 0.f);
125 return output * ikarea;
129 // reduceType == SUM_SQUARE
130 struct ReduceOpSUM_SQUARE
// Folds the range with a binary lambda (its body is elided from this
// excerpt; presumably a + b*b for a sum of squares — TODO confirm).
132 float apply(const float* first, const float* last, const float ikarea = 1.0f)
134 return std::accumulate(first, last, 0.f,
// reduceType == L1: sum of absolute values of the range; ikarea is unused.
145 float apply(const float* first, const float* last, const float ikarea = 1.0f)
147 return std::accumulate(first, last, 0.f,
150 return a + std::abs(b);
// reduceType == L2: sqrt of an accumulated value. The accumulation lambda's
// body is elided here; presumably a + b*b so the result is the Euclidean
// norm — TODO confirm against the full source.
158 float apply(const float* first, const float* last, const float ikarea = 1.0f)
160 float output = std::accumulate(first, last, 0.f,
165 return std::sqrt(output);
169 // reduceType == PROD
// Product of all elements, folded with std::multiplies from identity 1.0f.
172 float apply(const float* first, const float* last, const float ikarea = 1.0f)
174 return std::accumulate(first, last, 1.0f, std::multiplies<float>());
178 // reduceType == LOG_SUM
179 struct ReduceOpLOG_SUM
// log(sum(x)): plain sum first, then a single log. Returns -inf/NaN for
// non-positive sums, matching the mathematical definition's domain.
181 float apply(const float* first, const float* last, const float ikarea = 1.0f)
183 float output = std::accumulate(first, last, 0.0f);
184 return std::log(output);
188 // reduceType == LOG_SUM_EXP
189 struct ReduceOpLOG_SUM_EXP
// log(sum(exp(x))). Direct formulation — no max-subtraction stabilization,
// so std::exp(b) can overflow for large inputs; kept as-is to match the
// reference behavior.
191 float apply(const float* first, const float* last, const float ikarea = 1.0f)
193 float output = std::accumulate(first, last, 0.0f,
196 return a + std::exp(b);
198 return std::log(output);
// Parallel reduction driver. Each output element `ofs` of dst is produced by
// applying Func to the contiguous slab of `stride_w` input floats beginning
// at srcData + ofs * stride_w, where stride_w is the product of the reduced
// axis sizes. Work is split over `nstripes` stripes via parallel_for_.
// NOTE(review): elided excerpt — member declarations, braces and some of
// run()'s field assignments are missing between the visible lines.
202 template<typename Func>
203 class ReduceInvoker : public ParallelLoopBody
208 std::vector<size_t> reduceDims;
213 ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
215 static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
// Continuity + CV_32F are required because operator() walks raw float
// pointers computed from dst->total() and stride_w.
217 CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type());
219 ReduceInvoker<Func> p;
224 p.reduceDims = reduceDims;
225 p.nstripes = nstripes;
226 p.reduceType = reduceType;
228 parallel_for_(Range(0, nstripes), p, nstripes);
231 void operator()(const Range& r) const CV_OVERRIDE
233 size_t total = dst->total();
// Ceiling division so every output element lands in exactly one stripe.
234 size_t stripeSize = (total + nstripes - 1)/nstripes;
235 size_t stripeStart = r.start*stripeSize;
// Clamp the last stripe to the real element count.
236 size_t stripeEnd = std::min(r.end*stripeSize, total);
// stride_w = product of reduced-axis sizes = inputs consumed per output.
// NOTE(review): the init literal `1` is an int; accumulate deduces its
// result type from the init — verify against the full source whether a
// size_t init was intended.
237 size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
239 float *dstData = (float *)dst->data;
240 float *srcData = (float *)src->data;
242 for (size_t ofs = stripeStart; ofs < stripeEnd;)
244 const float* first = srcData + ofs * stride_w;
245 const float* last = srcData + (ofs + 1) * stride_w;
// 1.0 / stride_w is the reciprocal reduction area; only AVE uses it.
249 dstData[ofs] = func->apply(first, last, 1.0 / stride_w);
// Runs the reduction on the CPU path: dispatches on reduceType to the
// matching ReduceInvoker<Op>::run specialization. CV_16S inputs fall back
// to the generic fallback (which converts and re-enters). NOTE(review):
// elided excerpt — the switch/if structure around the dispatch lines and
// the closing braces are not visible here.
256 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
259 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Half-precision (stored as CV_16S) is not handled natively below.
261 if (inputs_arr.depth() == CV_16S)
263 forward_fallback(inputs_arr, outputs_arr, internals_arr);
267 std::vector<Mat> inputs, outputs;
268 inputs_arr.getMatVector(inputs);
269 outputs_arr.getMatVector(outputs);
// A second input is only accepted for SUM (per this assertion).
270 CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM));
271 const int nstripes = getNumThreads();
277 ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
282 ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
287 ReduceInvoker<ReduceOpAVE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
292 ReduceInvoker<ReduceOpSUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
297 ReduceInvoker<ReduceOpL1>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
302 ReduceInvoker<ReduceOpL2>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
307 ReduceInvoker<ReduceOpSUM_SQUARE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
312 ReduceInvoker<ReduceOpPROD>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
317 ReduceInvoker<ReduceOpLOG_SUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
322 ReduceInvoker<ReduceOpLOG_SUM_EXP>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
// Any reduceType not matched above is a programming error.
326 CV_Error(Error::StsNotImplemented, "Not implemented");
// Computes the output shape. targetDims is taken as the desired output
// shape; outShapeTmp reconstructs the shape implied by the input (kept axes
// only) to cross-check the element count. If the totals disagree only in
// the batch axis, the batch size is taken from the input (dynamic batch).
// NOTE(review): elided excerpt — braces and else-branches are not visible.
331 bool getMemoryShapes(const std::vector<MatShape> &inputs,
332 const int requiredOutputs,
333 std::vector<MatShape> &outputs,
334 std::vector<MatShape> &internals) const CV_OVERRIDE
336 CV_Assert(inputs.size() > 0);
337 CV_Assert( reduceDims.size() !=0 && targetDims.size() != 0 && inputs[0].size() >= reduceDims.size());
339 // outShapeTmp can save the right number of `total(outShapeTmp)`. And the outShape is used as the final output shape.
340 std::vector<int> outShapeTmp, outShape;
341 outShape.assign(targetDims.begin(), targetDims.end());
// Reducing over every input axis collapses to a single element.
342 if (inputs[0].size() == reduceDims.size())
343 outShapeTmp.push_back(1);
// Otherwise keep the leading (non-reduced) input axes.
346 for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++)
348 outShapeTmp.push_back(inputs[0][i]);
352 // Support dynamic shape of Batch size.
353 // Note that: when there are multiple dynamic inputs, we will give an error.
354 if (total(outShape) != total(outShapeTmp) && outShape[0] != outShapeTmp[0])
356 outShape[0] = outShapeTmp[0];
// After the optional batch fix-up the element counts must match exactly.
359 CV_Assert(total(outShape) == total(outShapeTmp));
360 outputs.assign(1, outShape);
// Quantization support hook. Only the MAX/MIN branch head is visible in
// this excerpt (those reductions commute with an affine quantization map);
// the branch bodies and the return for other types are elided.
365 virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
366 const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
368 if (reduceType== MAX || reduceType== MIN)
// FLOPS estimate: each output element costs ~stride_w operations, where
// stride_w is the product of the reduced axis sizes.
375 virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
376 const std::vector<MatShape> &outputs) const CV_OVERRIDE
378 CV_UNUSED(inputs); // suppress unused variable warning
380 size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
381 for (int i = 0; i < outputs.size(); i++)
383 flops += total(outputs[i])*(stride_w);
// Public factory: builds the concrete ReduceLayerImpl from layer params.
403 Ptr<ReduceLayer> ReduceLayer::create(const LayerParams& params)
405 return Ptr<ReduceLayer>(new ReduceLayerImpl(params));