inference-engine/src/extension/ext_region_yolo.cpp

   1 // Copyright (C) 2018-2019 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #include "ext_list.hpp"
   6 #include "ext_base.hpp"
   7 #include "defs.h"
   8 #include "softmax.h"
   9 #include <vector>
  10 #include "simple_copy.h"
  11
  12 namespace InferenceEngine {
  13 namespace Extensions {
  14 namespace Cpu {
  15
  16 class RegionYoloImpl: public ExtLayerBase {
  17 public:
  18     explicit RegionYoloImpl(const CNNLayer* layer) {
  19         try {
  20             if (layer->insData.size() != 1 || layer->outData.empty())
  21                 THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
  22
  23             classes = layer->GetParamAsInt("classes");
  24             coords = layer->GetParamAsInt("coords");
  25             num = layer->GetParamAsInt("num");
  26             do_softmax = layer->GetParamAsBool("do_softmax", true);
  27             mask = layer->GetParamAsInts("mask", {});
  28
  29             addConfig(layer, {DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
  30         } catch (InferenceEngine::details::InferenceEngineException &ex) {
  31             errorMsg = ex.what();
  32         }
  33     }
  34
  35     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
  36                        ResponseDesc *resp) noexcept override {
  37         const auto *src_data = inputs[0]->cbuffer().as<const float *>();
  38         auto *dst_data = outputs[0]->buffer().as<float *>();
  39
  40         int mask_size = mask.size();
  41
  42         int IW = (inputs[0]->getTensorDesc().getDims().size() > 3) ? inputs[0]->getTensorDesc().getDims()[3] : 1;
  43         int IH = (inputs[0]->getTensorDesc().getDims().size() > 2) ? inputs[0]->getTensorDesc().getDims()[2] : 1;
  44         int IC = (inputs[0]->getTensorDesc().getDims().size() > 1) ? inputs[0]->getTensorDesc().getDims()[1] : 1;
  45         int B = (inputs[0]->getTensorDesc().getDims().size() > 0) ? inputs[0]->getTensorDesc().getDims()[0] : 1;
  46
  47         simple_copy(dst_data, outputs[0]->byteSize(), src_data, (size_t)B * IC * IH * IW * sizeof(float));
  48
  49         int end_index = 0;
  50         int num_ = 0;
  51         if (do_softmax) {
  52             // Region layer (Yolo v2)
  53             end_index = IW * IH;
  54             num_ = num;
  55         } else {
  56             // Yolo layer (Yolo v3)
  57             end_index = IW * IH * (classes + 1);
  58             num_ = mask_size;
  59         }
  60         int inputs_size = IH * IW * num_ * (classes + coords + 1);
  61
  62         for (int b = 0; b < B; b++) {
  63             for (int n = 0; n < num_; n++) {
  64                 int index = entry_index(IW, IH, coords, classes, inputs_size, b, n * IW * IH, 0);
  65                 for (int i = index; i < index + 2 * IW * IH; i++) {
  66                     dst_data[i] = logistic_activate(dst_data[i]);
  67                 }
  68
  69                 index = entry_index(IW, IH, coords, classes, inputs_size, b, n * IW * IH, coords);
  70                 for (int i = index; i < index + end_index; i++) {
  71                     dst_data[i] = logistic_activate(dst_data[i]);
  72                 }
  73             }
  74         }
  75
  76         if (do_softmax) {
  77             int index = entry_index(IW, IH, coords, classes, inputs_size, 0, 0, coords + 1);
  78             int batch_offset = inputs_size / num;
  79             for (int b = 0; b < B * num; b++)
  80                 softmax_generic(src_data + index + b * batch_offset, dst_data + index + b * batch_offset, 1, classes,
  81                                 IH, IW);
  82         }
  83
  84         return OK;
  85     }
  86
  87 private:
  88     int classes;
  89     int coords;
  90     int num;
  91     float do_softmax;
  92     std::vector<int> mask;
  93
  94     inline int entry_index(int width, int height, int coords, int classes, int outputs, int batch, int location,
  95                            int entry) {
  96         int n = location / (width * height);
  97         int loc = location % (width * height);
  98         return batch * outputs + n * width * height * (coords + classes + 1) +
  99                entry * width * height + loc;
 100     }
 101
 102     inline float logistic_activate(float x) {
 103         return 1.f / (1.f + exp(-x));
 104     }
 105 };
 106
 107 REG_FACTORY_FOR(ImplFactory<RegionYoloImpl>, RegionYolo);  // NOTE(review): macro is defined outside this file; presumably registers an ImplFactory<RegionYoloImpl> for the "RegionYolo" layer type - confirm
 108
 109 }  // namespace Cpu
 110 }  // namespace Extensions
 111 }  // namespace InferenceEngine