// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ext_list.hpp"
#include "ext_base.hpp"

#include <algorithm>
#include <string>
#include <vector>

#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
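// Reference implementation of the Fill layer: fills the output tensor with a
// single scalar value (second input), where the output shape is described by
// the first input (a 1D dimensions vector).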
class FillImpl: public ExtLayerBase {
public:
    explicit FillImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            if (layer->insData.size() != 2)
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
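            // The first input (FILL_DIMS) must be a 1D I32 vector that holds the output shape.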
            SizeVector fill_dims = layer->insData[FILL_DIMS].lock()->getTensorDesc().getDims();
            if (fill_dims.size() > 1)
                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be 1 dimension";

            if (layer->insData[FILL_DIMS].lock()->getTensorDesc().getPrecision() != Precision::I32)
                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be I32!";
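            // The second input (FILL_VALUE) is the scalar to broadcast; it must have the same
            // precision as the output, and only FP32 and I32 are supported.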
            SizeVector value_dims = layer->insData[FILL_VALUE].lock()->getTensorDesc().getDims();
            if (value_dims.size() > 1)
                THROW_IE_EXCEPTION << layer->name << " Value scalar should have 1 dimension";

            if (!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
                !(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
                THROW_IE_EXCEPTION << layer->name <<
                    " 'Value' input scalars and output tensor should have same precision and only FP32 and I32 are supported!";
            }
            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
                             { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            // Record the validation error so it can be reported when the layer is queried.
            errorMsg = ex.what();
        }
    }
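    // Fills the output blob with the scalar value; the work is split evenly across threads.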
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        int32_t* fill_dims = inputs[FILL_DIMS]->cbuffer().as<int32_t *>() +
                             inputs[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        size_t fill_size = inputs[FILL_DIMS]->getTensorDesc().getDims()[0];
        SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
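        // The output rank and dimensions must agree with the requested fill dimensions.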
        if (dst_dims.size() != fill_size) {
            if (resp) {
                std::string errorMsg = "Output tensor dimension mismatch";
                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            }
            return PARAMETER_MISMATCH;
        }
        size_t work_amount_dst = 1;
        for (size_t i = 0; i < dst_dims.size(); i++) {
            work_amount_dst *= fill_dims[i];
            if (static_cast<int>(dst_dims[i]) != fill_dims[i]) {
                if (resp) {
                    std::string errorMsg = "Output tensor dimension size mismatch";
                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
                }
                return PARAMETER_MISMATCH;
            }
        }
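        // Dispatch on the output precision; each thread fills its own contiguous chunk.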
        switch (outputs[0]->precision()) {
        case Precision::FP32: {
            float* dst_data = outputs[0]->cbuffer().as<float *>() +
                              outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
            float value = (inputs[FILL_VALUE]->cbuffer().as<float *>() +
                           inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];

            parallel_nt(0, [&](const int ithr, const int nthr) {
                size_t start = 0, end = 0;
                splitter(work_amount_dst, nthr, ithr, start, end);
                std::fill_n(dst_data + start, end - start, value);
            });
        }
        break;
        case Precision::I32: {
            int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
                                outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
            int32_t value = (inputs[FILL_VALUE]->cbuffer().as<int32_t *>() +
                             inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];

            parallel_nt(0, [&](const int ithr, const int nthr) {
                size_t start = 0, end = 0;
                splitter(work_amount_dst, nthr, ithr, start, end);
                std::fill_n(dst_data + start, end - start, value);
            });
        }
        break;
        default:
            if (resp) {
                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            }
            return GENERAL_ERROR;
        }

        return OK;
    }
private:
    // Indices of the layer inputs.
    const size_t FILL_DIMS = 0;
    const size_t FILL_VALUE = 1;
};

REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
}  // namespace Extensions
}  // namespace InferenceEngine