// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ext_list.hpp"
#include "ext_base.hpp"

#include <algorithm>
#include <string>
#include <vector>

#include "ie_parallel.hpp"
namespace InferenceEngine {
namespace Extensions {
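// Reference implementation of the Fill layer: fills the output tensor with a
// single scalar value (second input), where the output shape is described by
// the first input (a 1D dimensions vector).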
class FillImpl: public ExtLayerBase {
public:
    explicit FillImpl(const CNNLayer* layer) {
        try {
            if (layer->insData.empty() || layer->outData.empty())
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";

            if (layer->insData.size() != 2)
                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
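            // The first input (FILL_DIMS) must be a 1D I32 vector that holds the output shape.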
            SizeVector fill_dims = layer->insData[FILL_DIMS].lock()->getTensorDesc().getDims();
            if (fill_dims.size() > 1)
                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be 1 dimension";

            if (layer->insData[FILL_DIMS].lock()->getTensorDesc().getPrecision() != Precision::I32)
                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be I32!";
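            // The second input (FILL_VALUE) is the scalar to broadcast; it must have the same
            // precision as the output, and only FP32 and I32 are supported.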
            SizeVector value_dims = layer->insData[FILL_VALUE].lock()->getTensorDesc().getDims();
            if (value_dims.size() > 1)
                THROW_IE_EXCEPTION << layer->name << " Value scalar should have 1 dimension";

            if (!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
                !(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
                THROW_IE_EXCEPTION << layer->name <<
                    " 'Value' input scalars and output tensor should have same precision and only FP32 and I32 are supported!";
            }
            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
                             { DataConfigurator(ConfLayout::PLN) });
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            // Record the validation error so it can be reported when the layer is queried.
            errorMsg = ex.what();
        }
    }
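    // Fills the output blob with the scalar value; the work is split evenly across threads.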
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        int32_t* fill_dims = inputs[FILL_DIMS]->cbuffer().as<int32_t *>() +
                             inputs[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
        size_t fill_size = inputs[FILL_DIMS]->getTensorDesc().getDims()[0];
        SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
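        // The output rank and dimensions must agree with the requested fill dimensions.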
        if (dst_dims.size() != fill_size) {
            if (resp) {
                std::string errorMsg = "Output tensor dimension mismatch";
                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            }
            return PARAMETER_MISMATCH;
        }
        size_t work_amount_dst = 1;
        for (size_t i = 0; i < dst_dims.size(); i++) {
            work_amount_dst *= fill_dims[i];
            if (static_cast<int>(dst_dims[i]) != fill_dims[i]) {
                if (resp) {
                    std::string errorMsg = "Output tensor dimension size mismatch";
                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
                }
                return PARAMETER_MISMATCH;
            }
        }
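        // Dispatch on the output precision; each thread fills its own contiguous chunk.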
        switch (outputs[0]->precision()) {
        case Precision::FP32: {
            float* dst_data = outputs[0]->cbuffer().as<float *>() +
                              outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
            float value = (inputs[FILL_VALUE]->cbuffer().as<float *>() +
                           inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];

            parallel_nt(0, [&](const int ithr, const int nthr) {
                size_t start = 0, end = 0;
                splitter(work_amount_dst, nthr, ithr, start, end);
                std::fill_n(dst_data + start, end - start, value);
            });
        }
        break;
        case Precision::I32: {
            int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
                                outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
            int32_t value = (inputs[FILL_VALUE]->cbuffer().as<int32_t *>() +
                             inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];

            parallel_nt(0, [&](const int ithr, const int nthr) {
                size_t start = 0, end = 0;
                splitter(work_amount_dst, nthr, ithr, start, end);
                std::fill_n(dst_data + start, end - start, value);
            });
        }
        break;
        default:
            if (resp) {
                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            }
            return GENERAL_ERROR;
        }

        return OK;
    }
private:
    // Indices of the layer inputs.
    const size_t FILL_DIMS = 0;
    const size_t FILL_VALUE = 1;
};

REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
}  // namespace Extensions
}  // namespace InferenceEngine