// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <algorithm>
#include <fstream>
#include <sstream>
#include <utility>

#include <ie_metric_helpers.hpp>
#include "cnn_network_impl.hpp"
#include "exec_graph_info.hpp"
#include <myriad_executable_network.h>
#include <vpu/blob_reader.hpp>
#include <vpu/utils/profiling.hpp>
#include <net_pass.h>

using namespace InferenceEngine;

namespace vpu {
namespace MyriadPlugin {

static void selectNumberOfExecutors(const ncDevicePlatform_t& platform,
                                    std::uint32_t numShaves, std::uint32_t numSlices, int& numExecutors) {
    const std::uint32_t maxShaves = platform == NC_MYRIAD_2 ? 12 : 16;
    const std::uint32_t maxSlices = platform == NC_MYRIAD_2 ? 15 : 19;

    if (numExecutors == MyriadConfig::UNDEFINED_THROUGHPUT_STREAMS) {
        const std::uint32_t defaultPlatformExecutors = platform == NC_MYRIAD_2 ? 1 : 2;
        auto getMaximumAvailableExecutors = [&]() { return std::min(maxShaves / numShaves, maxSlices / numSlices); };

        numExecutors = std::min(defaultPlatformExecutors, getMaximumAvailableExecutors());
    }

    if (numExecutors < 1) {
        THROW_IE_EXCEPTION << "Number of executors must be at least 1, " << numExecutors << " provided";
    }

    auto isEnoughResources = [&]() {
        return numShaves * numExecutors <= maxShaves && numSlices * numExecutors <= maxSlices;
    };

    if (!isEnoughResources()) {
        THROW_IE_EXCEPTION << "Not enough resources to run " << numExecutors << " executors on "
                           << (platform == NC_MYRIAD_2 ? "MYRIAD_2" : "MYRIAD_X");
    }
}

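// Worked example for the checks above (illustrative numbers, not taken from a
// real compilation): on MYRIAD_X the budget is maxShaves = 16 and
// maxSlices = 19, so a graph compiled for numShaves = 8 and numSlices = 9
// allows at most
//     min(16 / 8, 19 / 9) = min(2, 2) = 2
// executors. The platform default of 2 throughput streams just fits, while an
// explicitly requested value of 3 would fail isEnoughResources(),
// since 8 * 3 = 24 > 16 SHAVEs.
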
ExecutableNetwork::ExecutableNetwork(std::vector<DevicePtr> &devicePool,
    const std::map<std::string, std::string> &config, ConfigMode mode) {
    VPU_PROFILE(ExecutableNetwork);
    _config = std::make_shared<MyriadConfig>(config, mode);

    _log = std::make_shared<Logger>("MyriadPlugin", _config->hostLogLevel, consoleOutput());
    _executor = std::make_shared<MyriadExecutor>(_config->forceReset, _config->deviceLogLevel, _log);
    _device = _executor->openDevice(devicePool, _config);
    _supportedMetrics = {
        METRIC_KEY(NETWORK_NAME),
        METRIC_KEY(SUPPORTED_METRICS),
        METRIC_KEY(SUPPORTED_CONFIG_KEYS),
        METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
        METRIC_KEY(DEVICE_THERMAL)
    };

    // ignore hardware optimization config for MYRIAD_2, it is always disabled
    if (_device->_platform == NC_MYRIAD_2) {
        _config->compileConfig.hwOptimization = false;
    }
}

ExecutableNetwork::ExecutableNetwork(ICNNNetwork &network, std::vector<DevicePtr> &devicePool,
                                     const std::map<std::string, std::string> &config) :
                                     ExecutableNetwork(devicePool, config) {
    VPU_PROFILE(ExecutableNetwork);
    // Combine RNN sequences where possible; otherwise fall back to unrolling Tensor Iterator.
    bool ti_proc_ok = !NetPass::CombineRNNSeq(network) ? NetPass::UnrollTI(network) : true;
    if (!ti_proc_ok)
        THROW_IE_EXCEPTION << "Plugin doesn't support Tensor Iterator in pure form. "
                              "No Tensor Iterator optimization pattern could be applied successfully";

    auto compiledGraph = compileNetwork(
        network,
        static_cast<Platform>(_device->_platform),
        _config->compileConfig,
        std::make_shared<Logger>("GraphCompiler", _config->hostLogLevel, consoleOutput()));

    selectNumberOfExecutors(_device->_platform,
                            compiledGraph->numShaves, compiledGraph->numSlices, _config->numExecutors);

    _graphBlob = std::move(compiledGraph->blob);
    _graphMetaData = std::move(compiledGraph->graphMeta);

    _inputInfo  = std::move(compiledGraph->inputInfo);
    _outputInfo = std::move(compiledGraph->outputInfo);

    if (!_device->isBooted()) {
        return;
    }

    char networkName[1024] = {};
    network.getName(networkName, sizeof(networkName));
    _executor->allocateGraph(_device, _graphDesc, _graphBlob, compiledGraph->blobHeader,
                             compiledGraph->numActiveStages, networkName, _config->numExecutors);
    if (_config->exclusiveAsyncRequests) {
        ExecutorManager *executorManager = ExecutorManager::getInstance();
        _taskExecutor = executorManager->getExecutor("MYRIAD");
    }

    for (size_t i = 0; i < _maxTaskExecutorGetResultCount; i++) {
        std::stringstream idStream;
        idStream << networkName << "_TaskExecutorGetResult" << i;
        _taskExecutorGetResultIds.emplace(idStream.str());
    }
}

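// Application-side sketch of how this constructor is reached (hedged:
// "model.xml"/"model.bin" are placeholder paths, and the reader API shown is
// the one available around the 2019 R3 timeframe):
//
//     InferenceEngine::Core core;
//     InferenceEngine::CNNNetReader reader;
//     reader.ReadNetwork("model.xml");
//     reader.ReadWeights("model.bin");
//     auto execNet = core.LoadNetwork(reader.getNetwork(), "MYRIAD", {});
//
// LoadNetwork forwards the config map to this constructor, which compiles the
// network and allocates the resulting graph on an opened Myriad device.
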
ExecutableNetwork::ExecutableNetwork(const std::string &blobFilename,
                           std::vector<DevicePtr> &devicePool,
                           const std::map<std::string, std::string> &config) :
                           ExecutableNetwork(devicePool, config, ConfigMode::RUNTIME_MODE) {
    VPU_PROFILE(ExecutableNetwork);
    std::ifstream blobFile(blobFilename, std::ios::binary);
    // Fail fast if the blob file could not be opened.
    if (!blobFile.is_open()) {
        THROW_IE_EXCEPTION << "Could not open file: " << blobFilename;
    }
    std::ostringstream blobContentStream;
    blobContentStream << blobFile.rdbuf();
    const std::string& blobContentString = blobContentStream.str();
    std::copy(blobContentString.begin(), blobContentString.end(), std::back_inserter(_graphBlob));

    if (!_device->isBooted()) {
        return;
    }

    // TODO: better name
    char networkName[1024] = "importedNetwork";

    BlobReader blobReader;
    blobReader.parse(_graphBlob);

    selectNumberOfExecutors(_device->_platform,
                            blobReader.getNumberOfShaves(), blobReader.getNumberOfSlices(), _config->numExecutors);

    this->_networkInputs  = blobReader.getNetworkInputs();
    this->_networkOutputs = blobReader.getNetworkOutputs();
    std::size_t numStages = blobReader.getStageCount();
    auto blobHeader = blobReader.getHeader();

    _inputInfo  = blobReader.getInputInfo();
    _outputInfo = blobReader.getOutputInfo();

    _executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName,
                             _config->numExecutors);

    _graphMetaData.stagesMeta.resize(numStages);
    for (auto &meta : _graphMetaData.stagesMeta) {
        meta.stageName = meta.stageType = meta.layerName = meta.layerType = "UNKNOWN";
        meta.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
    }

    if (_config->exclusiveAsyncRequests) {
        ExecutorManager *executorManager = ExecutorManager::getInstance();
        _taskExecutor = executorManager->getExecutor("MYRIAD");
    }

    for (size_t i = 0; i < _maxTaskExecutorGetResultCount; i++) {
        std::stringstream idStream;
        idStream << networkName << "_TaskExecutorGetResult" << i;
        _taskExecutorGetResultIds.emplace(idStream.str());
    }
}

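// Application-side sketch of the import path (hedged: "graph.blob" is a
// placeholder for a blob produced by a prior compilation/export step):
//
//     InferenceEngine::Core core;
//     auto execNet = core.ImportNetwork("graph.blob", "MYRIAD", {});
//
// A pre-compiled blob carries no per-layer metadata, which is why every stage
// above is reported as "UNKNOWN" with status EXECUTED.
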
void ExecutableNetwork::GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const {
    if (name == METRIC_KEY(NETWORK_NAME)) {
        result = IE_SET_METRIC(NETWORK_NAME, _graphDesc._name);
    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        result = IE_SET_METRIC(SUPPORTED_METRICS, _supportedMetrics);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, std::vector<std::string>());
    } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(2u * _config->numExecutors));
    } else if (name == METRIC_KEY(DEVICE_THERMAL)) {
        result = IE_SET_METRIC(DEVICE_THERMAL, _executor->GetThermal(_device));
    } else {
        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
    }
}

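// Query sketch from the application side (assumes `execNet` is the public
// ExecutableNetwork wrapper around this object):
//
//     unsigned int nireq = execNet.GetMetric(
//         METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
//
// With the default of 2 executors on MYRIAD_X this yields 4, i.e. two
// in-flight requests per executor.
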
void ExecutableNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
    graphPtr = buildRuntimeGraph(_graphMetaData);
}

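// Retrieval sketch (the plugin-internal signature is shown above; the public
// API wraps it, so the exact call site may differ): with a hypothetical
// `execNetImpl` pointing at this object,
//
//     InferenceEngine::ICNNNetwork::Ptr runtimeGraph;
//     execNetImpl->GetExecGraphInfo(runtimeGraph);
//
// the returned graph mirrors the executed stages, with per-layer parameters
// filled in by buildRuntimeGraph() below.
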
InferenceEngine::ICNNNetwork::Ptr ExecutableNetwork::buildRuntimeGraph(GraphMetaInfo& graphMetaInfo) {
    auto net = std::make_shared<InferenceEngine::details::CNNNetworkImpl>();
    net->setPrecision(Precision::FP16);
    net->setName(graphMetaInfo.graphName);

    std::map<size_t, CNNLayerPtr> stageMetaIndexToLayer;

    auto createLayerFromMeta = [&](const StageMetaInfo &stageMetaInfo) -> CNNLayer::Ptr {
        auto layer = std::make_shared<CNNLayer>(LayerParams{stageMetaInfo.stageName,
                                                            stageMetaInfo.layerType,
                                                            Precision::FP16});

        layer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = stageMetaInfo.layerName;
        layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = stageMetaInfo.stageType;
        layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(stageMetaInfo.execOrder);

        std::stringstream layoutStream;
        int ind = 0;
        for (auto &outLayout : stageMetaInfo.outLayouts) {
            if (ind == 0) {
                layoutStream << outLayout;
                ind++;
                continue;
            }
            layoutStream << ',' << outLayout;
        }
        layer->params[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = layoutStream.str();

        std::string outPrecisionsStr;
        ind = 0;
        for (auto &outPrecision : stageMetaInfo.outPrecisions) {
            if (ind == 0) {
                outPrecisionsStr += outPrecision.name();
                ind++;
                continue;
            }
            outPrecisionsStr += ',' + std::string(outPrecision.name());
        }
        layer->params[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outPrecisionsStr;

        if (stageMetaInfo.execOrder < 0) {
            layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = "not_executed";
        } else {
            layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = std::to_string(stageMetaInfo.execTime);
        }

        return layer;
    };

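    // For a single executed stage, the entries written above might read as
    // follows (illustrative values, not from a real run):
    //     ORIGINAL_NAMES    -> "conv1"
    //     IMPL_TYPE         -> "MyriadXHwOp"
    //     EXECUTION_ORDER   -> "3"
    //     OUTPUT_LAYOUTS    -> "NCHW"
    //     OUTPUT_PRECISIONS -> "FP16"
    //     PERF_COUNTER      -> "125", or "not_executed" when execOrder < 0
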
    //
    // Write performance counts
    //

    auto perfInfo = _executor->getPerfTimeInfo(_graphDesc._graphHandle);

    const auto deviceTimings = perfInfo.data();
    auto deviceTimingsCount = perfInfo.size();

    if (deviceTimingsCount > 0) {
        std::size_t timeIndex = 0;

        for (auto &stageMeta : graphMetaInfo.stagesMeta) {
            if (stageMeta.status == ie::InferenceEngineProfileInfo::EXECUTED &&
                timeIndex < deviceTimingsCount) {
                stageMeta.execTime += deviceTimings[timeIndex];
                timeIndex++;
            }
        }
    }

    //
    // Add all stages to network
    //

    for (std::size_t i = 0; i < graphMetaInfo.stagesMeta.size(); i++) {
        const auto stageMetaData = graphMetaInfo.stagesMeta[i];

        if (stageMetaData.status == ie::InferenceEngineProfileInfo::LayerStatus::OPTIMIZED_OUT ||
            stageMetaData.stageName == "<Receive-Tensor>" ||
            stageMetaData.stageName == "<none>") {
            continue;
        }

        auto layer = createLayerFromMeta(stageMetaData);
        stageMetaIndexToLayer.insert(std::make_pair(i, layer));
        net->addLayer(layer);
    }

    //
    // Add all edges to network
    //

    for (const auto &dataMetaData : graphMetaInfo.datasMeta) {
        DataPtr data;

        auto parent = stageMetaIndexToLayer[dataMetaData.parentIndex];
        data = std::make_shared<Data>(dataMetaData.name, dataMetaData.desc);
        parent->outData.push_back(data);
        data->getCreatorLayer() = parent;

        for (auto &childMetaIndex : dataMetaData.childrenIndices) {
            auto child = stageMetaIndexToLayer[childMetaIndex];
            data->getInputTo()[child->name] = child;
            child->insData.push_back(data);
        }
    }

293
294     //
295     // Specify inputs data
296     //
297
298     for (std::size_t i = 0; i < graphMetaInfo.stagesMeta.size(); i++) {
299         const auto stageMetaData = graphMetaInfo.stagesMeta[i];
300
301         if (stageMetaData.inputsNum != 0 ||
302             stageMetaData.stageName == "<Receive-Tensor>" ||
303             stageMetaData.stageName == "<none>") {
304             continue;
305         }
306
307         auto input = stageMetaIndexToLayer[i];
308         auto inputInfo = std::make_shared<InputInfo>();
309         inputInfo->setInputData(input->outData[0]);
310         net->setInputInfo(inputInfo);
311     }
312
313     return net;
314 }
315
316 }  // namespace MyriadPlugin
317 }  // namespace vpu