1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
9 #include <CPP/detection_output.hpp> // todo: find a way to remove this
10 #include <description_buffer.hpp>
11 #include "cldnn_infer_request.h"
13 using namespace InferenceEngine;
15 namespace CLDNNPlugin {
17 const char CLDNNInferRequest::fp32_suffix[] = "_fp32";
// Creates an InferenceEngine blob for a network input described by 'desc'.
// When 'mem_ptr' is non-null the blob wraps that externally owned buffer
// (zero-copy over clDNN-allocated memory); otherwise an unallocated blob is
// returned and the caller is expected to allocate() it.
// NOTE(review): the precision switch/case scaffolding appears elided in this
// view of the file; the if/return pairs below correspond to the FP32 / FP16 /
// I16 / U8 cases respectively — confirm against the full source.
Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
const Layout l = desc.getLayout();
const Precision p = desc.getPrecision();
// Dims are reversed (rbegin/rend) relative to the TensorDesc order.
const SizeVector sz = SizeVector(desc.getDims().rbegin(), desc.getDims().rend());
// FP32: wrap the provided buffer, or build an unallocated float blob.
if (mem_ptr != nullptr)
return make_shared_blob<float>(p, l, sz, reinterpret_cast<float*>(mem_ptr));
return make_shared_blob<float, const SizeVector>(p, l, sz);
// FP16: half-precision payload is carried as raw uint16_t words.
if (mem_ptr != nullptr)
return make_shared_blob<uint16_t>(p, l, sz, reinterpret_cast<uint16_t*>(mem_ptr));
return make_shared_blob<uint16_t, const SizeVector>(p, l, sz);
// I16: signed 16-bit input (converted to fp32 later, see PrepareInput).
if (mem_ptr != nullptr)
return make_shared_blob<int16_t>(p, l, sz, reinterpret_cast<int16_t*>(mem_ptr));
return make_shared_blob<int16_t, const SizeVector>(p, l, sz);
// U8: note the unallocated path passes Precision::U8 explicitly rather
// than 'p', unlike the other branches.
if (mem_ptr != nullptr)
return make_shared_blob<uint8_t>(p, l, sz, reinterpret_cast<uint8_t*>(mem_ptr));
return make_shared_blob<uint8_t, const SizeVector>(Precision::U8, l, sz);
// Any other input precision is rejected.
THROW_IE_EXCEPTION << "The plugin does not support input " << p.name() << " precision";
// Creates an InferenceEngine blob for a network output described by 'desc'.
// When 'mem_ptr' is non-null the blob wraps that externally owned buffer;
// otherwise an unallocated blob is returned for the caller to allocate().
// NOTE(review): the precision switch/case scaffolding appears elided in this
// view; the two if/return pairs correspond to the FP32 and FP16 cases.
// Only FP32 and FP16 outputs are supported here (narrower than inputs).
Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
const Layout l = desc.getLayout();
const Precision p = desc.getPrecision();
// Dims are reversed (rbegin/rend) relative to the TensorDesc order.
const SizeVector sz = SizeVector(desc.getDims().rbegin(), desc.getDims().rend());
// FP32 output.
if (mem_ptr != nullptr)
return make_shared_blob<float>(p, l, sz, reinterpret_cast<float*>(mem_ptr));
return make_shared_blob<float, const SizeVector>(p, l, sz);
// FP16 output, carried as raw uint16_t words.
if (mem_ptr != nullptr)
return make_shared_blob<uint16_t>(p, l, sz, reinterpret_cast<uint16_t*>(mem_ptr));
return make_shared_blob<uint16_t, const SizeVector>(p, l, sz);
THROW_IE_EXCEPTION << "The plugin does not support output " << p.name() << " precision";
// Copies output data from clDNN device memory into the user-visible blob
// 'bptr', stripping any spatial padding that clDNN added to the layout.
// 'bi' (buf_info) is optional: when non-null it gives the element count and
// offset for one sub-batch slice (dynamic-batch mode); when null the whole
// blob is copied.
// NOTE(review): the remaining parameters of the signature (bptr, bi) and the
// switch/case scaffolding appear elided in this view — confirm against the
// full source.
void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory,
size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
auto layout = outputMemory.get_layout();
auto size = layout.size;
// Lower/upper padding per dimension, as recorded in the clDNN layout.
auto l_padd = layout.data_padding.lower_size();
auto u_padd = layout.data_padding.upper_size();
// Total horizontal padding per row, and the padded row counts above/below
// the data region (rows are (h_padding + width) elements wide).
auto h_padding = u_padd.spatial[0] + l_padd.spatial[0];
auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1];
auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1];
switch (bptr->precision()) {
case Precision::FP32: {
TBlob<float>::Ptr out_f = std::dynamic_pointer_cast<TBlob<float>>(bptr);
if (out_f == nullptr) {
THROW_IE_EXCEPTION << "Invalid output blob";
auto resPtr = outputMemory.pointer<float>();
float *resVec = out_f->data() + offset;
// Padded layout: walk b/f/y/x, skipping the per-row lower/upper pads.
// NOTE(review): the declaration of the running source index 'i' (and the
// per-feature vertical-padding skips) appear on elided lines.
if (h_padding || v_padding_l || v_padding_u) {
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
i += u_padd.spatial[0];
// Unpadded layout: plain element-by-element copy of n values.
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
case Precision::FP16: {
// Same copy logic as FP32, but fp16 payload moved as raw uint16_t.
TBlob<uint16_t>::Ptr out_f = std::dynamic_pointer_cast<TBlob<uint16_t>>(bptr);
if (out_f == nullptr) {
THROW_IE_EXCEPTION << "Invalid output blob";
auto resPtr = outputMemory.pointer<uint16_t>();
uint16_t *resVec = out_f->data() + offset;
if (h_padding || v_padding_l || v_padding_u) {
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
i += u_padd.spatial[0];
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
THROW_IE_EXCEPTION << "The plugin does not support output " << bptr->precision() << " precision";
// Feeds user input data into a clDNN network by attaching the blob's buffer
// (zero-copy) as the network's input memory. 'bi' is optional dynamic-batch
// slice info: when non-null, 'n'/'offset' select one sub-batch of the blob.
// clDNN then copies from the attached buffer during set_input_data.
// NOTE(review): case terminators/breaks appear elided in this view.
void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) {
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
// clDNN input primitives are registered under the "Input:" prefix.
cldnn::primitive_id internalName = "Input:" + inputName;
switch (inputBlob.precision()) {
case Precision::FP32: {
// const_cast is needed because memory::attach takes a mutable pointer;
// the blob's buffer is treated as the backing store for the input.
float* blob_ptr = const_cast<float*>(inputBlob.cbuffer().as<const float*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
case Precision::FP16: {
uint16_t* blob_ptr = const_cast<uint16_t*>(inputBlob.cbuffer().as<const uint16_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
case Precision::U8: {
uint8_t* blob_ptr = const_cast<uint8_t*>(inputBlob.cbuffer().as<const uint8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
THROW_IE_EXCEPTION << "The plugin does not support input " << inputBlob.precision() << " precision";
// Allocates device memory for every network input (static-batch path) and
// wraps each allocation in an IE blob stored in _inputs, so user writes go
// straight into clDNN-owned memory.
void CLDNNInferRequest::AllocateInputs() {
for (auto &input : m_env.inputLayouts) {
std::string name = input.first;
cldnn::layout layout = input.second;
InputInfo::Ptr ni = _networkInputs.at(input.first);
const TensorDesc& desc = ni->getTensorDesc();
// Allocate on the clDNN engine and expose the raw pointer to the blob.
cldnn::memory inputMem = cldnn::memory::allocate(*(m_env.engine), layout);
cldnn::pointer<uint8_t> mem_ptr = inputMem.pointer<uint8_t>();
inputsMemory.insert({ name, inputMem });
_inputs[name] = createInputBlob(desc, mem_ptr.data());
// I16 inputs need a parallel fp32 buffer: clDNN has no I16 input support,
// so PrepareInput converts into this "<name>_fp32" staging memory.
if (desc.getPrecision() == Precision::I16) {
cldnn::layout layout_fp32 = layout;
layout_fp32.data_type = cldnn::data_types::f32;
cldnn::memory inputMem_fp32 = cldnn::memory::allocate(*(m_env.engine), layout_fp32);
inputsMemory.insert({ input.first + fp32_suffix, inputMem_fp32 });
// Allocates host-side input blobs for the dynamic-batch path. Blobs are
// sized for the maximum batch (m_max_batch); copyInputData later attaches
// the relevant slice per sub-network.
// NOTE(review): the empty-dims check branch scaffolding appears elided in
// this view.
void CLDNNInferRequest::AllocateInputsDyn() {
for (auto &input : m_env.inputLayouts) {
InputInfo::Ptr ni = _networkInputs.at(input.first);
TensorDesc desc = ni->getTensorDesc();
SizeVector& dims = desc.getDims();
// Force the batch dimension (first dim) to the configured maximum.
*dims.begin() = static_cast<size_t>(m_env.m_max_batch);
THROW_IE_EXCEPTION << "Empty dimensions for input blob " << input.first;
Blob::Ptr inputBlob = createInputBlob(desc);
// I16 gets an extra fp32 staging blob (see fp32_suffix) since clDNN
// cannot consume I16 directly.
if (desc.getPrecision() == Precision::I16) {
auto fp32inputBlob = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(Precision::FP32,
fp32inputBlob->allocate();
_inputs[input.first + fp32_suffix] = fp32inputBlob;
inputBlob->allocate();
_inputs[input.first] = inputBlob;
// Binds every IE network output to the corresponding clDNN output primitive
// and wraps its device memory in an IE blob (zero-copy, static-batch path).
// Also fills outputsMap (IE output name -> clDNN primitive id).
void CLDNNInferRequest::AllocateOutputs() {
auto networkOutputsIDs = m_env.network->get_output_ids();
auto allPrimitiveIds = m_env.network->get_all_primitives();
for (auto& no : _networkOutputs) {
// Find correct output ID. Start with name stored in IR.
std::string outputID = m_env.primitiveIDs.at(no.first);
while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
// If current ID isn't found in cldnn network outputs, get previous primitive id and try again.
auto prim = allPrimitiveIds.find(outputID);
if (prim == allPrimitiveIds.end()) {
THROW_IE_EXCEPTION << "Unknown primitive id " << outputID;
// Only walk up through primitives that clDNN optimized out and that
// have exactly one parent; anything else is an error.
if (m_env.prevPrimitiveIDs.at(outputID).size() != 1 || prim->second != "_optimized_") {
THROW_IE_EXCEPTION << "Unable to find parent for output primitive " << outputID;
outputID = m_env.prevPrimitiveIDs.at(outputID)[0];
// Wrap the output's device memory directly in the user-visible blob.
cldnn::memory output_mem = m_env.network->get_output_memory(outputID);
cldnn::pointer<uint8_t> output_mem_ptr = output_mem.pointer<uint8_t>();
if (output_mem_ptr.data() == nullptr) {
THROW_IE_EXCEPTION << "Empty output memory for primitive " << outputID;
DataPtr oi = no.second;
const TensorDesc& desc = oi->getTensorDesc();
_outputs[no.first] = createOutputBlob(desc, output_mem_ptr.data());
outputsMap[no.first] = outputID;
// Allocates host-side output blobs for the dynamic-batch path, sized for the
// maximum batch; execAndParseDyn later copies each sub-batch slice into them.
// NOTE(review): the empty-dims check branch scaffolding appears elided in
// this view.
void CLDNNInferRequest::AllocateOutputsDyn() {
for (auto& no : _networkOutputs) {
DataPtr oi = no.second;
TensorDesc desc = oi->getTensorDesc();
SizeVector& dims = desc.getDims();
// Force the batch dimension (first dim) to the configured maximum.
*dims.begin() = static_cast<size_t>(m_env.m_max_batch);
THROW_IE_EXCEPTION << "Empty dimensions for output blob " << no.first;
Blob::Ptr outputBlob = createOutputBlob(desc);
outputBlob->allocate();
_outputs[no.first] = outputBlob;
// Sets the effective batch size for a dynamic-batch request. The batch is
// decomposed bitwise (mask = 1 << nb) across the pre-compiled power-of-two
// sub-networks; this function precomputes per-sub-network buffer slices
// (offset + size) for every input and output into batchInputs/batchOutputs.
// NOTE(review): the statements that advance 'offset'/'bsz' per iteration
// (presumably offset += bsz when the bit is set, bsz doubled each step) and
// the batchInputs.clear() call appear on elided lines — confirm against the
// full source.
void CLDNNInferRequest::SetBatch(int new_batch) {
if (m_env.m_max_batch < 0)
THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";
if (new_batch < 1 || new_batch > m_env.m_max_batch) {
THROW_IE_EXCEPTION << "Invalid dynamic batch size " << new_batch <<
" for this request.";
// No work if the requested batch is already current.
if (new_batch == m_curBatch)
batchOutputs.clear();
// tune expected inputs
for (auto &input : m_env.inputLayouts) {
cldnn::tensor dims = input.second.size;
// Per-sample element count: x * y * features * 1.
const SizeVector sz = { size_t(dims.spatial[0]), size_t(dims.spatial[1]), size_t(dims.feature[0]), 1 };
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
std::vector<buf_info> in_buf;
size_t bsz = single_batch;
// calculate metadata for input buffers
for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
unsigned int mask = 1 << nb;
buf_info ib = { offset, bsz };
in_buf.push_back(ib);
// Only sub-networks whose bit is set in new_batch participate.
if (new_batch & mask)
batchInputs[input.first] = in_buf;
// tune expected outputs
for (auto& no : _networkOutputs) {
// Output dims may be keyed by IR name or by mapped primitive id.
auto res_output = m_env.outputDims.find(no.first);
InferenceEngine::SizeVector sz;
if (res_output != m_env.outputDims.end())
sz = res_output->second;
sz = m_env.outputDims.at(m_env.primitiveIDs.at(no.first));
size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies<size_t>());
std::vector<buf_info> out_buf;
size_t bsz = single_batch;
// calculate metadata for output buffers
for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
unsigned int mask = 1 << nb;
buf_info ob = { offset, bsz };
out_buf.push_back(ob);
if (new_batch & mask)
batchOutputs[no.first] = out_buf;
m_curBatch = new_batch;
// Constructor: allocates input/output blobs (dynamic-batch variants when
// m_max_batch > 1, static otherwise — the static branch appears on elided
// lines) and, when profiling is enabled, pre-parses each profiled
// primitive's info string to cache its kernel implementation name.
// NOTE(review): the member-init of m_env and the AllocateInputs[...Dyn]()
// calls appear elided in this view — confirm against the full source.
CLDNNInferRequest::CLDNNInferRequest(InferenceEnv env, bool useProfiling,
InputsDataMap networkInputs, OutputsDataMap networkOutputs)
: InferRequestInternal(networkInputs, networkOutputs),
m_useProfiling(useProfiling) {
if (m_env.m_max_batch > 1) {
// Start at the maximum batch; SetBatch also builds the slice metadata.
SetBatch(m_env.m_max_batch);
AllocateOutputsDyn();
// Fill implementations map
if (m_useProfiling) {
// Extracts the value of "implementation :" from a clDNN primitive-info
// string; returns "undef" when the section is missing or malformed.
auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
std::string def_implementation = "undef";
std::string impl_section = "implementation :";
std::string::size_type pos = info.find(impl_section);
if (pos == std::string::npos) {
return def_implementation;
std::string::size_type end_pos = info.find(',', pos);
if (end_pos == std::string::npos) {
return def_implementation;
std::string::size_type length = end_pos - pos - impl_section.size();
// Trim surrounding spaces from the extracted token.
auto trim = [](const std::string& str) {
size_t first = str.find_first_not_of(' ');
if (std::string::npos == first) {
size_t last = str.find_last_not_of(' ');
return str.substr(first, (last - first + 1));
std::string tmp = trim(info.substr(pos + impl_section.size(), length));
return tmp.length() > 1 ? tmp : def_implementation;
// Parse primitive info and extract implementation name.
for (auto& id : m_env.profilingIDs) {
std::string prim_info = "";
// Best-effort: a primitive without info simply maps to "undef".
prim_info = m_env.network->get_primitive_info(id);
} catch (std::exception& e) { }
implementationsMap.insert({id, extractImplementationFromInfo(prim_info)});
// Runs the compiled clDNN network (static-batch path), copies outputs into
// user blobs when the user replaced the zero-copy buffers (SetBlob), and —
// when profiling is enabled — folds per-primitive timing events into
// m_env.perfMap.
void CLDNNInferRequest::execAndParse() {
auto networkOutputs = m_env.network->execute();
// Collect outputs as requested by the model
for (auto& no : _networkOutputs) {
std::string outputID = outputsMap[no.first];
auto outputMemory = networkOutputs.at(outputID).get_memory();
Blob::Ptr bptr = _outputs[no.first];
auto out_ptr = outputMemory.pointer<uint8_t>();
auto blob_ptr = bptr->buffer().as<uint8_t*>();
// If Async API is used, copy of output blobs is not needed, unless SetBlob function was called.
// But in the case when old API is used we have to copy data to memory provided by user.
if (blob_ptr != &out_ptr[0]) {
copyOutputData(outputMemory, bptr);
// finally collect profiling info
if (m_useProfiling) {
std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = m_env.network->get_executed_primitives();
auto allPrimitives = m_env.network->get_all_primitives();
// Get profiling info for all layers
for (auto &profiledID : m_env.profilingIDs) {
auto& perfCount = m_env.perfMap[profiledID].second;
// Change status if layer wasn't executed by cldnn engine
if (perfCount.num == 0 &&
executedPrimitives.find(profiledID) == executedPrimitives.end()) {
if (allPrimitives.find(profiledID) != allPrimitives.end() &&
allPrimitives.at(profiledID) == "_optimized_") {
// Layer was marked as optimized by cldnn
perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
// Layer wasn't run for some reason
perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
// Erase so the event (and its GPU resources) isn't re-read later.
auto event = executedPrimitives.at(profiledID);
executedPrimitives.erase(profiledID);
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};
// Accumulate microsecond durations per interval kind.
for (auto &interval : cldnnInfo.intervals) {
using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
if (interval.name == "submission") {
perfCount.cpu_uSec += count;
} else if (interval.name == "executing") {
perfCount.realTime_uSec += count;
} else if (interval.name == "duration") { // "duration" is used for CPU layers
perfCount.cpu_uSec += count;
// First "duration" sample marks the layer as CPU-executed.
if (perfCount.num == 0)
perfCount.isCPU = true;
// Runs inference in dynamic-batch mode: the current batch is decomposed
// bitwise and each participating power-of-two sub-network is executed, then
// each sub-network's output slice is copied into the user blob using the
// buf_info precomputed by SetBatch.
void CLDNNInferRequest::execAndParseDyn() {
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> networkOutputs(m_env.m_bv_sz);
// set up exection and put all graphs into driver queue
for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
unsigned int mask = 1 << nb;
// Sub-network nb participates iff bit nb of the current batch is set.
if (m_curBatch & mask) {
networkOutputs[nb] = m_env.batchNetworks[nb]->execute();
// now try to get execution results
for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
unsigned int mask = 1 << nb;
if (m_curBatch & mask) {
for (auto& no : _networkOutputs) {
std::string outputID = no.first;
// Follow the primitive-id remapping chain to its terminal id.
while ((m_env.primitiveIDs.find(outputID) != m_env.primitiveIDs.end()) &&
(m_env.primitiveIDs.at(outputID) != outputID)) {
outputID = m_env.primitiveIDs.at(outputID);
auto outputMemory = networkOutputs[nb].at(outputID).get_memory();
Blob::Ptr bptr = _outputs[no.first];
// Copy this sub-batch slice into its region of the user blob.
copyOutputData(outputMemory, bptr, &batchOutputs[no.first][nb]);
// Main synchronous inference entry point: preprocesses inputs, stages them
// into the network(s), then executes. Dynamic-batch requests route through
// the *Dyn variants.
// NOTE(review): the tail of this function (the execAndParseDyn()/
// execAndParse() calls after the final branch) appears elided in this view.
void CLDNNInferRequest::InferImpl() {
IE_PROFILING_AUTO_SCOPE(CLDNN_INFER)
// execute input pre-processing.
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP
for (auto &item : _inputs) {
if (m_env.m_max_batch > 1) {
PrepareInputDyn(item.first, *item.second);
PrepareInput(item.first, *item.second);
// The actual inference
if (m_env.m_max_batch > 1) {
// Exposes accumulated per-layer profiling data to the user. Throws if the
// request was created without profiling enabled. Layers without an
// associated IR name are skipped (continue appears on an elided line).
// NOTE(review): the declaration of the execution-index counter 'i' appears
// on an elided line — confirm against the full source.
void CLDNNInferRequest::GetPerformanceCounts(
std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
if (!m_useProfiling) {
THROW_IE_EXCEPTION << "Performance counters were not enabled";
for (auto& profiledID : m_env.profilingIDs) {
const auto& layerName = m_env.perfMap.at(profiledID).first;
if (layerName.length() == 0) // no layer directly associated
const auto& perfCounter = m_env.perfMap.at(profiledID).second;
auto& extPerfEntry = perfMap[layerName];
// copy layer implementation
if (perfCounter.isCPU) {
static const std::string cpuExecType("CPU");
memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
// GPU layers report the cached clDNN kernel implementation name.
std::string impl = implementationsMap.at(profiledID);
impl.copy(extPerfEntry.exec_type, impl.length());
extPerfEntry.execution_index = i++;
extPerfEntry.status = perfCounter.status;
// Averages over the number of recorded runs.
extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
extPerfEntry.realTime_uSec = perfCounter.realTime_avg();
perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length());
// Stages one input blob into the static-batch network. Three paths:
// I16 -> converted to fp32 into the pre-allocated "<name>_fp32" staging
// memory; blob still backed by clDNN-allocated memory -> zero-copy
// set_input_data; user-provided memory (SetBlob) -> attach-and-copy via
// copyInputData.
void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
if (m_env.inputLayouts.find(inputName) == m_env.inputLayouts.end()) {
THROW_IE_EXCEPTION << "Input name mismatch.";
auto inputLayout = m_env.inputLayouts.at(inputName);
// True when the blob still points at the clDNN-allocated buffer with the
// same byte size (i.e. the user did not replace it via SetBlob).
auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool {
const std::string str_not_allocated("Input data was not allocated.");
cldnn::pointer<const uint8_t> ptr = memory.pointer<const uint8_t>();
const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>();
const uint8_t* mem_ptr = ptr.data();
if (blob_ptr == nullptr || mem_ptr == nullptr) {
THROW_IE_EXCEPTION << str_not_allocated;
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
cldnn::primitive_id internalName = "Input:" + inputName;
const cldnn::memory& memory = inputsMemory.at(inputName);
if (inputBlob.precision() == Precision::I16) {
// clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix);
cldnn::pointer<float> ptr = fp32_mem.pointer<float>();
InferenceEngine::copyToFloat<int16_t>(ptr.data(), &inputBlob);
m_env.network->set_input_data(internalName, fp32_mem);
} else if (is_same_buffer(inputBlob, memory)) {
// If input memory was allocated by cldnn engine and wasn't overwritten by user set_input_data method won't copy input data.
switch (inputBlob.precision()) {
case Precision::FP32:
case Precision::FP16:
case Precision::U8: {
m_env.network->set_input_data(internalName, memory);
THROW_IE_EXCEPTION << "Unsupported input precision " << inputBlob.precision();
// Otherwise, we have to attach to user memory and then copy the data.
copyInputData(m_env.network, inputName, inputLayout, inputBlob);
// Stages one input blob into every participating dynamic-batch sub-network:
// the current batch is decomposed bitwise, and each set bit nb feeds
// sub-network nb (batch size 2^nb) its slice of the blob via the buf_info
// computed by SetBatch.
void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
// now try to get execution results
for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
unsigned int mask = 1 << nb;
if (m_curBatch & mask) {
auto inputLayout = m_env.inputLayouts.at(inputName);
// Sub-network nb processes exactly 'mask' (= 2^nb) samples.
inputLayout.size.batch[0] = mask;
copyInputData(m_env.batchNetworks[nb], inputName, inputLayout, inputBlob, &batchInputs[inputName][nb]);
630 }; // namespace CLDNNPlugin