1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <vpu/model/data.hpp>
12 #include <unordered_map>
17 #include <precision_utils.h>
18 #include <ie_parallel.hpp>
20 #include <vpu/model/edges.hpp>
21 #include <vpu/model/stage.hpp>
22 #include <vpu/utils/ie_helpers.hpp>
23 #include <vpu/utils/numeric.hpp>
24 #include <vpu/backend/backend.hpp>
// Lazily materializes the computed content on first access:
// sizes a temporary buffer via the (virtual) getTempBufSize(),
// lets the subclass fill it via fillTempBuf(), then drops the
// base contents so their memory can be reclaimed.
32 const void* CalculatedDataContent::getRaw() const {
34 _temp.resize(getTempBufSize(_baseContents));
35 fillTempBuf(_baseContents, _temp.data());
// Once _temp holds the result the source contents are no longer needed.
36 _baseContents.clear();
// Default temp-buffer size: the full tensor described by _desc,
// in bytes (element count * element size). Subclasses override this
// when the generated content has a different footprint.
41 size_t CalculatedDataContent::getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const {
42 return _desc.totalDimSize() * _desc.elemSize();
// DataContent backed by an InferenceEngine blob.
// The blob is converted to FP16 on demand (conversion result is cached
// in _blobFp16) and, when `repeat` > 1, its contents are tiled `repeat`
// times into a temporary buffer.
47 class IeBlobContent final : public DataContent {
// `repeat`: how many times the blob's contents are replicated to
// cover the full _desc size (see the parallel copy below).
49 IeBlobContent(const ie::Blob::Ptr& blob, int repeat) : _blob(blob), _repeat(repeat) {}
52 const void* getRaw() const override {
// Only FP16 output is supported by this content type.
53 IE_ASSERT(_desc.type() == DataType::FP16);
// Convert the source blob to FP16 once and cache the result.
55 if (_blobFp16 == nullptr) {
56 _blobFp16 = getBlobFP16(_blob);
// Fast path: no replication needed, expose the cached blob directly.
61 return _blobFp16->cbuffer();
64 VPU_PROFILE(IeBlobContent);
// The requested size must be an exact multiple of the repeat factor.
66 IE_ASSERT(_desc.totalDimSize() % _repeat == 0);
68 auto origNumElems = _desc.totalDimSize() / _repeat;
69 IE_ASSERT(origNumElems <= _blobFp16->size());
71 auto origPtr = _blobFp16->cbuffer().as<const fp16_t*>();
72 IE_ASSERT(origPtr != nullptr);
// Tile the original data _repeat times; each copy is independent,
// so the copies can run in parallel.
74 _temp.resize(_desc.totalDimSize());
76 ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
77 std::copy_n(origPtr, origNumElems, _temp.data() + i * origNumElems);
// Members are mutable because getRaw() is const but caches results.
86 mutable ie::Blob::Ptr _blob;
// Cached FP16 view of _blob (filled on first getRaw call).
89 mutable ie::Blob::Ptr _blobFp16;
// Holds the replicated data when _repeat > 1.
90 mutable std::vector<fp16_t> _temp;
// Factory: wraps an InferenceEngine blob as DataContent,
// optionally replicated `repeat` times (see IeBlobContent).
95 DataContent::Ptr ieBlobContent(const ie::Blob::Ptr& blob, int repeat) {
96 return std::make_shared<IeBlobContent>(blob, repeat);
// Content built by replication. Two modes, selected by constructor:
//  * scalar mode  — `_count` copies of a single FP16 value `_val`;
//  * content mode — `_count` back-to-back copies of another content.
101 class ReplicatedContent final : public CalculatedDataContent {
// Scalar mode: no base contents, so fillTempBuf sees an empty list.
103 ReplicatedContent(float val, int count) : _val(val), _count(count) {}
// Content mode: the original content becomes the single base content.
105 ReplicatedContent(const DataContent::Ptr& origContent, int count) :
106 CalculatedDataContent({origContent}), _count(count) {
110 size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>& baseContents) const override {
// Scalar mode: buffer holds exactly _count FP16 values.
111 if (baseContents.empty()) {
112 return _count * sizeof(fp16_t);
// Content mode: buffer covers the whole descriptor, which must
// split evenly into _count replicas.
114 IE_ASSERT(baseContents.size() == 1);
115 IE_ASSERT(_desc.totalDimSize() % _count == 0);
117 return _desc.totalDimSize() * sizeof(fp16_t);
121 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
122 VPU_PROFILE(ReplicatedContent);
124 auto dstPtr = static_cast<fp16_t*>(tempBuf);
// Scalar mode: broadcast one converted value.
126 if (baseContents.empty()) {
127 std::fill_n(dstPtr, _count, ie::PrecisionUtils::f32tof16(_val));
129 IE_ASSERT(baseContents.size() == 1);
130 IE_ASSERT(_desc.totalDimSize() % _count == 0);
// Content mode: copy the original block _count times; copies are
// disjoint, so they are done in parallel.
132 auto origCount = _desc.totalDimSize() / _count;
133 auto origPtr = baseContents[0]->get<fp16_t>();
134 IE_ASSERT(origPtr != nullptr);
136 ie::parallel_for(_count, [origPtr, origCount, dstPtr](int i) {
137 std::copy_n(origPtr, origCount, dstPtr + i * origCount);
// Factory: content consisting of `count` copies of a scalar value.
149 DataContent::Ptr replicateContent(
152 return std::make_shared<ReplicatedContent>(val, count);
// Factory: content consisting of `count` copies of another content.
155 DataContent::Ptr replicateContent(
156 const DataContent::Ptr& origContent,
158 return std::make_shared<ReplicatedContent>(origContent, count);
// Content that multiplies every FP16 element of a base content by a
// constant scale factor (computed element-wise through FP32).
163 class ScaledContent final : public CalculatedDataContent {
166 const DataContent::Ptr& origContent,
168 CalculatedDataContent({origContent}), _scale(scale) {
172 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
173 VPU_PROFILE(ScaledContent);
175 IE_ASSERT(baseContents.size() == 1);
177 auto totalSize = _desc.totalDimSize();
// The source must be FP16 and exactly the same element count as
// this content's descriptor.
179 auto origDesc = baseContents[0]->desc();
180 IE_ASSERT(origDesc.type() == DataType::FP16);
181 IE_ASSERT(origDesc.totalDimSize() == totalSize);
183 auto srcPtr = baseContents[0]->get<fp16_t>();
184 IE_ASSERT(srcPtr != nullptr);
186 auto dstPtr = static_cast<fp16_t*>(tempBuf);
// Scale element-wise: fp16 -> fp32, multiply, back to fp16.
// Elements are independent, so the loop is parallelized.
188 ie::parallel_for(totalSize, [this, srcPtr, dstPtr](int i) {
189 dstPtr[i] = ie::PrecisionUtils::f32tof16(ie::PrecisionUtils::f16tof32(srcPtr[i]) * _scale);
// Factory: content equal to `origContent` with every element
// multiplied by `scale` (see ScaledContent).
199 DataContent::Ptr scaleContent(
200 const DataContent::Ptr& origContent,
202 return std::make_shared<ScaledContent>(origContent, scale);
// Walks the parent-data chain up to the root: the data object that has
// no parent of its own. Returns this node itself if it has no parent.
209 Data DataNode::getTopParentData() const {
210 auto topParent = handle_from_this();
211 while (auto nextParent = topParent->parentData()) {
212 topParent = nextParent;
// Effective strides of this data.
// A child sharing its parent's buffer as an ROI inherits the parent's
// strides; otherwise strides are derived from the descriptor and the
// accumulated stride requirements.
217 DimValues DataNode::strides() const {
218 if (_parentDataEdge != nullptr) {
219 if (_parentDataEdge->mode() == SharedDataMode::ROI) {
220 return _parentDataEdge->parent()->strides();
224 return calcStrides(_desc, _requiredStrides);
// Total buffer size in bytes, computed from the descriptor and the
// effective strides. Only valid for top-level data.
227 int DataNode::totalByteSize() const {
228 // It doesn't make sense for child Data: the buffer is owned by the top parent.
229 IE_ASSERT(_parentDataEdge == nullptr);
231 return calcTotalByteSize(_desc, strides());
// Linear element offset of `coord` inside this data's buffer:
// sum over dimensions of (coordinate * stride). Every coordinate must
// name a dimension present in the descriptor and stay within its size.
234 int DataNode::elemOffset(const DimValues& coord) const {
235 auto strides = this->strides();
238 for (const auto& p : coord) {
239 IE_ASSERT(_desc.dimsOrder().hasDim(p.first));
240 IE_ASSERT(p.second < _desc.dim(p.first));
241 res += p.second * strides[p.first];
// Offset of the last element: builds the coordinate (dim-1) for every
// dimension and delegates to elemOffset().
247 int DataNode::lastElemOffset() const {
249 for (const auto& p : _desc.dims()) {
250 lastElem.set(p.first, p.second - 1);
252 return elemOffset(lastElem);
// True if the effective strides satisfy the given requirements.
255 bool DataNode::checkStrides(const StridesRequirement& reqs) const {
256 return vpu::checkStrides(_desc, strides(), reqs);
// Merges `newReqs` into the node's stride requirements, dimension by
// dimension, then verifies the resulting strides satisfy BOTH the old
// and the new requirements.
259 void DataNode::updateRequiredStrides(const StridesRequirement& newReqs) {
260 // There shouldn't be any Data<->Data edges.
261 IE_ASSERT(_parentDataEdge == nullptr);
262 IE_ASSERT(_childDataEdges.empty());
264 auto prevReqs = _requiredStrides;
266 StridesRequirement mergedReqs;
267 for (int i = 0; i < _desc.numDims(); ++i) {
268 auto prevReq = prevReqs.get(i);
269 auto newReq = newReqs.get(i);
// Neither side constrains this dimension — leave it unconstrained.
271 if (prevReq == DimStride::Any &&
272 newReq == DimStride::Any) {
276 // In case if both requirements are defined, use `prevReq`.
277 // We'll check that both requirements are satisfied at the end.
278 if (prevReq != DimStride::Any) {
279 mergedReqs.add(i, prevReq);
281 mergedReqs.add(i, newReq);
285 _requiredStrides = mergedReqs;
// The merged requirement must not break either constraint set.
287 IE_ASSERT(checkStrides(prevReqs));
288 IE_ASSERT(checkStrides(newReqs));
// Resets allocation state: location back to None and the I/O buffer
// offset attribute removed, so the node can be (re)allocated later.
291 void DataNode::clearAllocation() {
292 _location = DataLocation::None;
294 attrs().erase("ioBufferOffset");
// Sets the memory-type requirement. Non-DDR memory (e.g. on-chip) is
// only allowed for intermediate data.
297 void DataNode::setMemReqs(MemoryType mem) {
298 if (mem != MemoryType::DDR) {
299 IE_ASSERT(_usage == DataUsage::Intermediate);
// Records placement for network inputs/outputs: the location must match
// the node's usage (Input data -> Input location, Output -> Output),
// and the offset into the corresponding I/O buffer is stored as an attr.
305 void DataNode::setIOInfo(DataLocation location, int ioBufferOffset) {
306 IE_ASSERT(_usage == DataUsage::Input || _usage == DataUsage::Output);
308 if (_usage == DataUsage::Input) {
309 IE_ASSERT(location == DataLocation::Input);
310 } else if (_usage == DataUsage::Output) {
311 IE_ASSERT(location == DataLocation::Output);
314 _location = location;
316 attrs().set<int>("ioBufferOffset", ioBufferOffset);
// Records placement for non-I/O data: Const data must live in the blob
// section, Temp data in BSS; Intermediate data may go elsewhere.
319 void DataNode::setAllocationInfo(DataLocation location, int memoryOffset) {
320 IE_ASSERT(_usage == DataUsage::Const || _usage == DataUsage::Intermediate || _usage == DataUsage::Temp);
322 if (_usage == DataUsage::Const) {
323 IE_ASSERT(location == DataLocation::Blob);
324 } else if (_usage == DataUsage::Temp) {
325 IE_ASSERT(location == DataLocation::BSS);
328 _location = location;
329 _memoryOffset = memoryOffset;
// Serializes the buffer in the "new" format, optionally re-expressed in
// `newOrder`. An empty order (numDims() == 0) means "keep the current
// descriptor as-is". Otherwise dims/strides are extended to the new
// order: dimensions present in the original keep their values, newly
// introduced dimensions get size 1 and a compact stride derived from
// the previous dimension in the permutation.
332 void DataNode::serializeNewBuffer(
333 BlobSerializer& serializer,
334 DimsOrder newOrder) {
335 if (newOrder.numDims() == 0) {
336 serializeBufferImpl(serializer, _desc, this->strides());
// The new order may only add dimensions, never drop any.
338 IE_ASSERT(newOrder.numDims() >= _desc.dimsOrder().numDims());
340 auto newDims = _desc.dims();
341 auto newStrides = this->strides();
342 auto newPerm = newOrder.toPermutation();
344 auto origOrder = _desc.dimsOrder();
345 auto origPerm = origOrder.toPermutation();
// Walk the new permutation; dims matching the original permutation
// are kept, the rest are synthesized.
348 for (int i = 0; i < newPerm.size(); i++) {
351 if (origPermInd < origPerm.size() && origPerm[origPermInd] == d) {
// Innermost synthesized dim: stride of one element.
358 newStrides.set(d, _desc.elemSize());
// Otherwise: compact stride on top of the previous dimension.
360 newStrides.set(d, newStrides[newPerm[i - 1]] * newDims[newPerm[i - 1]]);
// Every original dimension must have been consumed.
363 IE_ASSERT(origPermInd == origPerm.size());
365 DataDesc newDesc(_desc.type(), newOrder, newDims);
366 serializeBufferImpl(serializer, newDesc, newStrides);
372 // Shifts every dimension index in the order down by the same amount so
// that the smallest index becomes the lowest dimension (i.e. the minimal
// order digit becomes 1). Dims and strides are re-keyed accordingly.
373 void rebaseOrderToOne(DimsOrder& ord, DimValues& dims, DimValues& strides) {
374 auto perm = ord.toPermutation();
375 IE_ASSERT(!perm.empty());
// Find the smallest dimension index used by the order.
377 auto minDim = MAX_DIMS_64 + 1;
378 for (auto d : perm) {
379 minDim = std::min(minDim, static_cast<int>(d));
383 DimValues newStrides;
// Re-key each dimension to (old index - minDim), preserving its
// size and stride.
385 for (int i = 0; i < perm.size(); ++i) {
386 auto oldDim = perm[i];
387 auto newDim = static_cast<Dim>(static_cast<int>(oldDim) - minDim);
390 newDims.set(newDim, dims[oldDim]);
391 newStrides.set(newDim, strides[oldDim]);
// Write the rebased order/dims/strides back through the references.
394 ord = DimsOrder::fromPermutation(perm);
396 strides = newStrides;
// Serializes the buffer in the legacy 3-dimensional format.
// Steps:
//   1. choose the target order (argument or current descriptor order);
//   2. relocate original dims/strides onto the new dims per `dimsReloc`
//      (several original dims may be merged into one new dim);
//   3. pad or trim the permutation to exactly OLD_FORMAT_NUM_DIMS;
//   4. rebase dimension indices and emit the resulting desc/strides.
// When `stage` is given, the computed desc/strides are also attached to
// the matching input/output edges as "newDesc"/"newStrides" attributes.
401 void DataNode::serializeOldBuffer(
403 BlobSerializer& serializer,
405 const EnumMap<Dim, std::vector<Dim>>& dimsReloc) {
406 const int OLD_FORMAT_NUM_DIMS = 3;
408 auto newDims = _desc.dims();
409 auto newStrides = this->strides();
412 // Apply alternative DimsOrder if any.
415 if (newOrder.numDims() == 0) {
416 newOrder = _desc.dimsOrder();
418 IE_ASSERT(newOrder.numDims() == OLD_FORMAT_NUM_DIMS);
420 auto origPerm = _desc.dimsOrder().toPermutation();
421 auto origIndeces = _desc.dimsOrder().toIndices();
422 auto origDims = newDims;
423 auto origStrides = newStrides;
425 auto newPerm = newOrder.toPermutation();
431 // Move real dims and strides according to relocation map
434 EnumSet<Dim> usedOrigDims;
435 int prevOrigDimInd = -1;
437 for (int i = 0; i < newPerm.size(); ++i) {
438 auto newDim = newPerm[i];
// Default stride: compact layout on top of the previous new dim
// (element size for the innermost dimension).
441 int newStrideVal = 0;
443 newStrideVal = _desc.elemSize();
445 newStrideVal = newStrides[newPerm[i - 1]] * newDims[newPerm[i - 1]];
448 auto it = dimsReloc.find(newDim);
449 if (it != dimsReloc.end()) {
450 auto origDimsToReloc = it->second;
451 IE_ASSERT(!origDimsToReloc.empty());
// Merge the listed original dims (which must be consecutive in
// the original order and each used at most once) into `newDim`.
453 for (int j = 0; j < origDimsToReloc.size(); ++j) {
454 auto origDim = origDimsToReloc[j];
455 auto origDimInd = origIndeces[origDim];
457 IE_ASSERT(usedOrigDims.count(origDim) == 0);
458 IE_ASSERT(_desc.dimsOrder().hasDim(origDim));
459 IE_ASSERT(origDimInd == prevOrigDimInd + 1);
461 usedOrigDims.insert(origDim);
// Merging non-trivial dims is only legal when they are
// laid out compactly in memory.
463 if (j > 0 && origDims[origDim] > 1) {
464 IE_ASSERT(checkStride(origStrides, _desc, origDimInd, DimStride::Compact));
// Merged size is the product; stride comes from the first
// (innermost) original dim of the group.
467 newDimVal *= origDims[origDim];
469 newStrideVal = origStrides[origDim];
472 prevOrigDimInd = origDimInd;
476 newDims.set(newDim, newDimVal);
477 newStrides.set(newDim, newStrideVal);
// Every original dimension must have been relocated exactly once.
480 IE_ASSERT(usedOrigDims.size() == origDims.size());
481 for (auto usedDim : usedOrigDims) {
482 IE_ASSERT(_desc.dimsOrder().hasDim(usedDim));
487 // Adjust num dims and dims order to FixedNumDims
490 auto newPerm = newOrder.toPermutation();
491 IE_ASSERT(!newPerm.empty());
// Highest dimension index in use — new padding dims are appended
// above it.
493 int maxDimDigit = -1;
494 for (auto d : newPerm) {
495 maxDimDigit = std::max(maxDimDigit, static_cast<int>(d));
497 IE_ASSERT(maxDimDigit >= 0);
// Too few dims: pad with size-1 dims whose strides continue the
// compact layout.
499 if (newPerm.size() < OLD_FORMAT_NUM_DIMS) {
500 for (int i = newPerm.size(); i < OLD_FORMAT_NUM_DIMS; i++) {
501 auto lastDim = newPerm.back();
502 auto newLastDim = static_cast<Dim>(++maxDimDigit);
504 newDims.set(newLastDim, 1);
505 newStrides.set(newLastDim, newStrides[lastDim] * newDims[lastDim]);
507 newPerm.emplace_back(newLastDim);
510 newOrder = DimsOrder::fromPermutation(newPerm);
// Too many dims: the extra outer dims must be trivial (size 1) and
// are dropped.
513 if (newPerm.size() > OLD_FORMAT_NUM_DIMS) {
514 for (int i = OLD_FORMAT_NUM_DIMS; i < newPerm.size(); i++) {
515 IE_ASSERT(newDims[newPerm[i]] == 1);
516 newDims.erase(newPerm[i]);
517 newStrides.erase(newPerm[i]);
520 newPerm.resize(OLD_FORMAT_NUM_DIMS);
522 newOrder = DimsOrder::fromPermutation(newPerm);
525 rebaseOrderToOne(newOrder, newDims, newStrides);
// The legacy format accepts only these three 3D layouts.
527 IE_ASSERT(newOrder.numDims() == OLD_FORMAT_NUM_DIMS);
528 IE_ASSERT(newOrder == DimsOrder::HWC || newOrder == DimsOrder::CHW || newOrder == DimsOrder::HCW);
531 // Create new DataDesc
534 DataDesc newDesc(_desc.type(), newOrder, newDims);
// Expose the computed layout on the stage's edges touching this data,
// for later passes/serialization to pick up.
536 if (stage != nullptr) {
537 for (const auto& inEdge : stage->inputEdges()) {
538 if (inEdge->input() == handle_from_this()) {
539 inEdge->attrs().set<DataDesc>("newDesc", newDesc);
540 inEdge->attrs().set<DimValues>("newStrides", newStrides);
543 for (const auto& outEdge : stage->outputEdges()) {
544 if (outEdge->output() == handle_from_this()) {
545 outEdge->attrs().set<DataDesc>("newDesc", newDesc);
546 outEdge->attrs().set<DimValues>("newStrides", newStrides);
552 // Serialize updated data
555 serializeBufferImpl(serializer, newDesc, newStrides);
// Serializes I/O metadata: index, buffer offset, then the data name
// (length aligned up to 16 bytes, zero-padded), followed by the full
// descriptor/strides.
558 void DataNode::serializeIOInfo(BlobSerializer& serializer) const {
559 auto ioIdx = attrs().get<int>("ioIdx");
560 serializer.append(checked_cast<uint32_t>(ioIdx));
562 auto ioBufferOffset = attrs().get<int>("ioBufferOffset");
563 serializer.append(checked_cast<uint32_t>(ioBufferOffset));
// Name is written with its length rounded up to a 16-byte multiple...
565 auto nameLength = checked_cast<uint32_t>(_name.length());
566 auto nameLengthAligned = alignVal(nameLength, 16u);
568 serializer.append(nameLengthAligned);
569 for (auto c : _name) {
570 serializer.append(c);
// ...and zero-padded to that aligned length.
572 for (uint32_t i = 0; i < nameLengthAligned - nameLength; ++i) {
573 serializer.append(uint8_t(0));
576 serializeDescImpl(serializer, _desc, strides());
// Serializes a descriptor: data type, order code, number of dims, then
// the dim sizes and strides, both in permutation order.
579 void DataNode::serializeDescImpl(
580 BlobSerializer& serializer,
581 const DataDesc& storedDesc,
582 const DimValues& storedStrides) const {
583 IE_ASSERT(storedDesc.numDims() <= MAX_DIMS_32);
585 const auto& storedDims = storedDesc.dims();
587 auto storedDimsOrder = storedDesc.dimsOrder();
589 auto storedPerm = storedDimsOrder.toPermutation();
590 IE_ASSERT(!storedPerm.empty());
592 serializer.append(checked_cast<uint32_t>(storedDesc.type()));
593 serializer.append(checked_cast<uint32_t>(storedDimsOrder.code()));
// Dim sizes first, then strides, both following the permutation.
595 serializer.append(checked_cast<uint32_t>(storedPerm.size()));
596 for (auto d : storedPerm) {
597 serializer.append(checked_cast<uint32_t>(storedDims[d]));
599 for (auto d : storedPerm) {
600 serializer.append(checked_cast<uint32_t>(storedStrides[d]));
// Serializes the buffer record: descriptor + location, then either the
// top parent's I/O index and total byte size (for Input/Output data) or
// the raw memory offset (for everything else).
604 void DataNode::serializeBufferImpl(
605 BlobSerializer& serializer,
606 const DataDesc& storedDesc,
607 const DimValues& storedStrides) const {
608 serializeDescImpl(serializer, storedDesc, storedStrides);
610 serializer.append(checked_cast<uint32_t>(_location));
612 if (_location == DataLocation::Input || _location == DataLocation::Output) {
// I/O buffers are addressed via the top parent data, which owns the
// actual buffer (ioIdx attr and total byte size live there).
613 auto topParent = getTopParentData();
615 auto ioIdx = topParent->attrs().get<int>("ioIdx");
616 serializer.append(checked_cast<uint32_t>(ioIdx));
618 auto parentByteSize = topParent->totalByteSize();
619 serializer.append(checked_cast<uint32_t>(parentByteSize));
622 serializer.append(checked_cast<uint32_t>(_memoryOffset));
// Stream helper: prints the data's name, or "<null>" for an empty handle.
625 void printTo(std::ostream& os, const Data& data) {
626 os << (data == nullptr ? "<null>" : data->name());
635 struct StopSignal final {};
// Recursive depth-first walk over a data node's children. The callback
// decides per child whether to descend (NextChild) or abort the whole
// traversal (Stop, signaled by throwing StopSignal).
637 void loopOverDataImpl(
639 const FuncRef<DataLoopStatus(const Data&)>& op) {
640 for (const auto& childData : data->childDatas()) {
641 auto status = op(childData);
643 if (status == DataLoopStatus::NextChild) {
644 loopOverDataImpl(childData, op);
645 } else if (status == DataLoopStatus::Stop) {
655 const FuncRef<DataLoopStatus(const Data&)>& op) {
656 auto status = op(data);
657 if (status != DataLoopStatus::NextChild)
661 loopOverDataImpl(data, op);
662 } catch (const StopSignal&) {