1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <vpu/hw/utility.hpp>
8 #include <unordered_map>
11 #include <ie_parallel.hpp>
13 #include <vpu/model/stage.hpp>
14 #include <vpu/utils/numeric.hpp>
15 #include <vpu/utils/profiling.hpp>
// Stream overload of printTo for HwOpList.
// Emits an opening "[" line and then a "size=" line holding the number of
// entries in hwOps.vec. Each line is terminated with std::endl, which also
// flushes the stream.
23 void printTo(std::ostream& os, const HwOpList& hwOps) {
24 os << "[" << std::endl;
25 os << "size=" << hwOps.vec.size() << std::endl;
// DotLabel overload of printTo for HwOpList.
// Records the number of entries in hwOps.vec under the key "size".
// NOTE(review): the pair is appended to `subLbl`, presumably a nested label
// created from `lbl` -- confirm against its declaration.
29 void printTo(DotLabel& lbl, const HwOpList& hwOps) {
31 subLbl.appendPair("size", hwOps.vec.size());
// Derives the padding description for a kernel window applied to an input of
// size inDims that has to produce an output of size outDims.
//
// inDims / outDims    - input and output dimensions; only Dim::W and Dim::H
//                       are read here.
// kernelDimX/Y        - kernel extent along W / H.
// kernelStrideX/Y     - kernel step along W / H.
// padLeft / padTop    - NOTE(review): presumably the source of pad.left and
//                       pad.top -- confirm against the assignments.
38 HwPaddingInfo getHwPaddingInfo(
39 const DimValues& inDims, const DimValues& outDims,
40 int kernelDimX, int kernelDimY,
41 int kernelStrideX, int kernelStrideY,
42 int padLeft, int padTop) {
// Total padding needed per axis: the extent covered by the last output
// position, (out - 1) * stride + kernel, minus the extent the input provides.
// The result is negative when the input is larger than the covered extent.
43 auto pad_along_x = (outDims[Dim::W] - 1) * kernelStrideX + kernelDimX - inDims[Dim::W];
44 auto pad_along_y = (outDims[Dim::H] - 1) * kernelStrideY + kernelDimY - inDims[Dim::H];
// Right/bottom receive whatever remains of the total after left/top,
// clamped so they never go negative.
49 pad.right = std::max(0, pad_along_x - pad.left);
51 pad.bottom = std::max(0, pad_along_y - pad.top);
// Padding is flagged as enabled as soon as any of the four sides is non-zero.
53 pad.enable = pad.left || pad.right || pad.top || pad.bottom;
// Stream overload of printTo for HwPaddingInfo.
// Emits an opening "[" line followed by one "name=value" line for each of
// enable, left, right, top and bottom. Every line ends with std::endl, which
// also flushes the stream.
58 void printTo(std::ostream& os, const HwPaddingInfo& hwPad) {
59 os << "[" << std::endl;
60 os << "enable=" << hwPad.enable << std::endl;
62 os << "left=" << hwPad.left << std::endl;
63 os << "right=" << hwPad.right << std::endl;
64 os << "top=" << hwPad.top << std::endl;
65 os << "bottom=" << hwPad.bottom << std::endl;
// DotLabel overload of printTo for HwPaddingInfo.
// Appends the enable flag and the four side paddings as key/value pairs, using
// the same key names as the stream overload.
// NOTE(review): the pairs are appended to `subLbl`, presumably a nested label
// created from `lbl` -- confirm against its declaration.
70 void printTo(DotLabel& lbl, const HwPaddingInfo& hwPad) {
72 subLbl.appendPair("enable", hwPad.enable);
74 subLbl.appendPair("left", hwPad.left);
75 subLbl.appendPair("right", hwPad.right);
76 subLbl.appendPair("top", hwPad.top);
77 subLbl.appendPair("bottom", hwPad.bottom);
// Constructs a calculated weights content that is derived from origContent.
//
// origContent        - original weights; handed to CalculatedDataContent as
//                      its single base content.
// origWeightsDesc    - descriptor of the original weights, stored for use in
//                      fillTempBuf.
// numInputChannels   - stored in _numInputChannels; fillTempBuf uses it as an
//                      upper limit on the number of input channels copied.
// channelStartIndex  - stored in _channelStartIndex; fillTempBuf uses it as
//                      the first input channel of the original weights to read.
85 HwWeightsContent::HwWeightsContent(const DataContent::Ptr& origContent,
86 const DataDesc& origWeightsDesc,
88 int channelStartIndex) :
89 CalculatedDataContent({origContent}),
90 _origWeightsDesc(origWeightsDesc),
91 _numInputChannels(numInputChannels),
92 _channelStartIndex(channelStartIndex) {
// Copies a slice of the original FP16 weights into tempBuf, re-indexed
// according to this content's own descriptor (desc()).
//
// baseContents - must hold exactly one entry: the original weights.
// tempBuf      - destination buffer, written as an array of fp16_t.
//
// Only input channels [_channelStartIndex, _channelStartIndex + HW_IC_real)
// of the original weights are read.
95 void HwWeightsContent::fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
96 VPU_PROFILE(HwWeightsContent);
// Preconditions: destination is FP16 and there is a single source content.
98 IE_ASSERT(desc().type() == DataType::FP16);
99 IE_ASSERT(baseContents.size() == 1);
// Original weights geometry: kernel width/height, input and output channels.
101 auto KX = _origWeightsDesc.dim(Dim::W);
102 auto KY = _origWeightsDesc.dim(Dim::H);
103 auto IC = _origWeightsDesc.dim(Dim::C);
104 auto OC = _origWeightsDesc.dim(Dim::N);
105 auto origTotalSize = _origWeightsDesc.totalDimSize();
// In the destination descriptor the output channels are split in two:
// Dim::W holds the inner part and Dim::N the outer part. Together they must
// provide room for at least OC channels.
107 auto HW_OC_inner = desc().dim(Dim::W);
108 auto HW_OC_outer = desc().dim(Dim::N);
109 IE_ASSERT(HW_OC_outer * HW_OC_inner >= OC);
// Dim::H of the destination holds the flattened kernel (KX * KY positions).
111 auto HW_K = desc().dim(Dim::H);
112 IE_ASSERT(HW_K == KX * KY);
// HW_IC is the destination channel count; HW_IC_real is how many channels are
// actually copied: at most _numInputChannels, and never past the last
// original input channel.
114 IE_ASSERT(_channelStartIndex < IC);
115 auto HW_IC = desc().dim(Dim::C);
116 auto HW_IC_real = std::min(_numInputChannels, IC - _channelStartIndex);
118 auto srcData = baseContents[0]->get<fp16_t>();
119 IE_ASSERT(srcData != nullptr);
121 auto dstData = static_cast<fp16_t*>(tempBuf);
// Bounds checks performed before the copy loops: the first compares an upper
// source offset with the original element count, the second compares an upper
// destination offset with the destination element count.
// NOTE(review): the second check divides by the literal 8, while the copy
// loops compute oc_outer as oc / HW_OC_inner -- confirm that HW_OC_inner is
// always 8 here, otherwise the check and the loops disagree.
123 IE_ASSERT((_channelStartIndex + HW_IC_real) * HW_K + (OC - 1) * HW_K * IC - 1 < origTotalSize);
124 IE_ASSERT((OC - 1) % HW_OC_inner +
125 (HW_K - 1) * HW_OC_inner +
126 (HW_IC_real - 1) * HW_OC_inner * HW_K +
127 ((OC - 1) / 8) * HW_OC_inner * HW_K * HW_IC < desc().totalDimSize());
// Branch for 1x1 kernels: there is a single kernel position, so no ky/kx
// loops are needed.
129 if (KX == 1 && KY == 1) {
// One parallel task per output channel. The lambda captures by copy ([=]);
// _channelStartIndex is a member and is therefore read through the implicitly
// captured `this`.
130 ie::parallel_for(OC, [=](int oc) {
131 auto oc_inner = oc % HW_OC_inner;
132 auto oc_outer = oc / HW_OC_inner;
133 for (int ic = 0; ic < HW_IC_real; ++ic) {
// Source offset: input channel shifted by _channelStartIndex.
135 (_channelStartIndex + ic) +
// Destination offset: input-channel term plus outer-output-channel term.
139 ic * HW_OC_inner * HW_K +
140 oc_outer * HW_OC_inner * HW_K * HW_IC;
142 dstData[dstInd] = srcData[srcInd];
// Branch for all other kernel sizes: same per-output-channel parallel copy,
// with additional loops over the kernel rows (ky) and columns (kx).
146 ie::parallel_for(OC, [=](int oc) {
147 auto oc_inner = oc % HW_OC_inner;
148 auto oc_outer = oc / HW_OC_inner;
149 for (int ic = 0; ic < HW_IC_real; ++ic) {
150 for (int ky = 0; ky < KY; ++ky) {
151 for (int kx = 0; kx < KX; ++kx) {
// Source offset: shifted input channel scaled by the kernel size.
154 (_channelStartIndex + ic) * HW_K +
// Destination offset: flattened kernel position (ky * KX + kx), then the
// input-channel term, then the outer-output-channel term.
158 (ky * KX + kx) * HW_OC_inner +
159 ic * HW_OC_inner * HW_K +
160 oc_outer * HW_OC_inner * HW_K * HW_IC;
162 dstData[dstInd] = srcData[srcInd];
170 // calculateHwBufferSize
// Returns the total byte size of an FP16 buffer with the given dimensions and
// dimension order, computed from strides that satisfy an "Aligned" stride
// requirement.
//
// dims  - buffer dimensions.
// order - dimension order; it can be replaced by the default order for
//         dims.size() dimensions (DimsOrder::fromNumDims).
//         NOTE(review): presumably only when the caller passed an empty
//         order -- confirm against the guarding condition.
173 int calculateHwBufferSize(const DimValues& dims, DimsOrder order) {
175 order = DimsOrder::fromNumDims(dims.size());
178 DataDesc desc(DataType::FP16, order, dims);
// More than two dimensions: the Aligned requirement is placed on index 1.
180 if (desc.numDims() > 2) {
181 return calcTotalByteSize(desc, calcStrides(desc, StridesRequirement().add(1, DimStride::Aligned)));
// Otherwise the descriptor is required to be NC and the Aligned requirement
// is placed on index 0.
183 IE_ASSERT(desc.dimsOrder() == DimsOrder::NC);
185 return calcTotalByteSize(desc, calcStrides(desc, StridesRequirement().add(0, DimStride::Aligned)));