1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <vpu/hw/utility.hpp>
8 #include <unordered_map>
11 #include <ie_parallel.hpp>
13 #include <vpu/model/stage.hpp>
14 #include <vpu/utils/numeric.hpp>
// Streams a bracketed, line-per-field summary of a HwOpList to `os`.
// The lines visible here emit the opening "[" and the number of entries in
// `hwOps.vec`.
// NOTE(review): the embedded numbering jumps from 24 to 28, so the end of this
// function is not shown in this listing — confirm against the full file.
22 void printTo(std::ostream& os, const HwOpList& hwOps) {
23 os << "[" << std::endl;
// Only the element count is reported, not the individual operations.
24 os << "size=" << hwOps.vec.size() << std::endl;
// DotLabel counterpart of the stream printer for HwOpList: records the number
// of entries in `hwOps.vec` under the key "size".
// NOTE(review): `subLbl` is declared on a line that is not shown here (the
// embedded numbering skips 29) — presumably a nested label built from `lbl`;
// confirm against the full file.
28 void printTo(DotLabel& lbl, const HwOpList& hwOps) {
30 subLbl.appendPair("size", hwOps.vec.size());
// Derives the padding for a kernel window from the input/output dimensions,
// the kernel size and the kernel stride.
//
// Parameters:
//   inDims, outDims              - dimensions of the input and of the requested
//                                  output; only Dim::W and Dim::H are read.
//   kernelDimX, kernelDimY       - kernel width and height.
//   kernelStrideX, kernelStrideY - kernel step along W and H.
//
// Fills `pad.left/right/top/bottom` and `pad.enable`.
// NOTE(review): the declaration and the return of `pad` sit on lines that are
// not shown in this listing (embedded numbers 46-48 and 55+ are skipped).
37 HwPaddingInfo getHwPaddingInfo(
38 const DimValues& inDims, const DimValues& outDims,
39 int kernelDimX, int kernelDimY,
40 int kernelStrideX, int kernelStrideY) {
// Output size obtained when the kernel only visits positions that fit fully
// inside the input (no padding): ceil((in - kernel + 1) / stride).
41 int valid_out_x = std::ceil(static_cast<double>(inDims[Dim::W] - kernelDimX + 1) / kernelStrideX);
42 int valid_out_y = std::ceil(static_cast<double>(inDims[Dim::H] - kernelDimY + 1) / kernelStrideY);
// Total extra extent along each axis needed for the last of the requested
// output positions to have a full kernel window.
44 auto pad_along_x = (outDims[Dim::W] - 1) * kernelStrideX + kernelDimX - inDims[Dim::W];
45 auto pad_along_y = (outDims[Dim::H] - 1) * kernelStrideY + kernelDimY - inDims[Dim::H];
// Left/top receive the halved total (integer division); right/bottom receive
// whatever remains, so the two sides always add up to the total.
49 pad.left = pad_along_x / 2;
50 pad.right = pad_along_x - pad.left;
51 pad.top = pad_along_y / 2;
52 pad.bottom = pad_along_y - pad.top;
// Padding is flagged as enabled whenever the requested output size differs
// from the no-padding size in either dimension.
54 pad.enable = (outDims[Dim::W] != valid_out_x || outDims[Dim::H] != valid_out_y);
// Streams a bracketed, line-per-field summary of a HwPaddingInfo to `os`:
// the enable flag followed by the left/right/top/bottom amounts.
// NOTE(review): embedded line 62 is not shown in this listing; it may guard
// the four side values (e.g. on `hwPad.enable`) — confirm against the full
// file. The end of the function (after line 66) is not shown either.
59 void printTo(std::ostream& os, const HwPaddingInfo& hwPad) {
60 os << "[" << std::endl;
61 os << "enable=" << hwPad.enable << std::endl;
63 os << "left=" << hwPad.left << std::endl;
64 os << "right=" << hwPad.right << std::endl;
65 os << "top=" << hwPad.top << std::endl;
66 os << "bottom=" << hwPad.bottom << std::endl;
// DotLabel counterpart of the stream printer for HwPaddingInfo: records the
// enable flag and the left/right/top/bottom amounts as key/value pairs.
// NOTE(review): `subLbl` is declared on a line that is not shown here
// (embedded line 72), and embedded line 74 is also missing — it may guard the
// four side values; confirm against the full file.
71 void printTo(DotLabel& lbl, const HwPaddingInfo& hwPad) {
73 subLbl.appendPair("enable", hwPad.enable);
75 subLbl.appendPair("left", hwPad.left);
76 subLbl.appendPair("right", hwPad.right);
77 subLbl.appendPair("top", hwPad.top);
78 subLbl.appendPair("bottom", hwPad.bottom);
// Constructs a weights content object that is calculated from `origContent`.
//
// `origContent` is handed to the CalculatedDataContent base as its single base
// content; the original weights descriptor, the number of input channels and
// the index of the first input channel are stored for later use.
// NOTE(review): the `numInputChannels` parameter is declared on a line that is
// not shown in this listing (embedded line 88).
86 HwWeightsContent::HwWeightsContent(const DataContent::Ptr& origContent,
87 const DataDesc& origWeightsDesc,
89 int channelStartIndex) :
90 CalculatedDataContent({origContent}),
91 _origWeightsDesc(origWeightsDesc),
92 _numInputChannels(numInputChannels),
93 _channelStartIndex(channelStartIndex) {
96 void HwWeightsContent::fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
97 VPU_PROFILE(HwWeightsContent);
99 IE_ASSERT(_desc.type() == DataType::FP16);
100 IE_ASSERT(baseContents.size() == 1);
102 auto KX = _origWeightsDesc.dim(Dim::W);
103 auto KY = _origWeightsDesc.dim(Dim::H);
104 auto IC = _origWeightsDesc.dim(Dim::C);
105 auto OC = _origWeightsDesc.dim(Dim::N);
106 auto origTotalSize = _origWeightsDesc.totalDimSize();
108 auto HW_OC_inner = desc().dim(Dim::W);
109 auto HW_OC_outer = desc().dim(Dim::N);
110 IE_ASSERT(HW_OC_outer * HW_OC_inner >= OC);
112 auto HW_K = desc().dim(Dim::H);
113 IE_ASSERT(HW_K == HW_K);
115 IE_ASSERT(_channelStartIndex < IC);
116 auto HW_IC = desc().dim(Dim::C);
117 auto HW_IC_real = std::min(_numInputChannels, IC - _channelStartIndex);
119 auto srcData = baseContents[0]->get<fp16_t>();
120 IE_ASSERT(srcData != nullptr);
122 auto dstData = static_cast<fp16_t*>(tempBuf);
124 IE_ASSERT((_channelStartIndex + HW_IC_real) * HW_K + (OC - 1) * HW_K * IC - 1 < origTotalSize);
125 IE_ASSERT((OC - 1) % HW_OC_inner +
126 (HW_K - 1) * HW_OC_inner +
127 (HW_IC_real - 1) * HW_OC_inner * HW_K +
128 ((OC - 1) / 8) * HW_OC_inner * HW_K * HW_IC < _desc.totalDimSize());
130 if (KX == 1 && KY == 1) {
131 ie::parallel_for(OC, [=](int oc) {
132 auto oc_inner = oc % HW_OC_inner;
133 auto oc_outer = oc / HW_OC_inner;
134 for (int ic = 0; ic < HW_IC_real; ++ic) {
136 (_channelStartIndex + ic) +
140 ic * HW_OC_inner * HW_K +
141 oc_outer * HW_OC_inner * HW_K * HW_IC;
143 dstData[dstInd] = srcData[srcInd];
147 ie::parallel_for(OC, [=](int oc) {
148 auto oc_inner = oc % HW_OC_inner;
149 auto oc_outer = oc / HW_OC_inner;
150 for (int ic = 0; ic < HW_IC_real; ++ic) {
151 for (int ky = 0; ky < KY; ++ky) {
152 for (int kx = 0; kx < KX; ++kx) {
155 (_channelStartIndex + ic) * HW_K +
159 (ky * KX + kx) * HW_OC_inner +
160 ic * HW_OC_inner * HW_K +
161 oc_outer * HW_OC_inner * HW_K * HW_IC;
163 dstData[dstInd] = srcData[srcInd];
171 // calculateHwBufferSize
// Returns the byte size of an FP16 buffer with the given dimensions and
// dimension order, as reported by calcTotalByteSize for strides computed with
// an Aligned stride requirement.
//
// Parameters:
//   dims  - dimensions of the buffer.
//   order - dimension order; the visible code can replace it with
//           DimsOrder::fromNumDims(dims.size()).
// NOTE(review): the condition guarding that replacement, and the branch
// structure between the two returns, sit on lines that are not shown in this
// listing (embedded lines 175, 177-178, 183, 185) — presumably the default
// order is used only when none was supplied; confirm against the full file.
174 int calculateHwBufferSize(const DimValues& dims, DimsOrder order) {
176 order = DimsOrder::fromNumDims(dims.size());
179 DataDesc desc(DataType::FP16, order, dims);
// More than two dimensions: the Aligned requirement is added for index 1.
181 if (desc.numDims() > 2) {
182 return calcTotalByteSize(desc, calcStrides(desc, StridesRequirement().add(1, DimStride::Aligned)));
// Otherwise the descriptor must be in NC order and the Aligned requirement is
// added for index 0.
184 IE_ASSERT(desc.dimsOrder() == DimsOrder::NC);
186 return calcTotalByteSize(desc, calcStrides(desc, StridesRequirement().add(0, DimStride::Aligned)));