1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
#include "blob_factory.hpp"
#include "mkldnn_memory.h"

#include <cstdint>
#include <fstream>
#include <istream>
#include <ostream>
#include <string>

// It's so bad to include by relative path :-(
#include "../../thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp"
14 using namespace InferenceEngine;
16 namespace MKLDNNPlugin {
// IEB file format routine
// File signature stored at the start of every IEB dump: "IEB" plus the
// format generation character '0'. Checked verbatim by parse_header().
static unsigned char IEB_MAGIC[4] = {'I', 'E', 'B', '0'};
// Sentinel for IEB_HEADER::scaling_axis meaning "no scale factors stored".
static unsigned char NO_SCALES = 0xFF;
// On-disk header of the IEB dump format. The header is written and read with
// raw stream.write/stream.read (see BlobDumper::dump/read), so this struct
// must remain a trivially-copyable POD; any field change breaks compatibility
// with previously written dump files.
struct IEB_HEADER {
    unsigned char magic[4];      // must match IEB_MAGIC
    unsigned char ver[2];        // format version, currently {0, 1}

    unsigned char precision;     // InferenceEngine::Precision::ePrecision value, 0-8
    unsigned char ndims;         // number of valid entries in dims[]
    unsigned int dims[7];        // max is 7-D blob

    unsigned char scaling_axis;  // FF - no scaling; 1 - per-channel scales
    unsigned char reserved[3];   // explicit padding before the offset fields

    unsigned long data_offset;          // byte offset of the plain tensor data
    unsigned long data_size;            // tensor data size in bytes
    unsigned long scaling_data_offset;  // byte offset of FP32 scales (0 if none)
    unsigned long scaling_data_size;    // scale data size in bytes (0 if none)
};
39 static IEB_HEADER prepare_header(const TensorDesc& desc) {
40 IEB_HEADER header = {0};
42 header.magic[0] = IEB_MAGIC[0];
43 header.magic[1] = IEB_MAGIC[1];
44 header.magic[2] = IEB_MAGIC[2];
45 header.magic[3] = IEB_MAGIC[3];
47 // IEB file format version 0.1
51 header.precision = desc.getPrecision();
53 if (desc.getDims().size() > 7)
54 THROW_IE_EXCEPTION << "Dumper support max 7D blobs";
56 header.ndims = desc.getDims().size();
57 for (int i = 0; i < header.ndims; i++)
58 header.dims[i] = desc.getDims()[i];
60 header.scaling_axis = NO_SCALES;
65 static TensorDesc parse_header(IEB_HEADER &header) {
66 if (header.magic[0] != IEB_MAGIC[0] ||
67 header.magic[1] != IEB_MAGIC[1] ||
68 header.magic[2] != IEB_MAGIC[2] ||
69 header.magic[3] != IEB_MAGIC[3])
70 THROW_IE_EXCEPTION << "Dumper cannot parse file. Wrong format.";
72 if (header.ver[0] != 0 ||
74 THROW_IE_EXCEPTION << "Dumper cannot parse file. Unsupported IEB format version.";
76 Precision prc = Precision(static_cast<Precision::ePrecision>(header.precision));
77 SizeVector dims(header.ndims);
78 for (int i = 0; i < header.ndims; i++)
79 dims[i] = header.dims[i];
81 return TensorDesc {prc, dims, plain_layout(dims)};
85 bool is_plain(Blob::Ptr blob) {
88 auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides();
89 auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder();
90 auto dims = blob->getTensorDesc().getDims();
92 for (int stride = 1, i = dims.size()-1; i >= 0; --i) {
93 if (stride != orig_strides[i] || i != orig_order[i]) res = false;
100 static Blob::Ptr prepare_plain_data(Blob::Ptr blob) {
101 // check if it already plain
102 if (is_plain(blob)) return blob;
104 Blob::Ptr pln_blob = make_plain_blob(blob->precision(), blob->getTensorDesc().getDims());
105 pln_blob->allocate();
108 MKLDNNMemoryDesc mdesc(blob->getTensorDesc());
109 mkldnn::memory::desc desc = mdesc;
110 mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);
112 int data_size = blob->size();
114 // TODO: make it with blob_copy utility
115 switch (blob->precision()) {
116 case Precision::FP32:
117 case Precision::I32: {
118 int32_t *pln_blob_ptr = pln_blob->buffer().as<int32_t*>();
119 int32_t *blob_ptr = blob->buffer().as<int32_t*>();
120 for (size_t i = 0; i < data_size; i++)
121 pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
125 case Precision::U16: {
126 int16_t *pln_blob_ptr = pln_blob->buffer().as<int16_t*>();
127 int16_t *blob_ptr = blob->buffer().as<int16_t *>();
128 for (size_t i = 0; i < data_size; i++)
129 pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
133 case Precision::U8: {
134 int8_t *pln_blob_ptr = pln_blob->buffer().as<int8_t*>();
135 int8_t *blob_ptr = blob->buffer().as<int8_t *>();
136 for (size_t i = 0; i < data_size; i++)
137 pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
141 THROW_IE_EXCEPTION << "Dumper. Unsupported precision";
147 void BlobDumper::dump(std::ostream &stream) {
149 THROW_IE_EXCEPTION << "Dumper cannot dump empty Blob";
151 if (_blob->buffer().as<float*>() == nullptr)
152 THROW_IE_EXCEPTION << "Dumper cannot dump. Blob is not allocated.";
154 IEB_HEADER header = prepare_header(_blob->getTensorDesc());
155 Blob::Ptr pln_blob = prepare_plain_data(_blob);
157 header.data_offset = sizeof(header);
158 header.data_size = pln_blob->byteSize();
159 header.scaling_data_offset = 0;
160 header.scaling_data_size = 0;
163 header.scaling_axis = 1;
164 header.scaling_data_offset = header.data_offset + header.data_size;
165 header.scaling_data_size = _scales->byteSize();
168 stream.write(reinterpret_cast<char*>(&header), sizeof(header));
169 stream.write(pln_blob->buffer().as<char*>(), pln_blob->byteSize());
172 stream.write(_scales->buffer().as<char*>(), _scales->byteSize());
176 void BlobDumper::dumpAsTxt(std::ostream &stream) {
178 THROW_IE_EXCEPTION << "Dumper cannot dump empty Blob";
180 if (_blob->buffer().as<float*>() == nullptr)
181 THROW_IE_EXCEPTION << "Dumper cannot dump. Blob is not allocated.";
183 SizeVector dims = _blob->getTensorDesc().getDims();
185 // Header like "U8 4D shape: 2 3 224 224 ()
186 stream << _blob->precision().name() << " "
187 << dims.size() << "D "
189 for (size_t d : dims) stream << d << " ";
190 stream << "(" << _blob->size() << ")" <<std::endl;
193 MKLDNNMemoryDesc mdesc(_blob->getTensorDesc());
194 mkldnn::memory::desc desc = mdesc;
195 mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data);
197 int data_size = _blob->size();
198 switch (_blob->precision()) {
199 case Precision::FP32: {
200 auto *blob_ptr = _blob->buffer().as<float*>();
201 for (size_t i = 0; i < data_size; i++)
202 stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
205 case Precision::I32: {
206 auto *blob_ptr = _blob->buffer().as<int32_t*>();
207 for (size_t i = 0; i < data_size; i++)
208 stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
211 case Precision::I16: {
212 auto *blob_ptr = _blob->buffer().as<int16_t*>();
213 for (size_t i = 0; i < data_size; i++)
214 stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
217 case Precision::U16: {
218 auto *blob_ptr = _blob->buffer().as<uint16_t*>();
219 for (size_t i = 0; i < data_size; i++)
220 stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
223 case Precision::I8: {
224 auto *blob_ptr = _blob->buffer().as<int8_t*>();
225 for (size_t i = 0; i < data_size; i++)
226 stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
229 case Precision::U8: {
230 auto *blob_ptr = _blob->buffer().as<uint8_t*>();
231 for (size_t i = 0; i < data_size; i++)
232 stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl;
236 THROW_IE_EXCEPTION << "Dumper. Unsupported precision";
240 BlobDumper BlobDumper::read(std::istream &stream) {
242 stream.read(reinterpret_cast<char*>(&header), sizeof(header));
244 TensorDesc desc = parse_header(header);
245 Blob::Ptr blob = make_blob_with_precision(desc);
248 stream.seekg(header.data_offset, stream.beg);
249 stream.read(blob->buffer().as<char*>(), header.data_size);
251 BlobDumper res(blob);
253 // Parse scales fields.
254 if (header.scaling_axis != NO_SCALES) {
255 if (header.scaling_axis != 1)
256 THROW_IE_EXCEPTION << "Dumper support scaling only for channel dims.";
258 size_t scl_size = header.scaling_data_size / sizeof(float);
259 auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C});
262 stream.seekg(header.scaling_data_offset, stream.beg);
263 stream.read(scl->buffer().as<char*>(), header.scaling_data_size);
270 BlobDumper BlobDumper::read(const std::string &file_path) {
272 file.open(file_path);
274 THROW_IE_EXCEPTION << "Dumper cannot open file " << file_path;
276 auto res = read(file);
281 void BlobDumper::dump(const std::string &dump_path) {
282 std::ofstream dump_file;
283 dump_file.open(dump_path);
284 if (!dump_file.is_open())
285 THROW_IE_EXCEPTION << "Dumper cannot create dump file";
291 void BlobDumper::dumpAsTxt(const std::string dump_path) {
292 std::ofstream dump_file;
293 dump_file.open(dump_path);
294 if (!dump_file.is_open())
295 THROW_IE_EXCEPTION << "Dumper cannot create dump file";
297 dumpAsTxt(dump_file);
301 Blob::Ptr BlobDumper::get() {
305 template <typename data_t>
306 static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) {
307 auto dims = from->getTensorDesc().getDims();
309 size_t data_size = from->size();
310 size_t outer_size = dims[0];
311 size_t c_size = dims.size() > 1 ? dims[1] : 1;
312 size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] :
313 dims.size() == 3 ? dims[2] : 1;
315 auto to_data = to->buffer().as<float*>();
316 auto from_data = from->buffer().as<data_t*>();
319 auto scls_data = scls->buffer().as<float*>();
321 for (size_t o=0; o < outer_size; o++)
322 for (size_t c=0; c < c_size; c++)
323 for (size_t i=0; i < inner_size; i++)
324 *to_data++ = static_cast<float>(*from_data++) * scls_data[c];
326 for (size_t i=0; i < data_size; i++)
327 *to_data++ = static_cast<float>(*from_data++);
331 Blob::Ptr BlobDumper::getRealValue() {
332 if (_blob->precision() == Precision::FP32 && !_scales)
335 auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims());
338 switch (_blob->precision()) {
339 case Precision::U8: plain_copy<uint8_t>(_blob, _scales, res); break;
340 case Precision::FP32: plain_copy<float>(_blob, _scales, res); break;
341 case Precision::I8: plain_copy<int8_t >(_blob, _scales, res); break;
342 default: THROW_IE_EXCEPTION << "Unsupported precesion for getRealValue method.";
349 BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) {
350 if ( _blob->getTensorDesc().getDims().size() < 2 ||
351 scales->getTensorDesc().getDims().size() != 1 ||
352 scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] ||
353 scales->getTensorDesc().getPrecision() != Precision::FP32)
354 THROW_IE_EXCEPTION << "Dumper cannot use passed scales. Blob has incompatible shape.";
360 BlobDumper& BlobDumper::withoutScales() {
366 const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const {
370 } // namespace MKLDNNPlugin