/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "RecordMinMax.h"
#include "MinMaxObserver.h"

#include <luci/Importer.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>

#include <dio_hdf5/HDF5Importer.h>

#include <memory>
36 using Shape = std::vector<loco::Dimension>;
37 using DataType = loco::DataType;
// Max h5 file size for parallel recording in bytes = 1 GB (decimal, i.e. 10^9 bytes)
// Parallel recording loads the whole file into memory, hence the cap.
constexpr long h5_max_size_bytes = 1000000000L;
/**
 * @brief  getH5FileSize will return the size of the given file in bytes
 *
 * @param  input_data_path  Path of the file to measure
 * @return Size in bytes, or -1 when the file cannot be opened or its end
 *         position cannot be determined
 */
long getH5FileSize(const std::string &input_data_path)
{
  // Open positioned at the end so that tellg() directly reports the file size
  std::ifstream in_file(input_data_path, std::ios::binary | std::ios::ate);
  if (!in_file.is_open())
    return -1;

  // tellg() yields -1 on failure, which is passed through to the caller
  const std::streamoff end_pos = in_file.tellg();
  return static_cast<long>(end_pos);
}
53 uint32_t numElements(const luci::CircleNode *node)
55 uint32_t num_elements = 1;
56 for (uint32_t i = 0; i < node->rank(); i++)
57 num_elements *= node->dim(i).value();
62 // Throw exception if input has one of the following conditions.
63 // 1. Have unknown dimension
64 // 2. Number of elements is 0
65 void checkInputDimension(const luci::CircleInput *input)
67 for (uint32_t i = 0; i < input->rank(); i++)
68 if (!input->dim(i).known())
69 throw std::runtime_error(input->name() + " has unknown dimension");
71 if (numElements(input) == 0)
72 throw std::runtime_error(input->name() + " is a zero-sized input");
75 void readDataFromFile(const std::string &filename, std::vector<char> &data, size_t data_size)
77 assert(data.size() == data_size); // FIX_CALLER_UNLESS
79 std::ifstream fs(filename, std::ifstream::binary);
81 throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
82 if (fs.read(data.data(), data_size).fail())
83 throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
85 throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
/**
 * @brief  genRandomBoolData will return num_elements random values, each 0 or 1
 *
 * @param  gen           Random engine to draw from (advanced by this call)
 * @param  num_elements  Number of values to generate
 */
std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
{
  std::uniform_int_distribution<> dist(0, 1);
  std::vector<uint8_t> input_data(num_elements);

  // Write random data
  for (auto &iter : input_data)
    iter = static_cast<uint8_t>(dist(gen));

  return input_data;
}
/**
 * @brief  genRandomIntData will return num_elements random integers in [min, max]
 *
 * @param  gen           Random engine to draw from (advanced by this call)
 * @param  num_elements  Number of values to generate
 * @param  min           Smallest value that may be generated (inclusive)
 * @param  max           Largest value that may be generated (inclusive)
 */
template <typename T>
std::vector<T> genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max)
{
  // std::uniform_int_distribution requires min <= max
  assert(min <= max); // FIX_CALLER_UNLESS

  std::uniform_int_distribution<T> dist(min, max);
  std::vector<T> input_data(num_elements);

  // Write random data
  {
    auto const generator = [&gen, &dist]() { return dist(gen); };
    std::generate(begin(input_data), end(input_data), generator);
  }

  return input_data;
}
116 * @brief getTensorSize will return size in bytes
118 template <typename NodeT> size_t getTensorSize(const NodeT *node)
120 uint32_t tensor_size = loco::size(node->dtype());
121 for (uint32_t i = 0; i < node->rank(); ++i)
122 tensor_size *= node->dim(i).value();
127 * @brief verifyTypeShape checks the type and the shape of CircleInput
128 * This throws an exception if type or shape does not match
130 void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
133 if (dtype != input_node->dtype())
134 throw std::runtime_error("Wrong input type.");
136 if (shape.size() != input_node->rank())
137 throw std::runtime_error("Input rank mismatch.");
139 for (uint32_t i = 0; i < shape.size(); i++)
141 if (not(shape.at(i) == input_node->dim(i)))
142 throw std::runtime_error("Input shape mismatch.");
148 namespace record_minmax
151 void RecordMinMax::initialize(const std::string &input_model_path)
153 assert(_threads_size > 0);
155 // Load model from the file
156 std::ifstream fs(input_model_path, std::ifstream::binary);
159 throw std::runtime_error("Cannot open model file \"" + input_model_path + "\".\n");
161 std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
162 std::istreambuf_iterator<char>());
164 // Verify flatbuffers
165 flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
167 if (!circle::VerifyModelBuffer(verifier))
169 throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
172 const circle::Model *circle_model = circle::GetModel(model_data.data());
173 if (circle_model == nullptr)
175 throw std::runtime_error("Failed to load '" + input_model_path + "'");
178 _module = luci::Importer().importModule(circle_model);
180 if (_module == nullptr)
182 throw std::runtime_error("Failed to load '" + input_model_path + "'");
185 // Create and initialize interpreters and observers
186 _interpreters.resize(_threads_size);
187 _observers.resize(_threads_size);
189 for (uint32_t thread_idx = 0; thread_idx < _threads_size; ++thread_idx)
191 auto interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
192 auto observer = std::make_unique<MinMaxObserver>();
194 interpreter->attachObserver(observer.get());
196 _observers[thread_idx] = std::move(observer);
197 _interpreters[thread_idx] = std::move(interpreter);
201 // input_data_path is a path to the directory
202 // The directory should contain binary files each of which is a raw data,
203 // ready to be consumed by the input circle model without any modification
204 // TODO reduce duplicate codes with profileRawData
205 void RecordMinMax::profileRawDataDirectory(const std::string &input_data_path)
207 struct dirent *entry = nullptr;
210 dp = opendir(input_data_path.c_str());
212 throw std::runtime_error("Cannot open directory. Please check \"" + input_data_path +
213 "\" is a directory.\n");
215 uint32_t num_records = 0;
216 const auto input_nodes = loco::input_nodes(_module->graph());
218 // Get total input size
219 uint32_t total_input_size = 0;
220 for (auto input : input_nodes)
222 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
223 checkInputDimension(input_node);
224 total_input_size += getTensorSize(input_node);
227 while ((entry = readdir(dp)))
229 // Skip if the entry is not a regular file
230 if (entry->d_type != DT_REG)
233 const std::string filename = entry->d_name;
234 std::cout << "Recording " << num_records << "'th data" << std::endl;
236 // Read data from file to buffer
237 // Assumption: For a multi-input model, the binary file should have inputs concatenated in the
238 // same order with the input index.
239 std::vector<char> input_data(total_input_size);
240 readDataFromFile(input_data_path + "/" + filename, input_data, total_input_size);
242 // Write data from buffer to interpreter
244 for (auto input : input_nodes)
246 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
247 const auto input_size = getTensorSize(input_node);
248 getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
250 offset += input_size;
253 getInterpreter()->interpret();
260 if (num_records == 0)
261 throw std::runtime_error("The input data file does not contain any record.");
263 std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
265 _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
268 // input_data_path is a text file which specifies the representative data
269 // The text file should contain absolute file path per line.
270 // The pointed file should be a binary file containing one representative data,
271 // ready to be consumed by the input circle model without any modification
272 // NOTE If a model has multiple inputs, the binary file should have inputs concatenated in the same
273 // order with the input index of the circle model.
274 void RecordMinMax::profileRawData(const std::string &input_data_path)
276 std::ifstream input_file(input_data_path);
277 if (input_file.fail())
278 throw std::runtime_error("Cannot open file \"" + input_data_path + "\".\n");
281 uint32_t num_records = 0;
282 const auto input_nodes = loco::input_nodes(_module->graph());
284 // Get total input size
285 uint32_t total_input_size = 0;
286 for (auto input : input_nodes)
288 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
289 checkInputDimension(input_node);
290 total_input_size += getTensorSize(input_node);
293 while (getline(input_file, record))
295 std::cout << "Recording " << num_records << "'th data" << std::endl;
297 // Read data from file to buffer
298 // Assumption: For a multi-input model, the binary file should have inputs concatenated in the
299 // same order with the input index.
300 std::vector<char> input_data(total_input_size);
301 readDataFromFile(record, input_data, total_input_size);
303 // Write data from buffer to interpreter
305 for (auto input : input_nodes)
307 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
308 const auto input_size = getTensorSize(input_node);
309 getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
311 offset += input_size;
314 getInterpreter()->interpret();
319 if (num_records == 0)
320 throw std::runtime_error("The input data file does not contain any record.");
322 std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
324 _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
327 WholeOutput RecordMinMax::importH5Data(const std::string &input_data_path)
331 dio::hdf5::HDF5Importer importer(input_data_path);
332 importer.importGroup("value");
334 bool is_raw_data = importer.isRawData();
336 const auto num_records = importer.numData();
337 if (num_records == 0)
338 throw std::runtime_error("The input data file does not contain any record.");
340 const auto input_nodes = loco::input_nodes(_module->graph());
341 const auto num_inputs = input_nodes.size();
343 WholeOutput whole_output(num_records);
345 // Read inputs to whole_output
346 for (int i = 0; i < num_records; ++i)
348 if (num_inputs != static_cast<uint32_t>(importer.numInputs(i)))
349 throw std::runtime_error("Wrong number of inputs.");
351 for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
353 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
354 assert(input_node->index() == input_idx);
355 checkInputDimension(input_node);
356 Buffer input_data(getTensorSize(input_node));
362 importer.readTensor(i, input_idx, &dtype, &shape, input_data.data(), input_data.size());
364 // Check the type and the shape of the input data is valid
365 verifyTypeShape(input_node, dtype, shape);
369 // Skip type/shape check for raw data
370 importer.readTensor(i, input_idx, input_data.data(), input_data.size());
372 whole_output[i].emplace_back(std::move(input_data));
378 catch (const H5::Exception &e)
380 H5::Exception::printErrorStack();
381 throw std::runtime_error("HDF5 error occurred.");
385 void RecordMinMax::profileData(const std::string &input_data_path)
389 dio::hdf5::HDF5Importer importer(input_data_path);
390 importer.importGroup("value");
392 bool is_raw_data = importer.isRawData();
394 const auto num_records = importer.numData();
395 if (num_records == 0)
396 throw std::runtime_error("The input data file does not contain any record.");
398 const auto input_nodes = loco::input_nodes(_module->graph());
399 const auto num_inputs = input_nodes.size();
401 for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
403 if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
404 throw std::runtime_error("Wrong number of inputs.");
406 std::cout << "Recording " << record_idx << "'th data" << std::endl;
408 for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
410 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
411 assert(input_node->index() == input_idx);
412 checkInputDimension(input_node);
413 std::vector<char> input_data(getTensorSize(input_node));
419 importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data(),
422 // Check the type and the shape of the input data is valid
423 verifyTypeShape(input_node, dtype, shape);
427 // Skip type/shape check for raw data
428 importer.readTensor(record_idx, input_idx, input_data.data(), input_data.size());
431 // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
432 // We can redcue the copy by directly writing data from file to interpreter inputs
433 getInterpreter()->writeInputTensor(input_node, input_data.data(), input_data.size());
436 getInterpreter()->interpret();
439 std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
441 catch (const H5::Exception &e)
443 H5::Exception::printErrorStack();
444 throw std::runtime_error("HDF5 error occurred.");
447 _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
450 void RecordMinMax::profileDataInParallel(const std::string &input_data_path)
454 assert(_interpreters.size() == _threads_size);
455 assert(_observers.size() == _threads_size);
457 const long h5_file_size = getH5FileSize(input_data_path);
459 if (h5_file_size > h5_max_size_bytes)
460 throw std::runtime_error("H5 file size is too large for parallel recording");
462 WholeOutput whole_output;
465 whole_output = importH5Data(input_data_path);
467 catch (const std::bad_alloc &e)
469 throw std::runtime_error("Out of memory during h5 data load.");
472 const auto num_records = whole_output.size();
473 const auto input_nodes = loco::input_nodes(_module->graph());
475 // Start parallel part
476 INFO(l) << _threads_size << " concurrent threads are supported." << std::endl;
478 const auto run_threads = num_records < _threads_size ? num_records : _threads_size;
480 const auto records_batch = static_cast<uint32_t>(num_records / run_threads);
482 auto interpret_batch = [&whole_output, &input_nodes](int first_record, int last_record,
483 luci_interpreter::Interpreter *interpreter) {
484 for (int record_index = first_record; record_index < last_record; ++record_index)
486 for (uint32_t input_idx = 0; input_idx < input_nodes.size(); input_idx++)
488 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
490 const auto &cur_input_data = whole_output[record_index][input_idx];
491 interpreter->writeInputTensor(input_node, cur_input_data.data(), cur_input_data.size());
493 interpreter->interpret();
497 std::vector<std::thread> threads;
498 for (uint32_t t = 0; t < run_threads; ++t)
500 if (t < run_threads - 1)
502 threads.emplace_back(interpret_batch, records_batch * t, records_batch * (t + 1),
503 _interpreters[t].get());
507 threads.emplace_back(interpret_batch, records_batch * t, num_records, _interpreters[t].get());
511 for (uint32_t i = 0; i < run_threads; ++i)
512 threads.at(i).join();
516 // Copy all min, max values to one min/max map
517 MinMaxMap main_min_max_map;
519 for (const auto &obs : _observers)
521 const auto cur_minmax_map = obs->minMaxData()->getMap();
522 for (auto &iter : *cur_minmax_map)
524 const auto node = iter.first;
525 const auto &minmax = iter.second;
527 main_min_max_map.appendMinMaxVector(node, minmax);
531 std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
533 _minmax_computer->update_qparam(main_min_max_map.getMap());
536 void RecordMinMax::profileDataWithRandomInputs(void)
538 // We use three randomly-generated records
539 const uint32_t num_records = 3;
541 const auto input_nodes = loco::input_nodes(_module->graph());
542 const auto num_inputs = input_nodes.size();
544 std::random_device rd;
545 std::mt19937 gen(rd());
546 std::uniform_real_distribution<> dist(-5, 5);
548 for (uint32_t record_idx = 0; record_idx < num_records; record_idx++)
550 std::cout << "Recording " << record_idx << "'th data" << std::endl;
552 for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
554 const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
555 assert(input_node->index() == input_idx);
556 checkInputDimension(input_node);
558 const auto num_elements = numElements(input_node);
560 // TODO Support more input data types
561 assert(input_node->dtype() == loco::DataType::FLOAT32 ||
562 input_node->dtype() == loco::DataType::BOOL ||
563 input_node->dtype() == loco::DataType::S32 ||
564 input_node->dtype() == loco::DataType::S64);
566 if (input_node->dtype() == DataType::FLOAT32)
568 std::vector<float> input_data(num_elements);
571 for (auto &iter : input_data)
572 iter = static_cast<float>(dist(gen));
574 // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
575 // We can redcue the copy by directly writing data from file to interpreter inputs
576 getInterpreter()->writeInputTensor(input_node, input_data.data(),
577 input_data.size() * sizeof(float));
579 else if (input_node->dtype() == DataType::BOOL)
581 auto input_data = genRandomBoolData(gen, num_elements);
582 getInterpreter()->writeInputTensor(input_node, input_data.data(),
583 input_data.size() * sizeof(uint8_t));
585 else if (input_node->dtype() == DataType::S32)
587 auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
588 getInterpreter()->writeInputTensor(input_node, input_data.data(),
589 input_data.size() * sizeof(int32_t));
591 else if (input_node->dtype() == DataType::S64)
593 auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
594 getInterpreter()->writeInputTensor(input_node, input_data.data(),
595 input_data.size() * sizeof(int64_t));
599 getInterpreter()->interpret();
602 std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
604 _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
607 void RecordMinMax::saveModel(const std::string &output_model_path)
609 // Export to output Circle file
610 luci::CircleExporter exporter;
612 luci::CircleFileExpContract contract(_module.get(), output_model_path);
614 if (!exporter.invoke(&contract))
616 throw std::runtime_error("Failed to export '" + output_model_path + "'");
620 } // namespace record_minmax