compiler/circle-tensordump/src/Dump.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
#include "Dump.h"

#include <mio_circle/Reader.h>

#include <H5Cpp.h>

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
  27
  28 namespace
  29 {
  30
  31 template <typename T>
  32 void print_comma_sepearted(std::ostream &os, const flatbuffers::Vector<T> *vec)
  33 {
  34   if (vec == nullptr)
  35     return;
  36   for (auto iter = vec->begin(); iter != vec->end(); iter++)
  37   {
  38     if (iter != vec->begin())
  39       os << ", ";
  40     os << *iter;
  41   }
  42 }
  43
  44 void print_buffer(std::ostream &os, uint32_t buff_idx, const flatbuffers::Vector<uint8_t> *data_ptr,
  45                   const circle::TensorType &type)
  46 {
  47   if (data_ptr == nullptr)
  48     return;
  49
  50   os << " └── buffer" << std::endl;
  51   os << "     ├── index : " << buff_idx << std::endl;
  52   size_t buff_size = data_ptr->size();
  53   os << "     ├── size  : " << buff_size << std::endl;
  54   os << "     └── data  : ";
  55   switch (type)
  56   {
  57     case circle::TensorType_UINT8:
  58     {
  59       const uint8_t *buff_data_ui8 = reinterpret_cast<const uint8_t *>(data_ptr->data());
  60       for (uint32_t idx = 0; idx < buff_size / sizeof(uint8_t); idx++)
  61       {
  62         os << static_cast<const uint32_t>(buff_data_ui8[idx]) << ", ";
  63       }
  64       break;
  65     }
  66     case circle::TensorType_INT32:
  67     {
  68       const int32_t *buff_data_i32 = reinterpret_cast<const int32_t *>(data_ptr->data());
  69       for (uint32_t idx = 0; idx < buff_size / sizeof(int32_t); idx++)
  70       {
  71         os << buff_data_i32[idx] << ", ";
  72       }
  73       break;
  74     }
  75     case circle::TensorType_INT64:
  76     {
  77       const int64_t *buff_data_i64 = reinterpret_cast<const int64_t *>(data_ptr->data());
  78       for (uint32_t idx = 0; idx < buff_size / sizeof(int64_t); idx++)
  79       {
  80         os << buff_data_i64[idx] << ", ";
  81       }
  82       break;
  83     }
  84     case circle::TensorType_FLOAT32:
  85     {
  86       const float *buff_data_f32 = reinterpret_cast<const float *>(data_ptr->data());
  87       for (uint32_t idx = 0; idx < buff_size / sizeof(float); idx++)
  88       {
  89         os << buff_data_f32[idx] << ", ";
  90       }
  91       break;
  92     }
  93     default:
  94       throw std::runtime_error("NYI tensor type : " + std::to_string(type));
  95   }
  96   os << std::endl;
  97 }
  98
  99 } // namespace
 100
 101 namespace circletensordump
 102 {
 103
 104 void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &)
 105 {
 106   mio::circle::Reader reader(model);
 107   uint32_t num_subgraph = reader.num_subgraph();
 108   auto buffers = reader.buffers();
 109
 110   for (uint32_t subgraph_idx = 0; subgraph_idx < num_subgraph; subgraph_idx++)
 111   {
 112     reader.select_subgraph(subgraph_idx);
 113
 114     auto tensors = reader.tensors();
 115     for (const auto &tensor : *tensors)
 116     {
 117       const auto tensor_name = tensor->name();
 118       std::string tensor_name_str = tensor_name ? tensor_name->str() : "no_name";
 119       os << std::string(70, '-') << std::endl;
 120       os << "[" << tensor_name_str << "]" << std::endl;
 121       auto buff_idx = tensor->buffer();
 122       auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
 123       auto quant_param = tensor->quantization();
 124       std::string print_format = (!buff_data_ptr && !quant_param) ? "└──" : "├──";
 125
 126       // shape
 127       auto shape = tensor->shape();
 128       os << " " + print_format + " shape : (";
 129       ::print_comma_sepearted(os, shape);
 130       os << ")" << std::endl;
 131
 132       // quantization paramters
 133       if (quant_param)
 134       {
 135         std::string print_format1 = buff_data_ptr ? "├──" : "└──";
 136         std::string print_format2 = buff_data_ptr ? "│" : " ";
 137         os << " " + print_format1 + " quantization" << std::endl;
 138         auto min = quant_param->min();
 139         auto max = quant_param->max();
 140         auto scale = quant_param->scale();
 141         auto zero_point = quant_param->zero_point();
 142         auto quantized_dimension = quant_param->quantized_dimension();
 143
 144         os << " " + print_format2 + "   ├── min        : ";
 145         ::print_comma_sepearted(os, min);
 146         os << std::endl;
 147         os << " " + print_format2 + "   ├── max        : ";
 148         ::print_comma_sepearted(os, max);
 149         os << std::endl;
 150         os << " " + print_format2 + "   ├── scale      : ";
 151         ::print_comma_sepearted(os, scale);
 152         os << std::endl;
 153         os << " " + print_format2 + "   ├── zero_point : ";
 154         ::print_comma_sepearted(os, zero_point);
 155         os << std::endl;
 156         os << " " + print_format2 + "   └── quantized_dimension : " << quantized_dimension;
 157         os << std::endl;
 158       }
 159
 160       // buffer
 161       print_buffer(os, buff_idx, buff_data_ptr, tensor->type());
 162       os << std::endl;
 163     }
 164   }
 165 }
 166
 167 } // namespace circletensordump
 168
 169 namespace
 170 {
 171
 172 // HDF5 forbids the inclusion of '/' in the name.
 173 std::string mangle(const std::string &name)
 174 {
 175   std::string ret{name};
 176   std::replace(ret.begin(), ret.end(), '/', '_');
 177   return ret;
 178 }
 179
 180 H5::PredType hdf5_dtype_cast(const circle::TensorType &circle_type)
 181 {
 182   switch (circle_type)
 183   {
 184     case circle::TensorType_UINT8:
 185     {
 186       return H5::PredType::NATIVE_UINT8;
 187     }
 188     case circle::TensorType_INT8:
 189     {
 190       return H5::PredType::NATIVE_INT8;
 191     }
 192     case circle::TensorType_INT16:
 193     {
 194       return H5::PredType::NATIVE_INT16;
 195     }
 196     case circle::TensorType_INT32:
 197     {
 198       return H5::PredType::NATIVE_INT32;
 199     }
 200     case circle::TensorType_INT64:
 201     {
 202       return H5::PredType::NATIVE_INT64;
 203     }
 204     case circle::TensorType_FLOAT32:
 205     {
 206       return H5::PredType::NATIVE_FLOAT;
 207     }
 208     default:
 209       throw std::runtime_error("NYI tensor type : " + std::to_string(circle_type));
 210   }
 211 }
 212
 213 /**
 214  *  In order to create a dataspace, its rank and dimensions are required as hsize_t type.
 215  *  This function converts flatbuffers::Vector<T> to std::vector<hsize_t>.
 216  *
 217  *  If "dims" parameter is passed, the parameter will be converted. However, if
 218  *  not passed(nullptr), data is considered as a rank 1 vector.
 219  */
 220 template <typename T>
 221 std::vector<hsize_t> hdf5_dims_cast(const flatbuffers::Vector<T> *data,
 222                                     const flatbuffers::Vector<int32_t> *dims = nullptr)
 223 {
 224   std::vector<hsize_t> ret;
 225   if (data != nullptr)
 226   {
 227     if (dims == nullptr)
 228     {
 229       ret.resize(1);
 230       ret.at(0) = data->size();
 231     }
 232     else
 233     {
 234       const uint32_t rank = dims->size();
 235       ret.resize(rank);
 236       for (uint32_t d = 0; d < rank; d++)
 237       {
 238         if (dims->Get(d) < 0)
 239           throw std::runtime_error("Dimensions shouldn't be negative");
 240         ret.at(d) = static_cast<hsize_t>(dims->Get(d));
 241       }
 242     }
 243   }
 244   return ret;
 245 }
 246
 247 /**
 248  *  This function writes vector data to given hdf5 file like below.
 249  *
 250  *  GROUP "group_name"
 251  *   ㄴDATATYPE "type"
 252  *   ㄴDATASET "dataset_name"
 253  *   ㄴDATASPACE "dims"
 254  *   ㄴDATA "data"
 255  */
 256 template <typename T>
 257 void write_vector_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
 258                                const H5::PredType &type, const flatbuffers::Vector<T> *data,
 259                                std::vector<hsize_t> dims)
 260 {
 261   if (data == nullptr)
 262     return;
 263   auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
 264   auto dataset = std::make_unique<H5::DataSet>(
 265     file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
 266   dataset->write(data->data(), type);
 267 }
 268
 269 /// @brief This function writes scalar data to given hdf5 file
 270 template <typename T>
 271 void write_scalar_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
 272                                const H5::PredType &type, T data)
 273 {
 274   auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
 275   auto dataset = std::make_unique<H5::DataSet>(
 276     file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
 277   dataset->write(&data, type);
 278 }
 279
 280 } // namespace
 281
 282 namespace circletensordump
 283 {
 284
 285 /**
 286  *  HDF5 layout is like below
 287  *
 288  *  GROUP "/"
 289  *   ㄴGROUP "tensor name"
 290  *     ㄴDATASET "weights"    : Shape (x, y, ...), type(uint8, int16)
 291  *     ㄴDATASET "min"        : Shape (n)
 292  *     ㄴDATASET "max"        : Shape (n)
 293  *     ㄴDATASET "scale"      : Shape (m)
 294  *     ㄴDATASET "zero_point" : Shape (m)
 295  *
 296  *  NOTE All Dataset is optional. It means that if tensor doesn't have the data, it won't be created
 297  *  as a Dataset
 298  *
 299  */
 300 void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
 301                             const std::string &output_path)
 302 {
 303   // loads a circle model
 304   mio::circle::Reader reader(model);
 305   uint32_t num_subgraph = reader.num_subgraph();
 306
 307   // create a hdf5 file
 308   H5::H5File file{output_path, H5F_ACC_TRUNC};
 309
 310   for (uint32_t subgraph_idx = 0; subgraph_idx < num_subgraph; subgraph_idx++)
 311   {
 312     reader.select_subgraph(subgraph_idx);
 313
 314     auto tensors = reader.tensors();
 315     for (const auto &tensor : *tensors)
 316     {
 317       // If tensor does not have name, do nothing.
 318       const auto tensor_name = tensor->name();
 319       if (tensor_name == nullptr)
 320       {
 321         assert(false && "There is no tensor name");
 322         continue;
 323       }
 324
 325       // create a group for each tensor whose name is its tensor name
 326       std::string group_name = ::mangle(tensor_name->c_str());
 327       std::unique_ptr<H5::Group> tensor_group =
 328         std::make_unique<H5::Group>(file.createGroup(group_name));
 329
 330       // write a buffer data
 331       uint32_t buff_idx = tensor->buffer();
 332       auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
 333       if (buff_data_ptr)
 334       {
 335         ::write_vector_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
 336                                     buff_data_ptr,
 337                                     ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
 338       }
 339
 340       // write quantization parameters
 341       auto quant_param = tensor->quantization();
 342       if (quant_param)
 343       {
 344         auto min = quant_param->min();
 345         ::write_vector_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
 346                                     ::hdf5_dims_cast(min));
 347         auto max = quant_param->max();
 348         ::write_vector_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
 349                                     ::hdf5_dims_cast(max));
 350         auto scale = quant_param->scale();
 351         ::write_vector_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
 352                                     ::hdf5_dims_cast(scale));
 353         auto zero_point = quant_param->zero_point();
 354         ::write_vector_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64,
 355                                     zero_point, ::hdf5_dims_cast(zero_point));
 356         auto quantized_dimension = quant_param->quantized_dimension();
 357         ::write_scalar_data_to_hdf5(file, group_name, "quantized_dimension",
 358                                     H5::PredType::NATIVE_INT32, quantized_dimension);
 359       }
 360     }
 361   }
 362 }
 363
 364 } // namespace circletensordump