compiler/circle-tensordump/src/Dump.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
#include "Dump.h"
#include "Reader.h"

#include <H5Cpp.h>

#include <algorithm>
#include <cstring>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
  26
  27 namespace
  28 {
  29
  30 template <typename T>
  31 void print_comma_sepearted(std::ostream &os, const flatbuffers::Vector<T> *vec)
  32 {
  33   if (vec == nullptr)
  34     return;
  35   for (auto iter = vec->begin(); iter != vec->end(); iter++)
  36   {
  37     if (iter != vec->begin())
  38       os << ", ";
  39     os << *iter;
  40   }
  41 }
  42
  43 void print_buffer(std::ostream &os, uint32_t buff_idx, const flatbuffers::Vector<uint8_t> *data_ptr,
  44                   const circle::TensorType &type)
  45 {
  46   if (data_ptr == nullptr)
  47     return;
  48
  49   os << " └── buffer" << std::endl;
  50   os << "     ├── index : " << buff_idx << std::endl;
  51   size_t buff_size = data_ptr->size();
  52   os << "     ├── size  : " << buff_size << std::endl;
  53   os << "     └── data  : ";
  54   switch (type)
  55   {
  56     case circle::TensorType_UINT8:
  57     {
  58       const uint8_t *buff_data_ui8 = reinterpret_cast<const uint8_t *>(data_ptr->data());
  59       for (uint32_t idx = 0; idx < buff_size / sizeof(uint8_t); idx++)
  60       {
  61         os << static_cast<const uint32_t>(buff_data_ui8[idx]) << ", ";
  62       }
  63       break;
  64     }
  65     case circle::TensorType_INT32:
  66     {
  67       const int32_t *buff_data_i32 = reinterpret_cast<const int32_t *>(data_ptr->data());
  68       for (uint32_t idx = 0; idx < buff_size / sizeof(int32_t); idx++)
  69       {
  70         os << buff_data_i32[idx] << ", ";
  71       }
  72       break;
  73     }
  74     case circle::TensorType_INT64:
  75     {
  76       const int64_t *buff_data_i64 = reinterpret_cast<const int64_t *>(data_ptr->data());
  77       for (uint32_t idx = 0; idx < buff_size / sizeof(int64_t); idx++)
  78       {
  79         os << buff_data_i64[idx] << ", ";
  80       }
  81       break;
  82     }
  83     case circle::TensorType_FLOAT32:
  84     {
  85       const float *buff_data_f32 = reinterpret_cast<const float *>(data_ptr->data());
  86       for (uint32_t idx = 0; idx < buff_size / sizeof(float); idx++)
  87       {
  88         os << buff_data_f32[idx] << ", ";
  89       }
  90       break;
  91     }
  92     default:
  93       throw std::runtime_error("NYI tensor type : " + std::to_string(type));
  94   }
  95   os << std::endl;
  96 }
  97
  98 } // namespace
  99
 100 namespace circletensordump
 101 {
 102
 103 void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &)
 104 {
 105   circletensordump::Reader reader(model);
 106   uint32_t num_subgraph = reader.num_subgraph();
 107   auto buffers = reader.buffers();
 108
 109   for (uint32_t subgraph_idx = 0; subgraph_idx < num_subgraph; subgraph_idx++)
 110   {
 111     reader.select_subgraph(subgraph_idx);
 112
 113     auto tensors = reader.tensors();
 114     for (const auto &tensor : *tensors)
 115     {
 116       os << std::string(70, '-') << std::endl;
 117       os << "[" << tensor->name()->str() << "]" << std::endl;
 118       auto buff_idx = tensor->buffer();
 119       auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
 120       auto quant_param = tensor->quantization();
 121       std::string print_format = (!buff_data_ptr && !quant_param) ? "└──" : "├──";
 122
 123       // shape
 124       auto shape = tensor->shape();
 125       os << " " + print_format + " shape : (";
 126       ::print_comma_sepearted(os, shape);
 127       os << ")" << std::endl;
 128
 129       // quantization paramters
 130       if (quant_param)
 131       {
 132         std::string print_format1 = buff_data_ptr ? "├──" : "└──";
 133         std::string print_format2 = buff_data_ptr ? "│" : " ";
 134         os << " " + print_format1 + " quantization" << std::endl;
 135         auto min = quant_param->min();
 136         auto max = quant_param->max();
 137         auto scale = quant_param->scale();
 138         auto zero_point = quant_param->zero_point();
 139         auto quantized_dimension = quant_param->quantized_dimension();
 140
 141         os << " " + print_format2 + "   ├── min        : ";
 142         ::print_comma_sepearted(os, min);
 143         os << std::endl;
 144         os << " " + print_format2 + "   ├── max        : ";
 145         ::print_comma_sepearted(os, max);
 146         os << std::endl;
 147         os << " " + print_format2 + "   ├── scale      : ";
 148         ::print_comma_sepearted(os, scale);
 149         os << std::endl;
 150         os << " " + print_format2 + "   ├── zero_point : ";
 151         ::print_comma_sepearted(os, zero_point);
 152         os << std::endl;
 153         os << " " + print_format2 + "   └── quantized_dimension : " << quantized_dimension;
 154         os << std::endl;
 155       }
 156
 157       // buffer
 158       print_buffer(os, buff_idx, buff_data_ptr, tensor->type());
 159       os << std::endl;
 160     }
 161   }
 162 }
 163
 164 } // namespace circletensordump
 165
 166 namespace
 167 {
 168
 169 // HDF5 forbids the inclusion of '/' in the name.
 170 std::string mangle(const std::string &name)
 171 {
 172   std::string ret{name};
 173   std::replace(ret.begin(), ret.end(), '/', '_');
 174   return ret;
 175 }
 176
 177 H5::PredType hdf5_dtype_cast(const circle::TensorType &circle_type)
 178 {
 179   switch (circle_type)
 180   {
 181     case circle::TensorType_UINT8:
 182     {
 183       return H5::PredType::NATIVE_UINT8;
 184     }
 185     case circle::TensorType_INT32:
 186     {
 187       return H5::PredType::NATIVE_INT32;
 188     }
 189     case circle::TensorType_INT64:
 190     {
 191       return H5::PredType::NATIVE_INT64;
 192     }
 193     case circle::TensorType_FLOAT32:
 194     {
 195       return H5::PredType::NATIVE_FLOAT;
 196     }
 197     default:
 198       throw std::runtime_error("NYI tensor type : " + std::to_string(circle_type));
 199   }
 200 }
 201
 202 /**
 203  *  In order to create a dataspace, its rank and dimensions are required as hsize_t type.
 204  *  This function converts flatbuffers::Vector<T> to std::vector<hsize_t>.
 205  *
 206  *  If "dims" parameter is passed, the parameter will be converted. However, if
 207  *  not passed(nullptr), data is considered as a rank 1 vector.
 208  */
 209 template <typename T>
 210 std::vector<hsize_t> hdf5_dims_cast(const flatbuffers::Vector<T> *data,
 211                                     const flatbuffers::Vector<int32_t> *dims = nullptr)
 212 {
 213   std::vector<hsize_t> ret;
 214   if (data != nullptr)
 215   {
 216     if (dims == nullptr)
 217     {
 218       ret.resize(1);
 219       ret.at(0) = data->size();
 220     }
 221     else
 222     {
 223       const uint32_t rank = dims->size();
 224       ret.resize(rank);
 225       for (uint32_t d = 0; d < rank; d++)
 226       {
 227         ret.at(d) = dims->Get(d);
 228       }
 229     }
 230   }
 231   return ret;
 232 }
 233
 234 /**
 235  *  This function writes vector data to given hdf5 file like below.
 236  *
 237  *  GROUP "group_name"
 238  *   ㄴDATATYPE "type"
 239  *   ㄴDATASET "dataset_name"
 240  *   ㄴDATASPACE "dims"
 241  *   ㄴDATA "data"
 242  */
 243 template <typename T>
 244 void write_vector_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
 245                                const H5::PredType &type, const flatbuffers::Vector<T> *data,
 246                                std::vector<hsize_t> dims)
 247 {
 248   if (data == nullptr)
 249     return;
 250   auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
 251   auto dataset = std::make_unique<H5::DataSet>(
 252       file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
 253   dataset->write(data->data(), type);
 254 }
 255
 256 /// @brief This function writes scalar data to given hdf5 file
 257 template <typename T>
 258 void write_scalar_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
 259                                const H5::PredType &type, T data)
 260 {
 261   auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
 262   auto dataset = std::make_unique<H5::DataSet>(
 263       file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
 264   dataset->write(&data, type);
 265 }
 266
 267 } // namespace
 268
 269 namespace circletensordump
 270 {
 271
 272 /**
 273  *  HDF5 layout is like below
 274  *
 275  *  GROUP "/"
 276  *   ㄴGROUP "tensor name"
 277  *     ㄴDATASET "weights"    : Shape (x, y, ...), type(uint8, int16)
 278  *     ㄴDATASET "min"        : Shape (n)
 279  *     ㄴDATASET "max"        : Shape (n)
 280  *     ㄴDATASET "scale"      : Shape (m)
 281  *     ㄴDATASET "zero_point" : Shape (m)
 282  *
 283  *  NOTE All Dataset is optional. It means that if tensor doesn't have the data, it won't be created
 284  *  as a Dataset
 285  *
 286  */
 287 void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
 288                             const std::string &output_path)
 289 {
 290   // loads a circle model
 291   circletensordump::Reader reader(model);
 292   uint32_t num_subgraph = reader.num_subgraph();
 293
 294   // create a hdf5 file
 295   H5::H5File file{output_path, H5F_ACC_TRUNC};
 296
 297   for (uint32_t subgraph_idx = 0; subgraph_idx < num_subgraph; subgraph_idx++)
 298   {
 299     reader.select_subgraph(subgraph_idx);
 300
 301     auto tensors = reader.tensors();
 302     for (const auto &tensor : *tensors)
 303     {
 304       // create a group for each tensor whose name is its tensor name
 305       std::string group_name = ::mangle(tensor->name()->c_str());
 306       std::unique_ptr<H5::Group> tensor_group =
 307           std::make_unique<H5::Group>(file.createGroup(group_name));
 308
 309       // write a buffer data
 310       uint32_t buff_idx = tensor->buffer();
 311       auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
 312       if (buff_data_ptr)
 313       {
 314         ::write_vector_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
 315                                     buff_data_ptr,
 316                                     ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
 317       }
 318
 319       // write quantization parameters
 320       auto quant_param = tensor->quantization();
 321       if (quant_param)
 322       {
 323         auto min = quant_param->min();
 324         ::write_vector_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
 325                                     ::hdf5_dims_cast(min));
 326         auto max = quant_param->max();
 327         ::write_vector_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
 328                                     ::hdf5_dims_cast(max));
 329         auto scale = quant_param->scale();
 330         ::write_vector_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
 331                                     ::hdf5_dims_cast(scale));
 332         auto zero_point = quant_param->zero_point();
 333         ::write_vector_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64,
 334                                     zero_point, ::hdf5_dims_cast(zero_point));
 335         auto quantized_dimension = quant_param->quantized_dimension();
 336         ::write_scalar_data_to_hdf5(file, group_name, "quantized_dimension",
 337                                     H5::PredType::NATIVE_INT32, quantized_dimension);
 338       }
 339     }
 340   }
 341 }
 342
 343 } // namespace circletensordump