runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "Fp32ToFp16Converter.h"
  18 #include "ir/operation/ConvertFp32ToFp16.h"
  19 #include "ir/operation/ConvertFp16ToFp32.h"
  20 #include "util/logging.h"
  21
  22 #include <Half.h>
  23
  24 using float16 = Half;
  25
  26 namespace
  27 {
  28
  29 const std::string kAclClBackendConfigId = "acl_cl";
  30
  31 void copyDataFromFp32ToFp16(const float *from, float16 *into, size_t num_elements)
  32 {
  33   for (size_t i = 0; i < num_elements; ++i)
  34   {
  35     into[i] = static_cast<float16>(from[i]);
  36   }
  37 }
  38
  39 } // namespace
  40
  41 namespace onert
  42 {
  43
  44 namespace compiler
  45 {
  46
  47 Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
  48     : _lowered_graph{lowered_graph}
  49 {
  50   VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
  51 }
  52
  53 // For example, two OpSequences are there and each OpSequence has an Operation
  54 //
  55 //   OP#0      // model input
  56 //    |
  57 // [OPERATION] // OpSeq#0
  58 //    |
  59 //   OP#1
  60 //    |
  61 // [OPERATION] // OpSeq#1
  62 //    |
  63 //   OP#2      // model output
  64 //
  65 //
  66 // AFTER `appendOpSequences()`,
  67 // note that model_input and model_output are not changed.
  68 //
  69 //   OP#0
  70 //    |
  71 // [FP32TO16]  // OpSeq#2
  72 //    |
  73 //   OP#3
  74 //    |
  75 // [OPERATION] // OpSeq#0
  76 //    |
  77 //   OP#4
  78 //    |
  79 // [FP16TO32]  // OpSeq#3
  80 //    |
  81 //   OP#1
  82 //    |
  83 // [FP32TO16]  // OpSeq#4
  84 //    |
  85 //   OP#5
  86 //    |
  87 // [OPERATION] // OpSeq#1
  88 //    |
  89 //   OP#6
  90 //    |
  91 // [FP16TO32]  // OpSeq#5
  92 //    |
  93 //   OP#2
  94 //
  95 //
  96 // AFTER `optimize()`,
  97 //
  98 //   OP#0
  99 //    |
 100 // [FP32TO16]  // OpSeq#2
 101 //    |
 102 //   OP#3
 103 //    |
 104 // [OPERATION] // OpSeq#0
 105 //    |
 106 //   OP#4
 107 //    |
 108 // [OPERATION] // OpSeq#1
 109 //    |
 110 //   OP#6
 111 //    |
 112 // [FP16TO32]  // OpSeq#5
 113 //    |
 114 //   OP#2
 115 //
 116 //
 117 // AFTER `convertOperands()`,
 118 //
 119 //   OP#0      // model_input, not fp16
 120 //    |
 121 // [FP32TO16]  // OpSeq#2
 122 //    |
 123 //   OP#3      // fp16
 124 //    |
 125 // [OPERATION] // OpSeq#0
 126 //    |
 127 //   OP#4      // fp16
 128 //    |
 129 // [OPERATION] // OpSeq#1
 130 //    |
 131 //   OP#6      // fp16
 132 //    |
 133 // [FP16TO32]  // OpSeq#5
 134 //    |
  135 //   OP#2      // model_output, not fp16
 136 //
 137 //
 138 // AFTER `convertDatas()`,
 139 //
 140 //   OP#0      // model_input, not fp16
 141 //    |
 142 // [FP32TO16]  // OpSeq#2
 143 //    |
 144 //   OP#3      // fp16
 145 //    |
 146 // [OPERATION] // OpSeq#0, constants are fp16
 147 //    |
 148 //   OP#4      // fp16
 149 //    |
 150 // [OPERATION] // OpSeq#1, constants are fp16
 151 //    |
 152 //   OP#6      // fp16
 153 //    |
 154 // [FP16TO32]  // OpSeq#5
 155 //    |
  156 //   OP#2      // model_output, not fp16
 157 //
 158 void Fp32ToFp16Converter::run()
 159 {
 160   // Append new OpSequence which includes ConvertFp32ToFp16
 161   //   and append new OpSequence which includes ConvertFp16ToFp32
 162   appendOpSequences();
 163
 164   // Remove unnecessary converting operations
 165   optimize();
 166
 167   // Convert operands' data types from fp32 to fp16
 168   convertOperands();
 169
 170   // Convert Datas
 171   convertDatas();
 172
 173   // Print the result
 174   printOpSequences("FINAL OpSequences");
 175 }
 176
 177 void Fp32ToFp16Converter::appendOpSequences()
 178 {
 179   _lowered_graph.op_seqs().iterate(
 180       [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
 181         const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
 182         assert(lower_info != nullptr);
 183
 184         // For now, the only acl_cl supports fully fp16 type
 185         // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
 186         // operations.
 187         //      To do this, we could check the support by `operation by operation`. After that, we
 188         //      would partition an op_seq if it contains unsupported operations.
 189         if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
 190           return;
 191
 192         // OpSeq's input set should be included in the first operation's input set or
 193         // OpSeq's output set should be included in the last operation's output set
 194         assert(checkOperandsOfOpSequence(op_seq));
 195
 196         // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
 197         appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
 198         appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
 199       });
 200 }
 201
 202 //
 203 // BEFORE
 204 //
 205 //   OP#0      // model input
 206 //    |
 207 // [OPERATION] // OpSeq#0
 208 //    |
 209 //   OP#1      // model output
 210 //
 211 //
 212 // AFTER
 213 //
 214 //   OP#0      // model input
 215 //    |
 216 // [FP32TO16]  // OpSeq#1
 217 //    |
 218 //   OP#2
 219 //    |
 220 // [OPERATION] // OpSeq#0
 221 //    |
 222 //   OP#1      // model output
 223 //
 224 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenceIndex &op_seq_ind,
 225                                                              ir::OpSequence &op_seq)
 226 {
 227   // OpSeq's input set is included in the first operation's input set
 228   const ir::OperandIndexSequence op_seq_inputs = op_seq.getInputs(); // copied
 229
 230   // NOTE Please do not change sequence of op_seq_inputs. It can change the sequence of inputs of
 231   // Subgraph
 232   for (const auto &op_seq_input_ind :
 233        op_seq_inputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
 234   {
 235     if (checkOperandType(op_seq_input_ind) == false)
 236       continue;
 237
 238     // new operand w/ datatype fp32
 239     const auto new_op_ind = newCopiedOperand(op_seq_input_ind);
 240
 241     // set new lower_info for operand
 242     setNewOperandLowerInfo(op_seq_ind, new_op_ind);
 243
 244     // manipulate input of operation and op_seq
 245     // - replace the first operation's input to new operand
 246     //   with old operand's removeUse and new operand's appendUse()
 247     manipulateInput(op_seq_ind, op_seq_input_ind, new_op_ind);
 248
 249     // new op
 250     const auto new_node_ind = newOperationConvertFp32ToFp16(op_seq_input_ind, new_op_ind);
 251
 252     // new op_seq
 253     const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
 254
 255     // set new lower_info for op_seq
 256     setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
 257
 258     _list_fp32_to_fp16.insert(new_op_seq_ind);
 259
 260     VERBOSE(Fp32ToFp16Converter) << "NEW   |Fp32To16]"
 261                                  << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
 262                                                         _lowered_graph.graph().operations())
 263                                  << std::endl;
 264   }
 265 }
 266
 267 //
 268 // BEFORE
 269 //
 270 //   OP#0      // model input
 271 //    |
 272 // [FP32TO16]  // OpSeq#1
 273 //    |
 274 //   OP#2
 275 //    |
 276 // [OPERATION] // OpSeq#0
 277 //    |
 278 //   OP#1      // model output
 279 //
 280 //
 281 // AFTER
 282 //
 283 //   OP#0      // model input
 284 //    |
 285 // [FP32TO16]  // OpSeq#1
 286 //    |
 287 //   OP#2
 288 //    |
 289 // [OPERATION] // OpSeq#0
 290 //    |
 291 //   OP#3
 292 //    |
 293 // [FP16TO32]  // OpSeq#2
 294 //    |
 295 //   OP#1      // model output
 296 //
 297 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenceIndex &op_seq_ind,
 298                                                              ir::OpSequence &op_seq)
 299 {
 300   // OpSeq's output set is included in the last operation's output set
 301   const ir::OperandIndexSequence op_seq_outputs = op_seq.getOutputs(); // copied
 302
 303   // NOTE Please do not change sequence of op_seq_outputs. It can change the sequence of outputs of
 304   // Subgraph
 305   for (const auto &op_seq_output_ind :
 306        op_seq_outputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
 307   {
 308     if (checkOperandType(op_seq_output_ind) == false)
 309       continue;
 310
 311     // new operand w/ datatype fp32
 312     const auto new_op_ind = newCopiedOperand(op_seq_output_ind);
 313
 314     // set new lower_info for operand
 315     setNewOperandLowerInfo(op_seq_ind, new_op_ind);
 316
 317     // manipulate output of operation and op_seq
 318     // - replace output of the last operation's output to new operand
 319     //    with old operand's removeDef and new operand's appendDef()
 320     manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
 321
 322     // new op
 323     auto new_node_ind = newOperationConvertFp16ToFp32(op_seq_output_ind, new_op_ind);
 324
 325     // new op_seq
 326     auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
 327
 328     // set new lower_info for op_seq
 329     setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
 330
 331     _list_fp16_to_fp32.insert(new_op_seq_ind);
 332
 333     VERBOSE(Fp32ToFp16Converter) << "NEW   |Fp16To32]"
 334                                  << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
 335                                                         _lowered_graph.graph().operations())
 336                                  << std::endl;
 337   }
 338 }
 339
 340 void Fp32ToFp16Converter::optimize()
 341 {
 342   printOpSequences("BEFORE opt");
 343
 344   removeContiguousConvertOpSequences();
 345
 346   printOpSequences("AFTER removeContiguousConverts");
 347
 348   // TODO Handle Split from the beginning of the model. ex) MODELS/inception_module
 349   //
 350   // BEFORE)
 351   //
 352   //   OP#0---------------------.         // model_input
 353   //    |                       |
 354   // [FP32TO16]  // OpSeq#0   [FP32TO16]  // OpSeq#1
 355   //    |                       |
 356   //   OP#1                    OP#2
 357   //    |                       |
 358   // [OPERATION] // OpSeq#2   [OPERATION] // OpSeq#3
 359   //
 360   //
 361   // AFTER)
 362   //
 363   //   OP#0      // model_input
 364   //    |
 365   // [FP32TO16]  // OpSeq#4
 366   //    |
 367   //   OP#3---------------------------.
 368   //    |                             |
 369   // [OPERATION] // OpSeq#2   [OPERATION] // OpSeq#3
 370 }
 371
 372 void Fp32ToFp16Converter::convertOperands()
 373 {
 374   _lowered_graph.op_seqs().iterate(
 375       [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
 376         const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
 377         assert(lower_info != nullptr);
 378         // For now, the only acl_cl supports fully fp16
 379         if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
 380           return;
 381
 382         // Convert input,output operands' type to fp16
 383         convertOperandsOfOpSequence(op_seq);
 384       });
 385 }
 386
 387 void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
 388 {
 389   auto &operands = _lowered_graph.graph().operands();
 390   const auto &operations = _lowered_graph.graph().operations();
 391   const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
 392   const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
 393
 394   for (auto &op_idx : op_seq)
 395   {
 396     const auto &node = operations.at(op_idx);
 397     for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
 398     {
 399       if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
 400         continue;
 401
 402       auto &obj = operands.at(ind);
 403       if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
 404         continue;
 405
 406       obj.type(ir::DataType::FLOAT16);
 407
 408       VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
 409     }
 410
 411     for (auto &ind : node.getOutputs())
 412     {
 413       if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
 414         continue;
 415
 416       auto &obj = operands.at(ind);
 417       if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
 418         continue;
 419
 420       obj.type(ir::DataType::FLOAT16);
 421
 422       VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
 423     }
 424   }
 425 }
 426
 427 void Fp32ToFp16Converter::convertDatas()
 428 {
 429   _lowered_graph.graph().operands().iterate([&](const ir::OperandIndex &ind, ir::Operand &obj) {
 430     const auto type = obj.typeInfo().type();
 431     if (type == ir::DataType::FLOAT32 && obj.isConstant())
 432     {
 433       auto data = obj.data();
 434       assert(data != nullptr);
 435
 436       size_t num_elements = obj.operandSize() / ir::sizeOfDataType(type);
 437       size_t new_ptr_size = num_elements * sizeof(float16);
 438       auto new_ptr = std::make_unique<uint8_t[]>(new_ptr_size);
 439       copyDataFromFp32ToFp16(reinterpret_cast<const float *>(data->base()),
 440                              reinterpret_cast<float16 *>(new_ptr.get()), num_elements);
 441       obj.releaseData();
 442
 443       auto new_data = std::make_unique<ir::CachedData>(new_ptr.get(), new_ptr_size);
 444
 445       obj.data(std::move(new_data));
 446       obj.type(ir::DataType::FLOAT16);
 447       VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
 448     }
 449   });
 450 }
 451
 452 void Fp32ToFp16Converter::printOpSequences(const std::string &pre_msg, const std::string &post_msg)
 453 {
 454   if (pre_msg.empty() == false)
 455   {
 456     VERBOSE(Fp32ToFp16Converter) << pre_msg << std::endl;
 457   }
 458
 459   _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
 460     VERBOSE(Fp32ToFp16Converter) << ir::getStrFromOpSeq(op_seq, _lowered_graph.graph().operations())
 461                                  << std::endl;
 462   });
 463
 464   if (post_msg.empty() == false)
 465   {
 466     VERBOSE(Fp32ToFp16Converter) << post_msg << std::endl;
 467   }
 468 }
 469
 470 bool Fp32ToFp16Converter::checkOperandType(const ir::OperandIndex &op_ind) const
 471 {
 472   const auto &operands = _lowered_graph.graph().operands();
 473   const auto &obj = operands.at(op_ind);
 474   return (obj.isConstant() == false && obj.typeInfo().type() == ir::DataType::FLOAT32);
 475 }
 476
 477 bool Fp32ToFp16Converter::checkOperandsOfOpSequence(const ir::OpSequence &op_seq) const
 478 {
 479   const auto &operations = _lowered_graph.graph().operations();
 480
 481   // the first node's input
 482   const auto &first_node_ind = op_seq.operations().at(0);
 483   const auto &first_node = operations.at(first_node_ind);
 484   const auto &first_node_inputs = first_node.getInputs();
 485   for (const auto &op_seq_input_ind : op_seq.getInputs() | ir::Remove::UNDEFINED)
 486   {
 487     if (first_node_inputs.contains(op_seq_input_ind) == false)
 488       return false;
 489   }
 490
 491   // the last node's output
 492   size_t last_ind = op_seq.size() - 1;
 493   const auto &last_node_ind = op_seq.operations().at(last_ind);
 494   const auto &last_node = operations.at(last_node_ind);
 495   const auto &last_node_outputs = last_node.getOutputs();
 496   for (const auto &op_seq_output_ind : op_seq.getOutputs())
 497   {
 498     if (last_node_outputs.contains(op_seq_output_ind) == false)
 499       return false;
 500   }
 501
 502   return true;
 503 }
 504
 505 ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &op_ind)
 506 {
 507   auto &operands = _lowered_graph.graph().operands();
 508   const auto &obj = operands.at(op_ind);
 509   auto new_op_ind = operands.emplace(obj.shape(), obj.typeInfo());
 510   return new_op_ind;
 511 }
 512
 513 void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
 514                                                  const ir::OperandIndex &new_op_ind)
 515 {
 516   const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
 517   assert(lower_info != nullptr);
 518   auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
 519   auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
 520   new_lower_info->addDefPermuteFactor(permute_factor);
 521   new_lower_info->addUsePermuteFactor(permute_factor);
 522   _lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
 523 }
 524
 525 void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
 526                                                     const ir::OpSequenceIndex &new_op_seq_ind)
 527 {
 528   const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
 529   assert(lower_info != nullptr);
 530
 531   auto new_lower_info =
 532       std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
 533   _lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
 534 }
 535
 536 void Fp32ToFp16Converter::manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
 537                                           const ir::OperandIndex &op_seq_input_ind,
 538                                           const ir::OperandIndex &new_op_ind)
 539 {
 540   auto &operands = _lowered_graph.graph().operands();
 541   auto &operations = _lowered_graph.graph().operations();
 542
 543   auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
 544
 545   auto &first_node_ind = op_seq.operations().at(0);
 546   auto &first_node = operations.at(first_node_ind);
 547   assert(first_node.getInputs().contains(op_seq_input_ind));
 548
 549   auto &input_obj = operands.at(op_seq_input_ind);
 550   assert(input_obj.isConstant() == false);
 551
 552   auto &new_op_obj = operands.at(new_op_ind);
 553
 554   // The same inputs having the index as op_seq_input_ind are replaced all at once
 555   op_seq.replaceInputs(op_seq_input_ind, new_op_ind);
 556   first_node.replaceInputs(op_seq_input_ind, new_op_ind);
 557
 558   // op_seq_obj doesn't have uses/def
 559   input_obj.removeUse(first_node_ind);
 560   new_op_obj.insertUse(first_node_ind);
 561 }
 562
 563 void Fp32ToFp16Converter::manipulateOutput(const ir::OpSequenceIndex &op_seq_ind,
 564                                            const ir::OperandIndex &op_seq_output_ind,
 565                                            const ir::OperandIndex &new_op_ind)
 566 {
 567   auto &operands = _lowered_graph.graph().operands();
 568   auto &operations = _lowered_graph.graph().operations();
 569
 570   auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
 571
 572   size_t last_ind = op_seq.size() - 1;
 573   auto &last_node_ind = op_seq.operations().at(last_ind);
 574   auto &last_node = operations.at(last_node_ind);
 575   assert(last_node.getOutputs().contains(op_seq_output_ind));
 576
 577   auto &output_obj = operands.at(op_seq_output_ind);
 578   assert(output_obj.isConstant() == false);
 579
 580   auto &new_op_obj = operands.at(new_op_ind);
 581
 582   // The same outputs having the index as op_seq_output_ind are replaced all at once
 583   op_seq.replaceOutputs(op_seq_output_ind, new_op_ind);
 584   last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
 585
 586   // op_seq_obj doesn't have uses/def
 587   output_obj.removeDef(last_node_ind);
 588   new_op_obj.insertDef(last_node_ind);
 589 }
 590
 591 ir::OperationIndex
 592 Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_seq_input_ind,
 593                                                    const ir::OperandIndex &new_op_ind)
 594 {
 595   auto &operands = _lowered_graph.graph().operands();
 596   auto &operations = _lowered_graph.graph().operations();
 597
 598   auto &input_obj = operands.at(op_seq_input_ind);
 599   auto &new_op_obj = operands.at(new_op_ind);
 600
 601   std::unique_ptr<ir::Operation> new_node(
 602       new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
 603   const auto new_node_ind = operations.push(std::move(new_node));
 604
 605   input_obj.insertUse(new_node_ind);
 606   new_op_obj.insertDef(new_node_ind);
 607
 608   return new_node_ind;
 609 }
 610
 611 ir::OperationIndex
 612 Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_seq_output_ind,
 613                                                    const ir::OperandIndex &new_op_ind)
 614 {
 615   auto &operands = _lowered_graph.graph().operands();
 616   auto &operations = _lowered_graph.graph().operations();
 617
 618   auto &output_obj = operands.at(op_seq_output_ind);
 619   auto &new_op_obj = operands.at(new_op_ind);
 620
 621   std::unique_ptr<ir::Operation> new_node(
 622       new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
 623   const auto new_node_ind = operations.push(std::move(new_node));
 624
 625   new_op_obj.insertUse(new_node_ind);
 626   output_obj.insertDef(new_node_ind);
 627
 628   return new_node_ind;
 629 }
 630
 631 ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex &op_seq_ind,
 632                                                        const ir::OperationIndex &node_index)
 633 {
 634   auto &node = _lowered_graph.graph().operations().at(node_index);
 635   const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
 636   assert(lower_info != nullptr);
 637   auto layout = lower_info->layout();
 638
 639   auto op_seq = std::make_unique<ir::OpSequence>(layout);
 640   op_seq->appendOperation(node_index);
 641   op_seq->setOutputs(node.getOutputs());
 642   op_seq->setInputs(node.getInputs());
 643
 644   return _lowered_graph.op_seqs().emplace(std::move(op_seq));
 645 }
 646
  647 // Is the output operand of an op_seq (Fp16To32) consumed directly by an op_seq (Fp32To16)?
  648 // If so, connect Fp16To32's previous OpSeq to Fp32To16's next OpSeq
  649 //
  650 // Assume that each OpSequence has a single operation, for ease of explanation
 651 //
 652 // BEFORE)
 653 //
 654 // [OPERATION] // OpSeq#0
 655 //    |
 656 //   OP#0
 657 //    |
 658 // [FP16TO32]  // OpSeq#1
 659 //    |
 660 //   OP#1
 661 //    |
 662 // [FP32TO16]  // OpSeq#2
 663 //    |
 664 //   OP#2
 665 //    |
 666 // [OPERATION] // OpSeq#3
 667 //
 668 //
 669 // AFTER)
 670 //
 671 // [OPERATION] // OpSeq#0
 672 //    |
 673 //   OP#0
 674 //    |
 675 // [OPERATION] // OpSeq#3
 676 //
 677 void Fp32ToFp16Converter::removeContiguousConvertOpSequences()
 678 {
 679   // Prepare InputToOpSeqs map
 680   const auto input_to_op_seqs = prepareInputToOpSeqs();
 681
 682   // Find OpSequences to delete while manipulating input of OpSeq.
 683   auto opseq_map_to_delete = findOpSequencesContiguous(input_to_op_seqs);
 684
 685   // Find Operations to delete
 686   auto list_to_delete_op_seqs = getListOpSequences(opseq_map_to_delete);
 687   auto list_to_delete_ops = findOperationsToDelete(list_to_delete_op_seqs);
 688
 689   // Before deleting, manipulateInputs of OpSeq & Operation
 690   manipulateContiguousOpSequences(input_to_op_seqs, opseq_map_to_delete);
 691
 692   // Delete OpSequences & Operations & obj's use/def & operands
 693   deleteContiguousOpSequences(list_to_delete_op_seqs, list_to_delete_ops);
 694 }
 695
 696 Fp32ToFp16Converter::OpSeqIndexToOpSeqIndexList
 697 Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_seqs) const
 698 {
 699   const auto &op_seqs = _lowered_graph.op_seqs();
 700   OpSeqIndexToOpSeqIndexList opseq_map_to_delete;
 701
 702   //
 703   // Assume that an Operation an OpSequence for easy explaination
 704   //
 705   // [OPERATION]
 706   //    |
 707   //   OP#0
 708   //    |
 709   // [FP16TO32]  // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
 710   //    |
 711   //   OP#1      // output_ind_fp16_fp32
 712   //    |
 713   // [FP32TO16]  // op_seq_ind
 714   //    |
 715   //   OP#2
 716   //    |
 717   // [OPERATION]
 718   //
 719   for (auto it = _list_fp16_to_fp32.cbegin(); it != _list_fp16_to_fp32.cend(); ++it)
 720   {
 721     // fp16_to_fp32's input/output num is always 1
 722     auto &op_seq_ind_fp16_to_fp32 = *it;
 723     auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
 724     assert(op_seq_fp16_to_fp32.size() == 1);
 725     assert(op_seq_fp16_to_fp32.getInputs().size() == 1);
 726
 727     auto &output_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getOutputs().at(0);
 728     auto found_input_in_op_seqs = input_to_op_seqs.find(output_ind_fp16_to_fp32);
 729     if (found_input_in_op_seqs == input_to_op_seqs.end())
 730     {
 731       continue;
 732     }
 733
 734     // DO NOT FORGET THE CASE
 735     //
 736     //    |
 737     // [FP16TO32]
 738     //    |
 739     //   OP#0---------------------.
 740     //    |                       |
 741     // [FP32TO16]              [FP32TO16]
 742     //    |                       |
 743     //   OP#1                    OP#2
 744     //    |                       |
 745     // [OPERATION]             [OPERATION]
 746     //
 747     for (auto &op_seq_ind : found_input_in_op_seqs->second)
 748     {
 749       auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
 750       if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
 751       {
 752         if (opseq_map_to_delete.find(op_seq_ind_fp16_to_fp32) == opseq_map_to_delete.end())
 753         {
 754           opseq_map_to_delete[op_seq_ind_fp16_to_fp32].emplace(op_seq_ind);
 755         }
 756         else
 757         {
 758           opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
 759         }
 760
 761         VERBOSE(Fp32ToFp16Converter)
 762             << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
 763             << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
 764       }
 765     }
 766   }
 767
 768   return opseq_map_to_delete;
 769 }
 770
 771 Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() const
 772 {
 773   const auto &op_seqs = _lowered_graph.op_seqs();
 774
 775   InputToOpSeqs input_to_op_seqs;
 776   op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
 777     for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED)
 778     {
 779       auto it = input_to_op_seqs.find(input);
 780       if (it == input_to_op_seqs.end())
 781       {
 782         input_to_op_seqs[input].emplace(op_seq_idx);
 783       }
 784       else
 785       {
 786         input_to_op_seqs[input].insert(op_seq_idx);
 787       }
 788     }
 789   });
 790
 791   return input_to_op_seqs;
 792 }
 793
 794 Fp32ToFp16Converter::OpSeqIndexList
 795 Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) const
 796 {
 797   OpSeqIndexList list;
 798   for (const auto &it : opseq_map_to_delete)
 799   {
 800     auto &opseq_ind_fp16_to_fp32 = it.first;
 801     if (list.find(opseq_ind_fp16_to_fp32) == list.end())
 802     {
 803       list.emplace(opseq_ind_fp16_to_fp32);
 804     }
 805
 806     for (auto &opseq_ind_fp32_to_fp16 : it.second)
 807     {
 808       if (list.find(opseq_ind_fp32_to_fp16) == list.end())
 809       {
 810         list.emplace(opseq_ind_fp32_to_fp16);
 811       }
 812     }
 813   }
 814   return list;
 815 }
 816
 817 ir::OperandIndexSequence
 818 Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete_op_seqs) const
 819 {
 820   const auto &operations = _lowered_graph.graph().operations();
 821   const auto &op_seqs = _lowered_graph.op_seqs();
 822
 823   ir::OperandIndexSequence list_to_delete_ops;
 824   for (const auto &op_seq_ind : list_to_delete_op_seqs)
 825   {
 826     const auto &op_seq = op_seqs.at(op_seq_ind);
 827     assert(op_seq.size() == 1);
 828
 829     const auto &first_node_ind = op_seq.operations().at(0);
 830     const auto &first_node = operations.at(first_node_ind);
 831     assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
 832            first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
 833
 834     for (const auto &ind : first_node.getOutputs())
 835     {
 836       list_to_delete_ops.append(ind);
 837     }
 838   }
 839
 840   return list_to_delete_ops;
 841 }
 842
 843 void Fp32ToFp16Converter::manipulateContiguousOpSequences(
 844     const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
 845 {
 846   auto &op_seqs = _lowered_graph.op_seqs();
 847
 848   //
 849   // [OPERATION]
 850   //    |
 851   //   OP#0      // input_ind_fp16_to_fp32
 852   //    |
 853   // [FP16TO32]  // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
 854   //    |
 855   //   OP#1
 856   //    |
 857   // [FP32TO16]  // op_seq_ind_fp32_to_fp16, op_seq_fp32_to_fp16
 858   //    |
 859   //   OP#2      // output_ind_fp32_to_fp16
 860   //    |
 861   // [OPERATION] // op_seq_ind_next_to_fp16
 862   //
 863   for (auto it : opseq_map_to_delete)
 864   {
 865     // fp16_to_fp32's input/output num is always 1
 866     auto &op_seq_ind_fp16_to_fp32 = it.first;
 867     auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
 868     auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
 869
 870     for (auto &op_seq_ind_fp32_to_fp16 : it.second)
 871     {
 872       auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
 873       assert(op_seq_fp32_to_fp16.size() == 1);
 874       assert(op_seq_fp32_to_fp16.getInputs().size() == 1);
 875
 876       auto &output_ind_fp32_to_fp16 = op_seq_fp32_to_fp16.getOutputs().at(0);
 877       auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
 878       assert(found_next_to_fp16 != input_to_op_seqs.end());
 879
 880       for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
 881       {
 882         manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
 883       }
 884       //
 885       // [OPERATION]
 886       //    |
 887       //   OP#0      // input_ind_fp16_to_fp32
 888       //    |
 889       // [OPERATION] // op_seq_ind_next_to_fp16
 890       //
 891     }
 892   }
 893 }
 894
 895 void Fp32ToFp16Converter::deleteContiguousOpSequences(
 896     const OpSeqIndexList &list_to_delete_op_seqs,
 897     const ir::OperandIndexSequence &list_to_delete_ops)
 898 {
 899   auto &operands = _lowered_graph.graph().operands();
 900   auto &operations = _lowered_graph.graph().operations();
 901   auto &op_seqs = _lowered_graph.op_seqs();
 902
 903   for (auto &op_seq_ind : list_to_delete_op_seqs)
 904   {
 905     auto &op_seq = op_seqs.at(op_seq_ind);
 906     assert(op_seq.size() == 1);
 907     VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
 908
 909     auto &first_node_ind = op_seq.operations().at(0);
 910     auto &first_node = operations.at(first_node_ind);
 911     assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
 912            first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
 913     VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
 914
 915     // Uses
 916     for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
 917     {
 918       auto &obj = operands.at(ind);
 919       obj.removeUse(first_node_ind);
 920       VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
 921                                    << first_node_ind.value() << ") is removed" << std::endl;
 922     }
 923
 924     // Def
 925     for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
 926     {
 927       auto &obj = operands.at(ind);
 928       obj.removeDef(first_node_ind);
 929       VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
 930                                    << first_node_ind.value() << ") is removed" << std::endl;
 931     }
 932
 933     // Operation
 934     operations.remove(first_node_ind);
 935     VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
 936
 937     // OpSequence
 938     op_seqs.remove(op_seq_ind);
 939     VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
 940   }
 941
 942   // Operand
 943   for (auto &ind : list_to_delete_ops)
 944   {
 945     operands.remove(ind);
 946     VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
 947   }
 948 }
 949
 950 } // namespace compiler
 951
 952 } // namespace onert