2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "Fp32ToFp16Converter.h"
18 #include "ir/operation/ConvertFp32ToFp16.h"
19 #include "ir/operation/ConvertFp16ToFp32.h"
20 #include "util/logging.h"
// Backend config id that OpSequences are compared against before any fp16 handling;
// OpSequences lowered to a backend with a different id are not converted.
29 const std::string kAclClBackendConfigId = "acl_cl";
31 void copyDataFromFp32ToFp16(const float *from, float16 *into, size_t num_elements)
33 for (size_t i = 0; i < num_elements; ++i)
35 into[i] = static_cast<float16>(from[i]);
47 Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
48 : _lowered_graph{lowered_graph}
50 VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
// For example, two OpSequences are there and each OpSequence has an Operation
//
//   OP#0            // model input
//   [OPERATION]     // OpSeq#0
//   [OPERATION]     // OpSeq#1
//   OP#2            // model output
//
// AFTER `appendOpSequences()`,
// note that model_input and model_output are not changed.
//
//   [FP32TO16]      // OpSeq#2
//   [OPERATION]     // OpSeq#0
//   [FP16TO32]      // OpSeq#3
//   [FP32TO16]      // OpSeq#4
//   [OPERATION]     // OpSeq#1
//   [FP16TO32]      // OpSeq#5
//
// AFTER `optimize()`,
//
//   [FP32TO16]      // OpSeq#2
//   [OPERATION]     // OpSeq#0
//   [OPERATION]     // OpSeq#1
//   [FP16TO32]      // OpSeq#5
//
// AFTER `convertOperands()`,
//
//   OP#0            // model_input, not fp16
//   [FP32TO16]      // OpSeq#2
//   [OPERATION]     // OpSeq#0
//   [OPERATION]     // OpSeq#1
//   [FP16TO32]      // OpSeq#5
//   OP#2            // model_output, not fp16
//
// AFTER `convertDatas()`,
//
//   OP#0            // model_input, not fp16
//   [FP32TO16]      // OpSeq#2
//   [OPERATION]     // OpSeq#0, constants are fp16
//   [OPERATION]     // OpSeq#1, constants are fp16
//   [FP16TO32]      // OpSeq#5
//   OP#2            // model_output, not fp16
158 void Fp32ToFp16Converter::run()
160 // Append new OpSequence which includes ConvertFp32ToFp16
161 // and append new OpSequence which includes ConvertFp16ToFp32
164 // Remove unnecessary converting operations
167 // Convert operands' data types from fp32 to fp16
174 printOpSequences("FINAL OpSequences");
177 void Fp32ToFp16Converter::appendOpSequences()
179 _lowered_graph.op_seqs().iterate(
180 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
181 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
182 assert(lower_info != nullptr);
184 // For now, the only acl_cl supports fully fp16 type
185 // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
187 // To do this, we could check the support by `operation by operation`. After that, we
188 // would partition an op_seq if it contains unsupported operations.
189 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
192 // OpSeq's input set should be included in the first operation's input set or
193 // OpSeq's output set should be included in the last operation's output set
194 assert(checkOperandsOfOpSequence(op_seq));
196 // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
197 appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
198 appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
// Before:
//
//   OP#0            // model input
//   [OPERATION]     // OpSeq#0
//   OP#1            // model output
//
// After:
//
//   OP#0            // model input
//   [FP32TO16]      // OpSeq#1
//   [OPERATION]     // OpSeq#0
//   OP#1            // model output
224 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenceIndex &op_seq_ind,
225 ir::OpSequence &op_seq)
227 // OpSeq's input set is included in the first operation's input set
228 const ir::OperandIndexSequence op_seq_inputs = op_seq.getInputs(); // copied
230 // NOTE Please do not change sequence of op_seq_inputs. It can change the sequence of inputs of
232 for (const auto &op_seq_input_ind :
233 op_seq_inputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
235 if (checkOperandType(op_seq_input_ind) == false)
238 // new operand w/ datatype fp32
239 const auto new_op_ind = newCopiedOperand(op_seq_input_ind);
241 // set new lower_info for operand
242 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
244 // manipulate input of operation and op_seq
245 // - replace the first operation's input to new operand
246 // with old operand's removeUse and new operand's appendUse()
247 manipulateInput(op_seq_ind, op_seq_input_ind, new_op_ind);
250 const auto new_node_ind = newOperationConvertFp32ToFp16(op_seq_input_ind, new_op_ind);
253 const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
255 // set new lower_info for op_seq
256 setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
258 _list_fp32_to_fp16.insert(new_op_seq_ind);
260 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp32To16]"
261 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
262 _lowered_graph.graph().operations())
// Before:
//
//   OP#0            // model input
//   [FP32TO16]      // OpSeq#1
//   [OPERATION]     // OpSeq#0
//   OP#1            // model output
//
// After:
//
//   OP#0            // model input
//   [FP32TO16]      // OpSeq#1
//   [OPERATION]     // OpSeq#0
//   [FP16TO32]      // OpSeq#2
//   OP#1            // model output
297 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenceIndex &op_seq_ind,
298 ir::OpSequence &op_seq)
300 // OpSeq's output set is included in the last operation's output set
301 const ir::OperandIndexSequence op_seq_outputs = op_seq.getOutputs(); // copied
303 // NOTE Please do not change sequence of op_seq_outputs. It can change the sequence of outputs of
305 for (const auto &op_seq_output_ind :
306 op_seq_outputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
308 if (checkOperandType(op_seq_output_ind) == false)
311 // new operand w/ datatype fp32
312 const auto new_op_ind = newCopiedOperand(op_seq_output_ind);
314 // set new lower_info for operand
315 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
317 // manipulate output of operation and op_seq
318 // - replace output of the last operation's output to new operand
319 // with old operand's removeDef and new operand's appendDef()
320 manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
323 auto new_node_ind = newOperationConvertFp16ToFp32(op_seq_output_ind, new_op_ind);
326 auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
328 // set new lower_info for op_seq
329 setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
331 _list_fp16_to_fp32.insert(new_op_seq_ind);
333 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp16To32]"
334 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
335 _lowered_graph.graph().operations())
340 void Fp32ToFp16Converter::optimize()
342 printOpSequences("BEFORE opt");
344 removeContiguousConvertOpSequences();
346 printOpSequences("AFTER removeContiguousConverts");
348 // TODO Handle Split from the beginning of the model. ex) MODELS/inception_module
352 // OP#0---------------------. // model_input
354 // [FP32TO16] // OpSeq#0 [FP32TO16] // OpSeq#1
358 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
363 // OP#0 // model_input
365 // [FP32TO16] // OpSeq#4
367 // OP#3---------------------------.
369 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
372 void Fp32ToFp16Converter::convertOperands()
374 _lowered_graph.op_seqs().iterate(
375 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
376 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
377 assert(lower_info != nullptr);
378 // For now, the only acl_cl supports fully fp16
379 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
382 // Convert input,output operands' type to fp16
383 convertOperandsOfOpSequence(op_seq);
387 void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
389 auto &operands = _lowered_graph.graph().operands();
390 const auto &operations = _lowered_graph.graph().operations();
391 const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
392 const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
394 for (auto &op_idx : op_seq)
396 const auto &node = operations.at(op_idx);
397 for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
399 if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
402 auto &obj = operands.at(ind);
403 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
406 obj.type(ir::DataType::FLOAT16);
408 VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
411 for (auto &ind : node.getOutputs())
413 if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
416 auto &obj = operands.at(ind);
417 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
420 obj.type(ir::DataType::FLOAT16);
422 VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
// Converts the data of every constant fp32 operand in the graph to fp16 and
// retypes the operand accordingly.
// NOTE(review): this listing has gaps in its embedded line numbering (braces and
// other short lines are not shown), so statements may be missing between the
// lines below - confirm against the original file before editing.
427 void Fp32ToFp16Converter::convertDatas()
429 _lowered_graph.graph().operands().iterate([&](const ir::OperandIndex &ind, ir::Operand &obj) {
430 const auto type = obj.typeInfo().type();
// Only operands that are both constant and still fp32 are converted
431 if (type == ir::DataType::FLOAT32 && obj.isConstant())
433 auto data = obj.data();
434 assert(data != nullptr);
// Element count = size of the operand in bytes / size of one element of its current type
436 size_t num_elements = obj.operandSize() / ir::sizeOfDataType(type);
437 size_t new_ptr_size = num_elements * sizeof(float16);
// Temporary byte buffer that receives the converted fp16 values
438 auto new_ptr = std::make_unique<uint8_t[]>(new_ptr_size);
439 copyDataFromFp32ToFp16(reinterpret_cast<const float *>(data->base()),
440 reinterpret_cast<float16 *>(new_ptr.get()), num_elements);
// NOTE(review): new_ptr is a local unique_ptr, so this presumably relies on
// ir::CachedData copying the bytes rather than keeping the pointer - confirm
443 auto new_data = std::make_unique<ir::CachedData>(new_ptr.get(), new_ptr_size);
// Install the converted data, then mark the operand as fp16
445 obj.data(std::move(new_data));
446 obj.type(ir::DataType::FLOAT16);
447 VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
452 void Fp32ToFp16Converter::printOpSequences(const std::string &pre_msg, const std::string &post_msg)
454 if (pre_msg.empty() == false)
456 VERBOSE(Fp32ToFp16Converter) << pre_msg << std::endl;
459 _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
460 VERBOSE(Fp32ToFp16Converter) << ir::getStrFromOpSeq(op_seq, _lowered_graph.graph().operations())
464 if (post_msg.empty() == false)
466 VERBOSE(Fp32ToFp16Converter) << post_msg << std::endl;
470 bool Fp32ToFp16Converter::checkOperandType(const ir::OperandIndex &op_ind) const
472 const auto &operands = _lowered_graph.graph().operands();
473 const auto &obj = operands.at(op_ind);
474 return (obj.isConstant() == false && obj.typeInfo().type() == ir::DataType::FLOAT32);
477 bool Fp32ToFp16Converter::checkOperandsOfOpSequence(const ir::OpSequence &op_seq) const
479 const auto &operations = _lowered_graph.graph().operations();
481 // the first node's input
482 const auto &first_node_ind = op_seq.operations().at(0);
483 const auto &first_node = operations.at(first_node_ind);
484 const auto &first_node_inputs = first_node.getInputs();
485 for (const auto &op_seq_input_ind : op_seq.getInputs() | ir::Remove::UNDEFINED)
487 if (first_node_inputs.contains(op_seq_input_ind) == false)
491 // the last node's output
492 size_t last_ind = op_seq.size() - 1;
493 const auto &last_node_ind = op_seq.operations().at(last_ind);
494 const auto &last_node = operations.at(last_node_ind);
495 const auto &last_node_outputs = last_node.getOutputs();
496 for (const auto &op_seq_output_ind : op_seq.getOutputs())
498 if (last_node_outputs.contains(op_seq_output_ind) == false)
505 ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &op_ind)
507 auto &operands = _lowered_graph.graph().operands();
508 const auto &obj = operands.at(op_ind);
509 auto new_op_ind = operands.emplace(obj.shape(), obj.typeInfo());
513 void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
514 const ir::OperandIndex &new_op_ind)
516 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
517 assert(lower_info != nullptr);
518 auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
519 auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
520 new_lower_info->addDefPermuteFactor(permute_factor);
521 new_lower_info->addUsePermuteFactor(permute_factor);
522 _lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
525 void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
526 const ir::OpSequenceIndex &new_op_seq_ind)
528 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
529 assert(lower_info != nullptr);
531 auto new_lower_info =
532 std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
533 _lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
536 void Fp32ToFp16Converter::manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
537 const ir::OperandIndex &op_seq_input_ind,
538 const ir::OperandIndex &new_op_ind)
540 auto &operands = _lowered_graph.graph().operands();
541 auto &operations = _lowered_graph.graph().operations();
543 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
545 auto &first_node_ind = op_seq.operations().at(0);
546 auto &first_node = operations.at(first_node_ind);
547 assert(first_node.getInputs().contains(op_seq_input_ind));
549 auto &input_obj = operands.at(op_seq_input_ind);
550 assert(input_obj.isConstant() == false);
552 auto &new_op_obj = operands.at(new_op_ind);
554 // The same inputs having the index as op_seq_input_ind are replaced all at once
555 op_seq.replaceInputs(op_seq_input_ind, new_op_ind);
556 first_node.replaceInputs(op_seq_input_ind, new_op_ind);
558 // op_seq_obj doesn't have uses/def
559 input_obj.removeUse(first_node_ind);
560 new_op_obj.insertUse(first_node_ind);
563 void Fp32ToFp16Converter::manipulateOutput(const ir::OpSequenceIndex &op_seq_ind,
564 const ir::OperandIndex &op_seq_output_ind,
565 const ir::OperandIndex &new_op_ind)
567 auto &operands = _lowered_graph.graph().operands();
568 auto &operations = _lowered_graph.graph().operations();
570 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
572 size_t last_ind = op_seq.size() - 1;
573 auto &last_node_ind = op_seq.operations().at(last_ind);
574 auto &last_node = operations.at(last_node_ind);
575 assert(last_node.getOutputs().contains(op_seq_output_ind));
577 auto &output_obj = operands.at(op_seq_output_ind);
578 assert(output_obj.isConstant() == false);
580 auto &new_op_obj = operands.at(new_op_ind);
582 // The same outputs having the index as op_seq_output_ind are replaced all at once
583 op_seq.replaceOutputs(op_seq_output_ind, new_op_ind);
584 last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
586 // op_seq_obj doesn't have uses/def
587 output_obj.removeDef(last_node_ind);
588 new_op_obj.insertDef(last_node_ind);
592 Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_seq_input_ind,
593 const ir::OperandIndex &new_op_ind)
595 auto &operands = _lowered_graph.graph().operands();
596 auto &operations = _lowered_graph.graph().operations();
598 auto &input_obj = operands.at(op_seq_input_ind);
599 auto &new_op_obj = operands.at(new_op_ind);
601 std::unique_ptr<ir::Operation> new_node(
602 new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
603 const auto new_node_ind = operations.push(std::move(new_node));
605 input_obj.insertUse(new_node_ind);
606 new_op_obj.insertDef(new_node_ind);
612 Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_seq_output_ind,
613 const ir::OperandIndex &new_op_ind)
615 auto &operands = _lowered_graph.graph().operands();
616 auto &operations = _lowered_graph.graph().operations();
618 auto &output_obj = operands.at(op_seq_output_ind);
619 auto &new_op_obj = operands.at(new_op_ind);
621 std::unique_ptr<ir::Operation> new_node(
622 new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
623 const auto new_node_ind = operations.push(std::move(new_node));
625 new_op_obj.insertUse(new_node_ind);
626 output_obj.insertDef(new_node_ind);
631 ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex &op_seq_ind,
632 const ir::OperationIndex &node_index)
634 auto &node = _lowered_graph.graph().operations().at(node_index);
635 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
636 assert(lower_info != nullptr);
637 auto layout = lower_info->layout();
639 auto op_seq = std::make_unique<ir::OpSequence>(layout);
640 op_seq->appendOperation(node_index);
641 op_seq->setOutputs(node.getOutputs());
642 op_seq->setInputs(node.getInputs());
644 return _lowered_graph.op_seqs().emplace(std::move(op_seq));
// Is the output operand of an op_seq (Fp16To32) the input of an op_seq (Fp32To16)?
// If so, connect Fp16To32's previous OpSeq to Fp32To16's next OpSeq
//
// Assume that an OpSequence has an operation for easy explanation
//
// Before:
//
//   [OPERATION]     // OpSeq#0
//   [FP16TO32]      // OpSeq#1
//   [FP32TO16]      // OpSeq#2
//   [OPERATION]     // OpSeq#3
//
// After:
//
//   [OPERATION]     // OpSeq#0
//   [OPERATION]     // OpSeq#3
677 void Fp32ToFp16Converter::removeContiguousConvertOpSequences()
679 // Prepare InputToOpSeqs map
680 const auto input_to_op_seqs = prepareInputToOpSeqs();
682 // Find OpSequences to delete while manipulating input of OpSeq.
683 auto opseq_map_to_delete = findOpSequencesContiguous(input_to_op_seqs);
685 // Find Operations to delete
686 auto list_to_delete_op_seqs = getListOpSequences(opseq_map_to_delete);
687 auto list_to_delete_ops = findOperationsToDelete(list_to_delete_op_seqs);
689 // Before deleting, manipulateInputs of OpSeq & Operation
690 manipulateContiguousOpSequences(input_to_op_seqs, opseq_map_to_delete);
692 // Delete OpSequences & Operations & obj's use/def & operands
693 deleteContiguousOpSequences(list_to_delete_op_seqs, list_to_delete_ops);
696 Fp32ToFp16Converter::OpSeqIndexToOpSeqIndexList
697 Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_seqs) const
699 const auto &op_seqs = _lowered_graph.op_seqs();
700 OpSeqIndexToOpSeqIndexList opseq_map_to_delete;
703 // Assume that an Operation an OpSequence for easy explaination
709 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
711 // OP#1 // output_ind_fp16_fp32
713 // [FP32TO16] // op_seq_ind
719 for (auto it = _list_fp16_to_fp32.cbegin(); it != _list_fp16_to_fp32.cend(); ++it)
721 // fp16_to_fp32's input/output num is always 1
722 auto &op_seq_ind_fp16_to_fp32 = *it;
723 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
724 assert(op_seq_fp16_to_fp32.size() == 1);
725 assert(op_seq_fp16_to_fp32.getInputs().size() == 1);
727 auto &output_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getOutputs().at(0);
728 auto found_input_in_op_seqs = input_to_op_seqs.find(output_ind_fp16_to_fp32);
729 if (found_input_in_op_seqs == input_to_op_seqs.end())
734 // DO NOT FORGET THE CASE
739 // OP#0---------------------.
741 // [FP32TO16] [FP32TO16]
745 // [OPERATION] [OPERATION]
747 for (auto &op_seq_ind : found_input_in_op_seqs->second)
749 auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
750 if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
752 if (opseq_map_to_delete.find(op_seq_ind_fp16_to_fp32) == opseq_map_to_delete.end())
754 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].emplace(op_seq_ind);
758 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
761 VERBOSE(Fp32ToFp16Converter)
762 << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
763 << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
768 return opseq_map_to_delete;
771 Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() const
773 const auto &op_seqs = _lowered_graph.op_seqs();
775 InputToOpSeqs input_to_op_seqs;
776 op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
777 for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED)
779 auto it = input_to_op_seqs.find(input);
780 if (it == input_to_op_seqs.end())
782 input_to_op_seqs[input].emplace(op_seq_idx);
786 input_to_op_seqs[input].insert(op_seq_idx);
791 return input_to_op_seqs;
794 Fp32ToFp16Converter::OpSeqIndexList
795 Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) const
798 for (const auto &it : opseq_map_to_delete)
800 auto &opseq_ind_fp16_to_fp32 = it.first;
801 if (list.find(opseq_ind_fp16_to_fp32) == list.end())
803 list.emplace(opseq_ind_fp16_to_fp32);
806 for (auto &opseq_ind_fp32_to_fp16 : it.second)
808 if (list.find(opseq_ind_fp32_to_fp16) == list.end())
810 list.emplace(opseq_ind_fp32_to_fp16);
817 ir::OperandIndexSequence
818 Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete_op_seqs) const
820 const auto &operations = _lowered_graph.graph().operations();
821 const auto &op_seqs = _lowered_graph.op_seqs();
823 ir::OperandIndexSequence list_to_delete_ops;
824 for (const auto &op_seq_ind : list_to_delete_op_seqs)
826 const auto &op_seq = op_seqs.at(op_seq_ind);
827 assert(op_seq.size() == 1);
829 const auto &first_node_ind = op_seq.operations().at(0);
830 const auto &first_node = operations.at(first_node_ind);
831 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
832 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
834 for (const auto &ind : first_node.getOutputs())
836 list_to_delete_ops.append(ind);
840 return list_to_delete_ops;
843 void Fp32ToFp16Converter::manipulateContiguousOpSequences(
844 const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
846 auto &op_seqs = _lowered_graph.op_seqs();
851 // OP#0 // input_ind_fp16_to_fp32
853 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
857 // [FP32TO16] // op_seq_ind_fp32_to_fp16, op_seq_fp32_to_fp16
859 // OP#2 // output_ind_fp32_to_fp16
861 // [OPERATION] // op_seq_ind_next_to_fp16
863 for (auto it : opseq_map_to_delete)
865 // fp16_to_fp32's input/output num is always 1
866 auto &op_seq_ind_fp16_to_fp32 = it.first;
867 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
868 auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
870 for (auto &op_seq_ind_fp32_to_fp16 : it.second)
872 auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
873 assert(op_seq_fp32_to_fp16.size() == 1);
874 assert(op_seq_fp32_to_fp16.getInputs().size() == 1);
876 auto &output_ind_fp32_to_fp16 = op_seq_fp32_to_fp16.getOutputs().at(0);
877 auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
878 assert(found_next_to_fp16 != input_to_op_seqs.end());
880 for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
882 manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
887 // OP#0 // input_ind_fp16_to_fp32
889 // [OPERATION] // op_seq_ind_next_to_fp16
895 void Fp32ToFp16Converter::deleteContiguousOpSequences(
896 const OpSeqIndexList &list_to_delete_op_seqs,
897 const ir::OperandIndexSequence &list_to_delete_ops)
899 auto &operands = _lowered_graph.graph().operands();
900 auto &operations = _lowered_graph.graph().operations();
901 auto &op_seqs = _lowered_graph.op_seqs();
903 for (auto &op_seq_ind : list_to_delete_op_seqs)
905 auto &op_seq = op_seqs.at(op_seq_ind);
906 assert(op_seq.size() == 1);
907 VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
909 auto &first_node_ind = op_seq.operations().at(0);
910 auto &first_node = operations.at(first_node_ind);
911 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
912 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
913 VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
916 for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
918 auto &obj = operands.at(ind);
919 obj.removeUse(first_node_ind);
920 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
921 << first_node_ind.value() << ") is removed" << std::endl;
925 for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
927 auto &obj = operands.at(ind);
928 obj.removeDef(first_node_ind);
929 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
930 << first_node_ind.value() << ") is removed" << std::endl;
934 operations.remove(first_node_ind);
935 VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
938 op_seqs.remove(op_seq_ind);
939 VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
943 for (auto &ind : list_to_delete_ops)
945 operands.remove(ind);
946 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
950 } // namespace compiler