2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "Fp32ToFp16Converter.h"
18 #include "ir/operation/ConvertFp32ToFp16.h"
19 #include "ir/operation/ConvertFp16ToFp32.h"
20 #include "util/logging.h"
// Backend config id that the converter compares against
// lower_info->backend()->config()->id() to single out the acl_cl backend
// (the comments at the comparison sites say only acl_cl fully supports fp16).
29 const std::string kAclClBackendConfigId = "acl_cl";
// Converts `num_elements` values from the fp32 buffer `from` into the fp16 buffer `into`,
// one element at a time via static_cast<float16>.
//   from         : source buffer, read at indices [0, num_elements)
//   into         : destination buffer, written at indices [0, num_elements)
//   num_elements : number of elements to convert; nothing is written when it is 0
31 void copyDataFromFp32ToFp16(const float *from, float16 *into, size_t num_elements)
33 for (size_t i = 0; i < num_elements; ++i)
35 into[i] = static_cast<float16>(from[i]);
// Binds the converter to `lowered_graph` (kept in _lowered_graph, which the member functions
// below read and modify) and logs that fp16 conversion is enabled.
47 Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
48 : _lowered_graph{lowered_graph}
50 VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
53 // For example, two OpSequences are there and each OpSequence has an Operation
55 // OP#0 // model input
57 // [OPERATION] // OpSeq#0
61 // [OPERATION] // OpSeq#1
63 // OP#2 // model output
66 // AFTER `appendOpSequences()`,
67 // note that model_input and model_output are not changed.
71 // [FP32TO16] // OpSeq#2
75 // [OPERATION] // OpSeq#0
79 // [FP16TO32] // OpSeq#3
83 // [FP32TO16] // OpSeq#4
87 // [OPERATION] // OpSeq#1
91 // [FP16TO32] // OpSeq#5
96 // AFTER `optimize()`,
100 // [FP32TO16] // OpSeq#2
104 // [OPERATION] // OpSeq#0
108 // [OPERATION] // OpSeq#1
112 // [FP16TO32] // OpSeq#5
117 // AFTER `convertOperands()`,
119 // OP#0 // model_input, not fp16
121 // [FP32TO16] // OpSeq#2
125 // [OPERATION] // OpSeq#0
129 // [OPERATION] // OpSeq#1
133 // [FP16TO32] // OpSeq#5
135 // OP#2 // model_output, not fp16
138 // AFTER `convertDatas()`,
140 // OP#0 // model_input, not fp16
142 // [FP32TO16] // OpSeq#2
146 // [OPERATION] // OpSeq#0, constants are fp16
150 // [OPERATION] // OpSeq#1, constants are fp16
154 // [FP16TO32] // OpSeq#5
156 // OP#2 // model_output, not fp16
// Entry point of the conversion pass. The comments below outline its stages (appending the
// converting OpSequences, removing unnecessary conversions, converting operand data types);
// the resulting OpSequences are logged at the end.
// NOTE(review): the calls that implement each stage are not visible in this listing —
// confirm them against the full source.
158 void Fp32ToFp16Converter::run()
160 // Append new OpSequence which includes ConvertFp32ToFp16
161 // and append new OpSequence which includes ConvertFp16ToFp32
164 // Remove unnecessary converting operations
167 // Convert operands' data types from fp32 to fp16
174 printOpSequences("FINAL OpSequences");
// Walks every OpSequence of the lowered graph and wraps it with converting OpSequences:
// a ConvertFp32ToFp16 OpSequence is appended for its inputs and a ConvertFp16ToFp32
// OpSequence for its outputs. Operand data types are not changed to fp16 at this stage.
177 void Fp32ToFp16Converter::appendOpSequences()
179 _lowered_graph.op_seqs().iterate(
180 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
181 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
182 assert(lower_info != nullptr);
184 // For now, only acl_cl fully supports the fp16 type
185 // TODO Support fp16 on acl_neon. Currently acl_neon supports only reshape and concat
187 // To do this, we could check the support `operation by operation`. After that, we
188 // would partition an op_seq if it contains unsupported operations.
// NOTE(review): the statement guarded by this backend check is not visible in this listing;
// presumably OpSequences lowered to other backends are skipped — confirm.
189 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
192 // OpSeq's input set should be included in the first operation's input set or
193 // OpSeq's output set should be included in the last operation's output set
// The check is wrapped in assert(), so it is evaluated only where assert() is active.
194 assert(checkOperandsOfOpSequence(op_seq));
196 // Append converting OpSequences for fp16; all operands' types are still not fp16.
197 appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
198 appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
205 // OP#0 // model input
207 // [OPERATION] // OpSeq#0
209 // OP#1 // model output
214 // OP#0 // model input
216 // [FP32TO16] // OpSeq#1
220 // [OPERATION] // OpSeq#0
222 // OP#1 // model output
// Inserts a ConvertFp32ToFp16 OpSequence in front of `op_seq` for its inputs.
// For an input operand index (duplicated and undefined entries are filtered out) a copy of
// the operand is created, the first operation of `op_seq` is rewired to read the copy, and a
// new ConvertFp32ToFp16 operation (original operand -> copy) is wrapped in its own
// OpSequence. The index of each new OpSequence is recorded in _list_fp32_to_fp16.
//   op_seq_ind : index of the OpSequence being wrapped; also the source of the lower info
//                given to the new operand and the new OpSequence
//   op_seq     : the OpSequence being wrapped
224 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenceIndex &op_seq_ind,
225 ir::OpSequence &op_seq)
227 // OpSeq's input set is included in the first operation's input set
// Iterate over a copy: manipulateInput() below replaces entries in op_seq's own inputs.
228 const ir::OperandIndexSequence op_seq_inputs = op_seq.getInputs(); // copied
230 // NOTE Please do not change sequence of op_seq_inputs. It can change the sequence of inputs of
232 for (const auto &op_seq_input_ind :
233 op_seq_inputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
// NOTE(review): the statement guarded by this check is not visible in this listing; presumably
// inputs that are constant or not FLOAT32 (see checkOperandType) are skipped — confirm.
235 if (checkOperandType(op_seq_input_ind) == false)
238 // new operand w/ datatype fp32
239 const auto new_op_ind = newCopiedOperand(op_seq_input_ind);
241 // set new lower_info for operand
242 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
244 // manipulate input of operation and op_seq
245 // - replace the first operation's input to new operand
246 // with old operand's removeUse and new operand's appendUse()
247 manipulateInput(op_seq_ind, op_seq_input_ind, new_op_ind);
// New converting operation: reads the original operand and defines the copied operand.
250 const auto new_node_ind = newOperationConvertFp32ToFp16(op_seq_input_ind, new_op_ind);
// Wrap the converting operation in an OpSequence of its own.
253 const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
255 // set new lower_info for op_seq
256 setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
// Remember the new OpSequence; findOpSequencesContiguous() looks it up in this set.
258 _list_fp32_to_fp16.insert(new_op_seq_ind);
260 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp32To16]"
261 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
262 _lowered_graph.graph().operations())
270 // OP#0 // model input
272 // [FP32TO16] // OpSeq#1
276 // [OPERATION] // OpSeq#0
278 // OP#1 // model output
283 // OP#0 // model input
285 // [FP32TO16] // OpSeq#1
289 // [OPERATION] // OpSeq#0
293 // [FP16TO32] // OpSeq#2
295 // OP#1 // model output
// Inserts a ConvertFp16ToFp32 OpSequence behind `op_seq` for its outputs.
// For an output operand index (duplicated and undefined entries are filtered out) a copy of
// the operand is created, the last operation of `op_seq` is rewired to write the copy, and a
// new ConvertFp16ToFp32 operation (copy -> original operand) is wrapped in its own
// OpSequence. The index of each new OpSequence is recorded in _list_fp16_to_fp32.
//   op_seq_ind : index of the OpSequence being wrapped; also the source of the lower info
//                given to the new operand and the new OpSequence
//   op_seq     : the OpSequence being wrapped
297 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenceIndex &op_seq_ind,
298 ir::OpSequence &op_seq)
300 // OpSeq's output set is included in the last operation's output set
// Iterate over a copy: manipulateOutput() below replaces entries in op_seq's own outputs.
301 const ir::OperandIndexSequence op_seq_outputs = op_seq.getOutputs(); // copied
303 // NOTE Please do not change sequence of op_seq_outputs. It can change the sequence of outputs of
305 for (const auto &op_seq_output_ind :
306 op_seq_outputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
// NOTE(review): the statement guarded by this check is not visible in this listing; presumably
// outputs that are constant or not FLOAT32 (see checkOperandType) are skipped — confirm.
308 if (checkOperandType(op_seq_output_ind) == false)
311 // new operand w/ datatype fp32
312 const auto new_op_ind = newCopiedOperand(op_seq_output_ind);
314 // set new lower_info for operand
315 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
317 // manipulate output of operation and op_seq
318 // - replace output of the last operation's output to new operand
319 // with old operand's unsetDef and new operand's appendDef()
320 manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
// New converting operation: reads the copied operand and defines the original operand.
323 auto new_node_ind = newOperationConvertFp16ToFp32(op_seq_output_ind, new_op_ind);
// Wrap the converting operation in an OpSequence of its own.
326 auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
328 // set new lower_info for op_seq
329 setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
// Remember the new OpSequence; findOpSequencesContiguous() iterates over this set.
331 _list_fp16_to_fp32.insert(new_op_seq_ind);
333 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp16To32]"
334 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
335 _lowered_graph.graph().operations())
// Optimization stage: logs the OpSequences, removes back-to-back converting OpSequences via
// removeContiguousConvertOpSequences(), then logs the OpSequences again.
// The trailing diagram sketches a case that is still a TODO (a split right after the model
// input).
340 void Fp32ToFp16Converter::optimize()
342 printOpSequences("BEFORE opt");
344 removeContiguousConvertOpSequences();
346 printOpSequences("AFTER removeContiguousConverts");
348 // TODO Handle Split from the beginning of the model. ex) MODELS/inception_module
352 // OP#0---------------------. // model_input
354 // [FP32TO16] // OpSeq#0 [FP32TO16] // OpSeq#1
358 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
363 // OP#0 // model_input
365 // [FP32TO16] // OpSeq#4
367 // OP#3---------------------------.
369 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
// Walks every OpSequence of the lowered graph and converts the data types of its operands
// to fp16 through convertOperandsOfOpSequence().
372 void Fp32ToFp16Converter::convertOperands()
374 _lowered_graph.op_seqs().iterate(
375 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
376 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
377 assert(lower_info != nullptr);
378 // For now, only acl_cl fully supports fp16
// NOTE(review): the statement guarded by this backend check is not visible in this listing;
// presumably OpSequences lowered to other backends are skipped — confirm.
379 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
382 // Convert input,output operands' type to fp16
383 convertOperandsOfOpSequence(op_seq);
// Sets the data type of the input and output operands of every operation in `op_seq` to
// FLOAT16, and logs each operand that is converted.
// NOTE(review): the statements guarded by the `if` checks below are not visible in this
// listing; presumably an operand is left untouched when a check matches — confirm.
387 void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
389 auto &operands = _lowered_graph.graph().operands();
390 const auto &operations = _lowered_graph.graph().operations();
// Despite their names, these two hold the inputs/outputs of the whole graph
// (graph().getInputs()/getOutputs()), not those of `op_seq`.
391 const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
392 const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
394 for (auto &op_idx : op_seq)
396 const auto &node = operations.at(op_idx);
397 for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
// Matches inputs of a ConvertFp32ToFp16 node and operands that are graph inputs.
399 if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
402 auto &obj = operands.at(ind);
// Matches constants and operands whose type is not FLOAT32.
403 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
406 obj.type(ir::DataType::FLOAT16);
408 VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
411 for (auto &ind : node.getOutputs())
// Matches outputs of a ConvertFp16ToFp32 node and operands that are graph outputs.
413 if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
416 auto &obj = operands.at(ind);
// Matches constants and operands whose type is not FLOAT32.
417 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
420 obj.type(ir::DataType::FLOAT16);
422 VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
// Converts the payload of every constant FLOAT32 operand of the graph to fp16: the values
// are converted into a freshly allocated buffer, the buffer is handed to a new
// ir::CachedData that becomes the operand's data, and the operand type is set to FLOAT16.
427 void Fp32ToFp16Converter::convertDatas()
429 _lowered_graph.graph().operands().iterate([&](const ir::OperandIndex &ind, ir::Operand &obj) {
430 const auto type = obj.typeInfo().type();
431 if (type == ir::DataType::FLOAT32 && obj.isConstant())
433 auto data = obj.data();
434 assert(data != nullptr);
// Element count = operand size divided by the size of one element of `type`.
436 size_t num_elements = obj.operandSize() / ir::sizeOfDataType(type);
// Size in bytes of the fp16 buffer.
437 size_t new_ptr_size = num_elements * sizeof(float16);
438 auto new_ptr = std::make_unique<uint8_t[]>(new_ptr_size);
439 copyDataFromFp32ToFp16(reinterpret_cast<const float *>(data->base()),
440 reinterpret_cast<float16 *>(new_ptr.get()), num_elements);
// NOTE(review): `new_ptr` keeps ownership of the buffer and CachedData only receives the raw
// pointer; this presumes CachedData copies the bytes in its constructor — confirm, otherwise
// the operand data would dangle once `new_ptr` goes out of scope.
443 auto new_data = std::make_unique<ir::CachedData>(new_ptr.get(), new_ptr_size);
445 obj.data(std::move(new_data));
446 obj.type(ir::DataType::FLOAT16);
447 VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
// Logs the current OpSequences of the lowered graph.
//   pre_msg  : logged before the OpSequences when it is not empty
//   post_msg : logged after the OpSequences when it is not empty
// Each OpSequence is rendered with ir::getStrFromOpSeq().
452 void Fp32ToFp16Converter::printOpSequences(const std::string &pre_msg, const std::string &post_msg)
454 if (pre_msg.empty() == false)
456 VERBOSE(Fp32ToFp16Converter) << pre_msg << std::endl;
459 _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
460 VERBOSE(Fp32ToFp16Converter) << ir::getStrFromOpSeq(op_seq, _lowered_graph.graph().operations())
464 if (post_msg.empty() == false)
466 VERBOSE(Fp32ToFp16Converter) << post_msg << std::endl;
// Returns true when the operand `op_ind` is not a constant and its data type is FLOAT32;
// returns false otherwise.
470 bool Fp32ToFp16Converter::checkOperandType(const ir::OperandIndex &op_ind) const
472 const auto &operands = _lowered_graph.graph().operands();
473 const auto &obj = operands.at(op_ind);
474 return (obj.isConstant() == false && obj.typeInfo().type() == ir::DataType::FLOAT32);
// Checks the structural precondition asserted by appendOpSequences(): every defined input of
// `op_seq` must be an input of its first operation, and every output of `op_seq` must be an
// output of its last operation.
// NOTE(review): the return statements are not visible in this listing; presumably false is
// returned on the first mismatch and true otherwise — confirm.
477 bool Fp32ToFp16Converter::checkOperandsOfOpSequence(const ir::OpSequence &op_seq) const
479 const auto &operations = _lowered_graph.graph().operations();
481 // the first node's input
// operations().at(0) presumes `op_seq` holds at least one operation.
482 const auto &first_node_ind = op_seq.operations().at(0);
483 const auto &first_node = operations.at(first_node_ind);
484 const auto &first_node_inputs = first_node.getInputs();
485 for (const auto &op_seq_input_ind : op_seq.getInputs() | ir::Remove::UNDEFINED)
487 if (first_node_inputs.contains(op_seq_input_ind) == false)
491 // the last node's output
// size() - 1 likewise presumes a non-empty `op_seq`.
492 size_t last_ind = op_seq.size() - 1;
493 const auto &last_node_ind = op_seq.operations().at(last_ind);
494 const auto &last_node = operations.at(last_node_ind);
495 const auto &last_node_outputs = last_node.getOutputs();
496 for (const auto &op_seq_output_ind : op_seq.getOutputs())
498 if (last_node_outputs.contains(op_seq_output_ind) == false)
// Adds a new operand to the graph that has the same shape and type info as operand `op_ind`.
// Callers use the returned value as the index of the new operand.
// NOTE(review): the return statement is not visible in this listing — confirm that
// `new_op_ind` is what is returned.
505 ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &op_ind)
507 auto &operands = _lowered_graph.graph().operands();
508 const auto &obj = operands.at(op_ind);
509 auto new_op_ind = operands.emplace(obj.shape(), obj.typeInfo());
// Registers lower info for the new operand `new_op_ind`. A single permute factor is built
// from the backend and layout of OpSequence `op_seq_ind` and added as both the def and the
// use permute factor of the operand.
513 void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
514 const ir::OperandIndex &new_op_ind)
516 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
517 assert(lower_info != nullptr);
518 auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
519 auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
520 new_lower_info->addDefPermuteFactor(permute_factor);
521 new_lower_info->addUsePermuteFactor(permute_factor);
522 _lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
// Registers lower info for the new OpSequence `new_op_seq_ind`, built from the backend and
// layout of the existing OpSequence `op_seq_ind`.
525 void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
526 const ir::OpSequenceIndex &new_op_seq_ind)
528 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
529 assert(lower_info != nullptr);
531 auto new_lower_info =
532 std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
533 _lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
// Redirects an input of OpSequence `op_seq_ind` from operand `op_seq_input_ind` to operand
// `new_op_ind`: the index is replaced in the OpSequence's inputs and in the inputs of its
// first operation, and the first operation is moved from the old operand's uses to the new
// operand's uses.
// Asserts that the first operation has `op_seq_input_ind` as an input and that the old
// operand is not a constant.
536 void Fp32ToFp16Converter::manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
537 const ir::OperandIndex &op_seq_input_ind,
538 const ir::OperandIndex &new_op_ind)
540 auto &operands = _lowered_graph.graph().operands();
541 auto &operations = _lowered_graph.graph().operations();
543 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
// operations().at(0) presumes the OpSequence holds at least one operation.
545 auto &first_node_ind = op_seq.operations().at(0);
546 auto &first_node = operations.at(first_node_ind);
547 assert(first_node.getInputs().contains(op_seq_input_ind));
549 auto &input_obj = operands.at(op_seq_input_ind);
550 assert(input_obj.isConstant() == false);
552 auto &new_op_obj = operands.at(new_op_ind);
554 // The same inputs having the index as op_seq_input_ind are replaced all at once
555 op_seq.replaceInputs(op_seq_input_ind, new_op_ind);
556 first_node.replaceInputs(op_seq_input_ind, new_op_ind);
558 // op_seq_obj doesn't have uses/def
559 input_obj.removeUse(first_node_ind);
560 new_op_obj.insertUse(first_node_ind);
// Redirects an output of OpSequence `op_seq_ind` from operand `op_seq_output_ind` to operand
// `new_op_ind`: the index is replaced in the OpSequence's outputs and in the outputs of its
// last operation, the old operand's def is unset, and the last operation becomes the def of
// the new operand.
// Asserts that the last operation has `op_seq_output_ind` as an output, that the old operand
// is not a constant, and that its def is the last operation.
563 void Fp32ToFp16Converter::manipulateOutput(const ir::OpSequenceIndex &op_seq_ind,
564 const ir::OperandIndex &op_seq_output_ind,
565 const ir::OperandIndex &new_op_ind)
567 auto &operands = _lowered_graph.graph().operands();
568 auto &operations = _lowered_graph.graph().operations();
570 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
// size() - 1 presumes the OpSequence holds at least one operation.
572 size_t last_ind = op_seq.size() - 1;
573 auto &last_node_ind = op_seq.operations().at(last_ind);
574 auto &last_node = operations.at(last_node_ind);
575 assert(last_node.getOutputs().contains(op_seq_output_ind));
577 auto &output_obj = operands.at(op_seq_output_ind);
578 assert(output_obj.isConstant() == false);
580 auto &new_op_obj = operands.at(new_op_ind);
582 // The same outputs having the index as op_seq_output_ind are replaced all at once
583 op_seq.replaceOutputs(op_seq_output_ind, new_op_ind);
584 last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
586 // op_seq_obj doesn't have uses/def
587 assert(output_obj.getDef() == last_node_ind);
588 output_obj.unsetDef();
589 new_op_obj.setDef(last_node_ind);
// Creates a ConvertFp32ToFp16 operation whose input is `op_seq_input_ind` and whose output is
// `new_op_ind`, and pushes it into the graph's operations. The new operation is registered as
// a use of the input operand and as the def of the new operand.
// Callers use the returned value as the index of the new operation.
// NOTE(review): the return type line and the return statement are not visible in this
// listing — confirm that `new_node_ind` is what is returned.
593 Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_seq_input_ind,
594 const ir::OperandIndex &new_op_ind)
596 auto &operands = _lowered_graph.graph().operands();
597 auto &operations = _lowered_graph.graph().operations();
599 auto &input_obj = operands.at(op_seq_input_ind);
600 auto &new_op_obj = operands.at(new_op_ind);
602 std::unique_ptr<ir::Operation> new_node(
603 new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
604 const auto new_node_ind = operations.push(std::move(new_node));
606 input_obj.insertUse(new_node_ind);
607 new_op_obj.setDef(new_node_ind);
// Creates a ConvertFp16ToFp32 operation whose input is `new_op_ind` and whose output is
// `op_seq_output_ind`, and pushes it into the graph's operations. The new operation is
// registered as a use of the new operand and as the def of the output operand.
// Callers use the returned value as the index of the new operation.
// NOTE(review): the return type line and the return statement are not visible in this
// listing — confirm that `new_node_ind` is what is returned.
613 Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_seq_output_ind,
614 const ir::OperandIndex &new_op_ind)
616 auto &operands = _lowered_graph.graph().operands();
617 auto &operations = _lowered_graph.graph().operations();
619 auto &output_obj = operands.at(op_seq_output_ind);
620 auto &new_op_obj = operands.at(new_op_ind);
622 std::unique_ptr<ir::Operation> new_node(
623 new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
624 const auto new_node_ind = operations.push(std::move(new_node));
626 new_op_obj.insertUse(new_node_ind);
627 output_obj.setDef(new_node_ind);
// Creates a new OpSequence that contains only the operation `node_index` and adds it to the
// lowered graph's OpSequences.
//   op_seq_ind : existing OpSequence whose lower info supplies the layout of the new one
//   node_index : the operation to wrap; its inputs/outputs become those of the new OpSequence
// Returns the index produced by op_seqs().emplace() for the new OpSequence.
632 ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex &op_seq_ind,
633 const ir::OperationIndex &node_index)
635 auto &node = _lowered_graph.graph().operations().at(node_index);
636 const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
637 assert(lower_info != nullptr);
638 auto layout = lower_info->layout();
640 auto op_seq = std::make_unique<ir::OpSequence>(layout);
641 op_seq->appendOperation(node_index);
642 op_seq->setOutputs(node.getOutputs());
643 op_seq->setInputs(node.getInputs());
645 return _lowered_graph.op_seqs().emplace(std::move(op_seq));
648 // Is the output operand of an op_seq (Fp16To32) the input of the next op_seq (Fp32To16)?
649 // If so, connect Fp16To32's previous OpSeq to Fp32To16's next OpSeq
651 // Assume that each OpSequence has a single operation, for ease of explanation
655 // [OPERATION] // OpSeq#0
659 // [FP16TO32] // OpSeq#1
663 // [FP32TO16] // OpSeq#2
667 // [OPERATION] // OpSeq#3
672 // [OPERATION] // OpSeq#0
676 // [OPERATION] // OpSeq#3
// Removes pairs of converting OpSequences where a ConvertFp16ToFp32 feeds directly into a
// ConvertFp32ToFp16. The steps are ordered: the lookup map and the deletion lists are built
// first, then the consumers of the pair are rewired, and only then are the OpSequences,
// operations and operands deleted.
678 void Fp32ToFp16Converter::removeContiguousConvertOpSequences()
680 // Prepare InputToOpSeqs map
681 const auto input_to_op_seqs = prepareInputToOpSeqs();
683 // Find OpSequences to delete while manipulating input of OpSeq.
684 auto opseq_map_to_delete = findOpSequencesContiguous(input_to_op_seqs);
686 // Find Operations to delete
687 auto list_to_delete_op_seqs = getListOpSequences(opseq_map_to_delete);
688 auto list_to_delete_ops = findOperationsToDelete(list_to_delete_op_seqs);
690 // Before deleting, manipulateInputs of OpSeq & Operation
691 manipulateContiguousOpSequences(input_to_op_seqs, opseq_map_to_delete);
693 // Delete OpSequences & Operations & obj's use/def & operands
694 deleteContiguousOpSequences(list_to_delete_op_seqs, list_to_delete_ops);
// Finds the Fp16To32 OpSequences whose output operand is consumed by Fp32To16 OpSequences.
//   input_to_op_seqs : map from an operand index to the OpSequences that take it as input
// Returns a map keyed by the index of each such Fp16To32 OpSequence; the mapped value holds
// the indices of the Fp32To16 OpSequences that consume its output.
697 Fp32ToFp16Converter::OpSeqIndexToOpSeqIndexList
698 Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_seqs) const
700 const auto &op_seqs = _lowered_graph.op_seqs();
701 OpSeqIndexToOpSeqIndexList opseq_map_to_delete;
704 // Assume that an OpSequence has a single Operation, for ease of explanation
710 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
712 // OP#1 // output_ind_fp16_fp32
714 // [FP32TO16] // op_seq_ind
720 for (auto it = _list_fp16_to_fp32.cbegin(); it != _list_fp16_to_fp32.cend(); ++it)
722 // fp16_to_fp32's input/output num is always 1
723 auto &op_seq_ind_fp16_to_fp32 = *it;
724 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
725 assert(op_seq_fp16_to_fp32.size() == 1);
726 assert(op_seq_fp16_to_fp32.getInputs().size() == 1);
// Look up the OpSequences that consume the single output of this Fp16To32 OpSequence.
728 auto &output_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getOutputs().at(0);
729 auto found_input_in_op_seqs = input_to_op_seqs.find(output_ind_fp16_to_fp32);
// NOTE(review): the statement guarded by this check is not visible in this listing; presumably
// an output without consumers is skipped — confirm.
730 if (found_input_in_op_seqs == input_to_op_seqs.end())
735 // DO NOT FORGET THE CASE
740 // OP#0---------------------.
742 // [FP32TO16] [FP32TO16]
746 // [OPERATION] [OPERATION]
748 for (auto &op_seq_ind : found_input_in_op_seqs->second)
// Only consumers that are Fp32To16 OpSequences created by this converter are of interest.
750 auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
751 if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
// Both statements below add op_seq_ind to the set stored under op_seq_ind_fp16_to_fp32.
753 if (opseq_map_to_delete.find(op_seq_ind_fp16_to_fp32) == opseq_map_to_delete.end())
755 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].emplace(op_seq_ind);
759 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
762 VERBOSE(Fp32ToFp16Converter)
763 << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
764 << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
769 return opseq_map_to_delete;
// Builds a map from each (defined) input operand index to the indices of the OpSequences
// that take that operand as an input, by visiting every OpSequence of the lowered graph.
772 Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() const
774 const auto &op_seqs = _lowered_graph.op_seqs();
776 InputToOpSeqs input_to_op_seqs;
777 op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
778 for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED)
780 auto it = input_to_op_seqs.find(input);
// Both statements below add op_seq_idx to the collection stored under `input`.
781 if (it == input_to_op_seqs.end())
783 input_to_op_seqs[input].emplace(op_seq_idx);
787 input_to_op_seqs[input].insert(op_seq_idx);
792 return input_to_op_seqs;
// Flattens `opseq_map_to_delete` into a single list of OpSequence indices: each key
// (an Fp16To32 OpSequence) and each index in its mapped value (Fp32To16 OpSequences) is
// added to `list` if it is not already there.
// NOTE(review): the declaration of `list` and the return statement are not visible in this
// listing — confirm that `list` is a local OpSeqIndexList that is returned.
795 Fp32ToFp16Converter::OpSeqIndexList
796 Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) const
799 for (const auto &it : opseq_map_to_delete)
801 auto &opseq_ind_fp16_to_fp32 = it.first;
802 if (list.find(opseq_ind_fp16_to_fp32) == list.end())
804 list.emplace(opseq_ind_fp16_to_fp32);
807 for (auto &opseq_ind_fp32_to_fp16 : it.second)
809 if (list.find(opseq_ind_fp32_to_fp16) == list.end())
811 list.emplace(opseq_ind_fp32_to_fp16);
// Collects the output operand indices of the converting operation held by each OpSequence in
// `list_to_delete_op_seqs`. Despite the function name, the returned sequence contains
// operand indices (ir::OperandIndexSequence), not operation indices.
// Asserts that each OpSequence holds exactly one operation and that it is a ConvertFp32ToFp16
// or ConvertFp16ToFp32.
818 ir::OperandIndexSequence
819 Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete_op_seqs) const
821 const auto &operations = _lowered_graph.graph().operations();
822 const auto &op_seqs = _lowered_graph.op_seqs();
824 ir::OperandIndexSequence list_to_delete_ops;
825 for (const auto &op_seq_ind : list_to_delete_op_seqs)
827 const auto &op_seq = op_seqs.at(op_seq_ind);
828 assert(op_seq.size() == 1);
830 const auto &first_node_ind = op_seq.operations().at(0);
831 const auto &first_node = operations.at(first_node_ind);
832 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
833 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
835 for (const auto &ind : first_node.getOutputs())
837 list_to_delete_ops.append(ind);
841 return list_to_delete_ops;
// Rewires the consumers of each contiguous Fp16To32 -> Fp32To16 pair so that they read the
// input operand of the Fp16To32 OpSequence directly, bypassing both conversions.
//   input_to_op_seqs    : map from an operand index to the OpSequences that take it as input
//   opseq_map_to_delete : Fp16To32 OpSequence index -> indices of the Fp32To16 OpSequences
//                         that consume its output
844 void Fp32ToFp16Converter::manipulateContiguousOpSequences(
845 const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
847 auto &op_seqs = _lowered_graph.op_seqs();
852 // OP#0 // input_ind_fp16_to_fp32
854 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
858 // [FP32TO16] // op_seq_ind_fp32_to_fp16, op_seq_fp32_to_fp16
860 // OP#2 // output_ind_fp32_to_fp16
862 // [OPERATION] // op_seq_ind_next_to_fp16
// NOTE(review): `it` is declared by value, so each map entry is copied per iteration.
864 for (auto it : opseq_map_to_delete)
866 // fp16_to_fp32's input/output num is always 1
867 auto &op_seq_ind_fp16_to_fp32 = it.first;
868 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
869 auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
871 for (auto &op_seq_ind_fp32_to_fp16 : it.second)
873 auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
874 assert(op_seq_fp32_to_fp16.size() == 1);
875 assert(op_seq_fp32_to_fp16.getInputs().size() == 1);
// Find the OpSequences that consume the output of this Fp32To16 OpSequence.
877 auto &output_ind_fp32_to_fp16 = op_seq_fp32_to_fp16.getOutputs().at(0);
878 auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
879 assert(found_next_to_fp16 != input_to_op_seqs.end());
881 for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
// Make the consumer read the Fp16To32 OpSequence's input instead of the Fp32To16 output.
883 manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
888 // OP#0 // input_ind_fp16_to_fp32
890 // [OPERATION] // op_seq_ind_next_to_fp16
// Deletes the converting OpSequences that were found to be contiguous, together with their
// operations and operands.
//   list_to_delete_op_seqs : indices of the OpSequences to delete; each is asserted to hold
//                            exactly one ConvertFp32ToFp16 or ConvertFp16ToFp32 operation
//   list_to_delete_ops     : indices of the operands to remove from the graph afterwards
// For each OpSequence the operation is removed from the uses of its input operands, then the
// operation and the OpSequence are removed. Each step is logged.
896 void Fp32ToFp16Converter::deleteContiguousOpSequences(
897 const OpSeqIndexList &list_to_delete_op_seqs,
898 const ir::OperandIndexSequence &list_to_delete_ops)
900 auto &operands = _lowered_graph.graph().operands();
901 auto &operations = _lowered_graph.graph().operations();
902 auto &op_seqs = _lowered_graph.op_seqs();
904 for (auto &op_seq_ind : list_to_delete_op_seqs)
906 auto &op_seq = op_seqs.at(op_seq_ind);
907 assert(op_seq.size() == 1);
908 VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
910 auto &first_node_ind = op_seq.operations().at(0);
911 auto &first_node = operations.at(first_node_ind);
912 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
913 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
914 VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
// Drop this operation from the uses of each of its input operands.
917 for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
919 auto &obj = operands.at(ind);
920 obj.removeUse(first_node_ind);
921 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
922 << first_node_ind.value() << ") is removed" << std::endl;
// NOTE(review): the log below reports the def as removed, but the call that unsets it is not
// visible in this listing — confirm against the full source.
926 for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
928 auto &obj = operands.at(ind);
929 assert(obj.getDef() == first_node_ind);
931 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
932 << first_node_ind.value() << ") is removed" << std::endl;
936 operations.remove(first_node_ind);
937 VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
940 op_seqs.remove(op_seq_ind);
941 VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
// Finally remove the operands collected for deletion.
945 for (auto &ind : list_to_delete_ops)
947 operands.remove(ind);
948 VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
952 } // namespace compiler