2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #if 0 // This file is temporarily unused
19 #include "Fp32ToFp16Converter.h"
20 #include "ir/operation/ConvertFp32ToFp16.h"
21 #include "ir/operation/ConvertFp16ToFp32.h"
22 #include "util/logging.h"
// Config id of the backend this pass targets. Each OpSequence's
// `lower_info->backend()->config()->id()` is compared against this value.
31 const std::string kAclClBackendConfigId = "acl_cl";
// Converts `num_elements` values from the fp32 buffer `from` into the fp16 buffer `into`,
// one element at a time via static_cast<float16>.
// NOTE(review): both buffers are assumed to hold at least `num_elements` elements; nothing
// here checks that.
33 void copyDataFromFp32ToFp16(const float *from, float16 *into, size_t num_elements)
35 for (size_t i = 0; i < num_elements; ++i)
37 into[i] = static_cast<float16>(from[i]);
// Constructs the converter: initializes `_lowered_graph` from `lowered_graph` and emits a
// verbose log line announcing that fp16 is enabled.
49 Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
50 : _lowered_graph{lowered_graph}
52 VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
55 // For example, two OpSequences are there and each OpSequence has an Operation
57 // OP#0 // model input
59 // [OPERATION] // OpSeq#0
63 // [OPERATION] // OpSeq#1
65 // OP#2 // model output
68 // AFTER `appendOpSequences()`,
69 // note that model_input and model_output are not changed.
73 // [FP32TO16] // OpSeq#2
77 // [OPERATION] // OpSeq#0
81 // [FP16TO32] // OpSeq#3
85 // [FP32TO16] // OpSeq#4
89 // [OPERATION] // OpSeq#1
93 // [FP16TO32] // OpSeq#5
98 // AFTER `optimize()`,
102 // [FP32TO16] // OpSeq#2
106 // [OPERATION] // OpSeq#0
110 // [OPERATION] // OpSeq#1
114 // [FP16TO32] // OpSeq#5
119 // AFTER `convertOperands()`,
121 // OP#0 // model_input, not fp16
123 // [FP32TO16] // OpSeq#2
127 // [OPERATION] // OpSeq#0
131 // [OPERATION] // OpSeq#1
135 // [FP16TO32] // OpSeq#5
137 // OP#2 // model_output, not fp16
140 // AFTER `convertDatas()`,
142 // OP#0 // model_input, not fp16
144 // [FP32TO16] // OpSeq#2
148 // [OPERATION] // OpSeq#0, constants are fp16
152 // [OPERATION] // OpSeq#1, constants are fp16
156 // [FP16TO32] // OpSeq#5
158 // OP#2 // model_output, not fp16
// Entry point of the pass. The comments below name the steps in the order they are performed,
// and the final OpSequences are logged at the end.
// NOTE(review): the calls that carry out each step are not visible in this listing; presumably
// appendOpSequences(), optimize(), convertOperands() and convertDatas(), matching the stages
// named in the diagram comment that precedes this function - confirm against the full file.
160 void Fp32ToFp16Converter::run()
162 // Append new OpSequence which includes ConvertFp32ToFp16
163 // and append new OpSequence which includes ConvertFp16ToFp32
166 // Remove unnecessary converting operations
169 // Convert operands' data types from fp32 to fp16
176 printOpSequences("FINAL OpSequences");
// Visits every OpSequence of the lowered graph and, for the targeted backend, surrounds it with
// converting OpSequences: ConvertFp32ToFp16 on its inputs and ConvertFp16ToFp32 on its outputs.
// Operand data types are not changed here.
179 void Fp32ToFp16Converter::appendOpSequences()
181 _lowered_graph.op_seqs().iterate(
182 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
183 const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
184 assert(lower_info != nullptr);
186 // For now, only acl_cl fully supports the fp16 type
187 // TODO Support fp16 on acl_neon. Currently acl_neon supports only reshape and concat
189 // To do this, we could check the support `operation by operation`. After that, we
190 // would partition an op_seq if it contains unsupported operations.
// NOTE(review): the statement guarded by this check is not visible in this listing;
// presumably OpSequences on other backends are skipped - confirm.
191 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
194 // OpSeq's input set should be included in the first operation's input set or
195 // OpSeq's output set should be included in the last operation's output set
196 assert(checkOperandsOfOpSequence(op_seq));
198 // Append converting OpSequences for fp16; all operands' types are still not fp16 at this point.
199 appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
200 appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
207 // OP#0 // model input
209 // [OPERATION] // OpSeq#0
211 // OP#1 // model output
216 // OP#0 // model input
218 // [FP32TO16] // OpSeq#1
222 // [OPERATION] // OpSeq#0
224 // OP#1 // model output
// Inserts a ConvertFp32ToFp16 OpSequence in front of `op_seq` for each of its inputs.
//
// For every distinct, defined input of `op_seq`:
//  - a new operand is created as a copy (shape/type) of the input,
//  - `op_seq` and its first operation are rewired to read the new operand,
//  - a ConvertFp32ToFp16 node reading the original input and defining the new operand is created
//    and wrapped in its own OpSequence with lower info copied from `op_seq_ind`,
//  - the new OpSequence index is recorded in `_list_fp32_to_fp16`.
//
// @param op_seq_ind  index of the OpSequence being wrapped
// @param op_seq      the OpSequence being wrapped (its inputs are modified via manipulateInput)
226 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenceIndex &op_seq_ind,
227 ir::OpSequence &op_seq)
229 // OpSeq's input set is included in the first operation's input set
// Iterate over a copy because manipulateInput() replaces inputs of the OpSequence in the loop.
230 const ir::OperandIndexSequence op_seq_inputs = op_seq.getInputs(); // copied
232 // NOTE Please do not change sequence of op_seq_inputs. It can change the sequence of inputs of
234 for (const auto &op_seq_input_ind :
235 op_seq_inputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
// checkOperandType() is true only for non-constant FLOAT32 operands.
// NOTE(review): the statement guarded by this check is not visible in this listing;
// presumably other operands are skipped - confirm.
237 if (checkOperandType(op_seq_input_ind) == false)
240 // new operand w/ datatype fp32
241 const auto new_op_ind = newCopiedOperand(op_seq_input_ind);
243 // set new lower_info for operand
244 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
246 // manipulate input of operation and op_seq
247 // - replace the first operation's input with the new operand,
248 // using the old operand's removeUse() and the new operand's insertUse()
249 manipulateInput(op_seq_ind, op_seq_input_ind, new_op_ind);
// new ConvertFp32ToFp16 node: original input -> new operand
252 const auto new_node_ind = newOperationConvertFp32ToFp16(op_seq_input_ind, new_op_ind);
// new OpSequence holding only the new node
255 const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
257 // set new lower_info for op_seq
258 setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
// remember it so that optimize() can later find contiguous convert pairs
260 _list_fp32_to_fp16.insert(new_op_seq_ind);
262 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp32To16]"
263 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
264 _lowered_graph.graph().operations())
272 // OP#0 // model input
274 // [FP32TO16] // OpSeq#1
278 // [OPERATION] // OpSeq#0
280 // OP#1 // model output
285 // OP#0 // model input
287 // [FP32TO16] // OpSeq#1
291 // [OPERATION] // OpSeq#0
295 // [FP16TO32] // OpSeq#2
297 // OP#1 // model output
// Inserts a ConvertFp16ToFp32 OpSequence behind `op_seq` for each of its outputs.
//
// For every distinct, defined output of `op_seq`:
//  - a new operand is created as a copy (shape/type) of the output,
//  - `op_seq` and its last operation are rewired to define the new operand,
//  - a ConvertFp16ToFp32 node reading the new operand and defining the original output is
//    created and wrapped in its own OpSequence with lower info copied from `op_seq_ind`,
//  - the new OpSequence index is recorded in `_list_fp16_to_fp32`.
//
// @param op_seq_ind  index of the OpSequence being wrapped
// @param op_seq      the OpSequence being wrapped (its outputs are modified via manipulateOutput)
299 void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenceIndex &op_seq_ind,
300 ir::OpSequence &op_seq)
302 // OpSeq's output set is included in the last operation's output set
// Iterate over a copy because manipulateOutput() replaces outputs of the OpSequence in the loop.
303 const ir::OperandIndexSequence op_seq_outputs = op_seq.getOutputs(); // copied
305 // NOTE Please do not change sequence of op_seq_outputs. It can change the sequence of outputs of
307 for (const auto &op_seq_output_ind :
308 op_seq_outputs | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
// checkOperandType() is true only for non-constant FLOAT32 operands.
// NOTE(review): the statement guarded by this check is not visible in this listing;
// presumably other operands are skipped - confirm.
310 if (checkOperandType(op_seq_output_ind) == false)
313 // new operand w/ datatype fp32
314 const auto new_op_ind = newCopiedOperand(op_seq_output_ind);
316 // set new lower_info for operand
317 setNewOperandLowerInfo(op_seq_ind, new_op_ind);
319 // manipulate output of operation and op_seq
320 // - replace the last operation's output with the new operand,
321 // using the old operand's unsetDef() and the new operand's setDef()
322 manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
// new ConvertFp16ToFp32 node: new operand -> original output
325 auto new_node_ind = newOperationConvertFp16ToFp32(op_seq_output_ind, new_op_ind);
// new OpSequence holding only the new node
328 auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
330 // set new lower_info for op_seq
331 setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
// remember it so that optimize() can later find contiguous convert pairs
333 _list_fp16_to_fp32.insert(new_op_seq_ind);
335 VERBOSE(Fp32ToFp16Converter) << "NEW |Fp16To32]"
336 << ir::getStrFromOpSeq(_lowered_graph.op_seqs().at(new_op_seq_ind),
337 _lowered_graph.graph().operations())
// Removes redundant converting OpSequences by calling removeContiguousConvertOpSequences(),
// logging the OpSequences before and after. The trailing comment sketches a not-yet-handled
// case (see the TODO below).
342 void Fp32ToFp16Converter::optimize()
344 printOpSequences("BEFORE opt");
346 removeContiguousConvertOpSequences();
348 printOpSequences("AFTER removeContiguousConverts");
350 // TODO Handle Split from the beginning of the model. ex) MODELS/inception_module
354 // OP#0---------------------. // model_input
356 // [FP32TO16] // OpSeq#0 [FP32TO16] // OpSeq#1
360 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
365 // OP#0 // model_input
367 // [FP32TO16] // OpSeq#4
369 // OP#3---------------------------.
371 // [OPERATION] // OpSeq#2 [OPERATION] // OpSeq#3
// Visits every OpSequence of the lowered graph and converts the operand types of those on the
// targeted backend via convertOperandsOfOpSequence().
374 void Fp32ToFp16Converter::convertOperands()
376 _lowered_graph.op_seqs().iterate(
377 [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
378 const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
379 assert(lower_info != nullptr);
380 // For now, only acl_cl fully supports fp16
// NOTE(review): the statement guarded by this check is not visible in this listing;
// presumably OpSequences on other backends are skipped - confirm.
381 if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
384 // Convert input,output operands' type to fp16
385 convertOperandsOfOpSequence(op_seq);
// Sets the data type of the input/output operands of every operation in `op_seq` to FLOAT16.
//
// Each operand reached below is tested against two conditions before its type is changed:
//  - inputs: the node is a ConvertFp32ToFp16, or the operand is a graph input;
//    outputs: the node is a ConvertFp16ToFp32, or the operand is a graph output;
//  - the operand is constant or its type is not FLOAT32.
// NOTE(review): the statements guarded by these `if`s are not visible in this listing;
// presumably they skip the operand so it keeps its type - confirm.
389 void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
391 auto &operands = _lowered_graph.graph().operands();
392 const auto &operations = _lowered_graph.graph().operations();
// Despite the `op_seq_` prefix, these two are the inputs/outputs of the whole graph.
393 const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
394 const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
396 for (const auto &op_idx : op_seq)
398 const auto &node = operations.at(op_idx);
// inputs of the operation
399 for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
401 if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
404 auto &obj = operands.at(ind);
405 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
408 obj.type(ir::DataType::FLOAT16);
410 VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
// outputs of the operation
413 for (const auto &ind : node.getOutputs())
415 if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
418 auto &obj = operands.at(ind);
419 if (obj.isConstant() || obj.typeInfo().type() != ir::DataType::FLOAT32)
422 obj.type(ir::DataType::FLOAT16);
424 VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl;
// Converts the stored data of every constant FLOAT32 operand in the graph to fp16 and sets the
// operand's type to FLOAT16.
429 void Fp32ToFp16Converter::convertDatas()
431 _lowered_graph.graph().operands().iterate([&](const ir::OperandIndex &ind, ir::Operand &obj) {
432 const auto type = obj.typeInfo().type();
433 if (type == ir::DataType::FLOAT32 && obj.isConstant())
435 auto data = obj.data();
436 assert(data != nullptr);
// element count derived from the fp32 byte size; the new buffer is sized for fp16 elements
438 size_t num_elements = obj.operandSize() / ir::sizeOfDataType(type);
439 size_t new_ptr_size = num_elements * sizeof(float16);
440 auto new_ptr = std::make_unique<uint8_t[]>(new_ptr_size);
441 copyDataFromFp32ToFp16(reinterpret_cast<const float *>(data->base()),
442 reinterpret_cast<float16 *>(new_ptr.get()), num_elements);
// NOTE(review): `new_ptr` is a local owner, so this is only safe if ir::CachedData copies the
// bytes it is given rather than keeping the pointer - confirm against ir::CachedData.
445 auto new_data = std::make_unique<ir::CachedData>(new_ptr.get(), new_ptr_size);
447 obj.data(std::move(new_data));
448 obj.type(ir::DataType::FLOAT16);
449 VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl;
// Logs every OpSequence of the lowered graph through VERBOSE, using ir::getStrFromOpSeq().
//
// @param pre_msg   logged before the OpSequences when non-empty
// @param post_msg  logged after the OpSequences when non-empty
454 void Fp32ToFp16Converter::printOpSequences(const std::string &pre_msg, const std::string &post_msg)
456 if (pre_msg.empty() == false)
458 VERBOSE(Fp32ToFp16Converter) << pre_msg << std::endl;
461 _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, const ir::OpSequence &op_seq) {
462 VERBOSE(Fp32ToFp16Converter) << ir::getStrFromOpSeq(op_seq, _lowered_graph.graph().operations())
466 if (post_msg.empty() == false)
468 VERBOSE(Fp32ToFp16Converter) << post_msg << std::endl;
// Tells whether the operand `op_ind` is a candidate for a converting operation.
//
// @return true when the operand is not constant and its data type is FLOAT32
472 bool Fp32ToFp16Converter::checkOperandType(const ir::OperandIndex &op_ind) const
474 const auto &operands = _lowered_graph.graph().operands();
475 const auto &obj = operands.at(op_ind);
476 return (obj.isConstant() == false && obj.typeInfo().type() == ir::DataType::FLOAT32);
// Checks the structural assumption the pass relies on: every (defined) input of `op_seq` is an
// input of its first operation, and every output of `op_seq` is an output of its last operation.
// NOTE(review): the return statements are not visible in this listing; presumably a failed
// `contains` check returns false and falling through both loops returns true - confirm.
// NOTE(review): indexes operations().at(0) and size() - 1, so `op_seq` is assumed non-empty.
479 bool Fp32ToFp16Converter::checkOperandsOfOpSequence(const ir::OpSequence &op_seq) const
481 const auto &operations = _lowered_graph.graph().operations();
483 // the first node's input
484 const auto &first_node_ind = op_seq.operations().at(0);
485 const auto &first_node = operations.at(first_node_ind);
486 const auto &first_node_inputs = first_node.getInputs();
487 for (const auto &op_seq_input_ind : op_seq.getInputs() | ir::Remove::UNDEFINED)
489 if (first_node_inputs.contains(op_seq_input_ind) == false)
493 // the last node's output
494 size_t last_ind = op_seq.size() - 1;
495 const auto &last_node_ind = op_seq.operations().at(last_ind);
496 const auto &last_node = operations.at(last_node_ind);
497 const auto &last_node_outputs = last_node.getOutputs();
498 for (const auto &op_seq_output_ind : op_seq.getOutputs())
500 if (last_node_outputs.contains(op_seq_output_ind) == false)
// Creates a new operand in the graph with the same shape and type info as operand `op_ind`.
// NOTE(review): the return statement is not visible in this listing; presumably the index of
// the new operand (`new_op_ind`) is returned - confirm.
507 ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &op_ind)
509 auto &operands = _lowered_graph.graph().operands();
510 const auto &obj = operands.at(op_ind);
511 auto new_op_ind = operands.emplace(obj.shape(), obj.typeInfo());
// Registers lower info for the new operand `new_op_ind`. Its def and use permute factors are
// both built from the backend and layout of the OpSequence `op_seq_ind`.
515 void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
516 const ir::OperandIndex &new_op_ind)
518 const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
519 assert(lower_info != nullptr);
520 auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
521 auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
522 new_lower_info->addDefPermuteFactor(permute_factor);
523 new_lower_info->addUsePermuteFactor(permute_factor);
524 _lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
// Registers lower info for the new OpSequence `new_op_seq_ind`, using the same backend and
// layout as the existing OpSequence `op_seq_ind`.
527 void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
528 const ir::OpSequenceIndex &new_op_seq_ind)
530 const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
531 assert(lower_info != nullptr);
533 auto new_lower_info =
534 std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout());
535 _lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
// Rewires the OpSequence `op_seq_ind` so that it reads `new_op_ind` instead of
// `op_seq_input_ind`: the input is replaced in both the OpSequence and its first operation, and
// the first operation is moved from the old operand's uses to the new operand's uses.
//
// Preconditions (asserted): the first operation has `op_seq_input_ind` as an input, and that
// operand is not constant.
538 void Fp32ToFp16Converter::manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
539 const ir::OperandIndex &op_seq_input_ind,
540 const ir::OperandIndex &new_op_ind)
542 auto &operands = _lowered_graph.graph().operands();
543 auto &operations = _lowered_graph.graph().operations();
545 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
547 auto &first_node_ind = op_seq.operations().at(0);
548 auto &first_node = operations.at(first_node_ind);
549 assert(first_node.getInputs().contains(op_seq_input_ind));
551 auto &input_obj = operands.at(op_seq_input_ind);
552 assert(input_obj.isConstant() == false);
554 auto &new_op_obj = operands.at(new_op_ind);
556 // The same inputs having the index as op_seq_input_ind are replaced all at once
557 op_seq.replaceInputs(op_seq_input_ind, new_op_ind);
558 first_node.replaceInputs(op_seq_input_ind, new_op_ind);
560 // op_seq_obj doesn't have uses/def
561 input_obj.removeUse(first_node_ind);
562 new_op_obj.insertUse(first_node_ind);
// Rewires the OpSequence `op_seq_ind` so that it defines `new_op_ind` instead of
// `op_seq_output_ind`: the output is replaced in both the OpSequence and its last operation,
// the old operand's def is cleared, and the last operation becomes the def of the new operand.
//
// Preconditions (asserted): the last operation has `op_seq_output_ind` as an output, that
// operand is not constant, and its current def is the last operation.
565 void Fp32ToFp16Converter::manipulateOutput(const ir::OpSequenceIndex &op_seq_ind,
566 const ir::OperandIndex &op_seq_output_ind,
567 const ir::OperandIndex &new_op_ind)
569 auto &operands = _lowered_graph.graph().operands();
570 auto &operations = _lowered_graph.graph().operations();
572 auto &op_seq = _lowered_graph.op_seqs().at(op_seq_ind);
574 size_t last_ind = op_seq.size() - 1;
575 auto &last_node_ind = op_seq.operations().at(last_ind);
576 auto &last_node = operations.at(last_node_ind);
577 assert(last_node.getOutputs().contains(op_seq_output_ind));
579 auto &output_obj = operands.at(op_seq_output_ind);
580 assert(output_obj.isConstant() == false);
582 auto &new_op_obj = operands.at(new_op_ind);
584 // The same outputs having the index as op_seq_output_ind are replaced all at once
585 op_seq.replaceOutputs(op_seq_output_ind, new_op_ind);
586 last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
588 // op_seq_obj doesn't have uses/def
589 assert(output_obj.getDef() == last_node_ind);
590 output_obj.unsetDef();
591 new_op_obj.setDef(last_node_ind);
// Adds a ConvertFp32ToFp16 operation to the graph with input `op_seq_input_ind` and output
// `new_op_ind`, and updates use/def: the new node is added to the input operand's uses and set
// as the def of the output operand.
// NOTE(review): the return type line and the return statement are not visible in this listing;
// callers use the result as the new node's operation index (`new_node_ind`) - confirm.
595 Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_seq_input_ind,
596 const ir::OperandIndex &new_op_ind)
598 auto &operands = _lowered_graph.graph().operands();
599 auto &operations = _lowered_graph.graph().operations();
601 auto &input_obj = operands.at(op_seq_input_ind);
602 auto &new_op_obj = operands.at(new_op_ind);
604 std::unique_ptr<ir::Operation> new_node(
605 new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
606 const auto new_node_ind = operations.push(std::move(new_node));
608 input_obj.insertUse(new_node_ind);
609 new_op_obj.setDef(new_node_ind);
// Adds a ConvertFp16ToFp32 operation to the graph with input `new_op_ind` and output
// `op_seq_output_ind`, and updates use/def: the new node is added to the new operand's uses and
// set as the def of the original output operand.
// NOTE(review): the return type line and the return statement are not visible in this listing;
// callers use the result as the new node's operation index (`new_node_ind`) - confirm.
615 Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_seq_output_ind,
616 const ir::OperandIndex &new_op_ind)
618 auto &operands = _lowered_graph.graph().operands();
619 auto &operations = _lowered_graph.graph().operations();
621 auto &output_obj = operands.at(op_seq_output_ind);
622 auto &new_op_obj = operands.at(new_op_ind);
624 std::unique_ptr<ir::Operation> new_node(
625 new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
626 const auto new_node_ind = operations.push(std::move(new_node));
628 new_op_obj.insertUse(new_node_ind);
629 output_obj.setDef(new_node_ind);
// Creates a new OpSequence containing only the operation `node_index` and adds it to the
// lowered graph.
//
// The new OpSequence takes its layout from the lower info of `op_seq_ind`, and its
// inputs/outputs from the operation itself.
//
// @return index of the newly added OpSequence
634 ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex &op_seq_ind,
635 const ir::OperationIndex &node_index)
637 auto &node = _lowered_graph.graph().operations().at(node_index);
638 const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
639 assert(lower_info != nullptr);
640 auto layout = lower_info->layout();
642 auto op_seq = std::make_unique<ir::OpSequence>(layout);
643 op_seq->appendOperation(node_index);
644 op_seq->setOutputs(node.getOutputs());
645 op_seq->setInputs(node.getInputs());
647 return _lowered_graph.op_seqs().emplace(std::move(op_seq));
650 // Is the output operand of an op_seq (Fp16To32) the input of the next op_seq (Fp32To16)?
651 // If so, connect Fp16To32's previous OpSeq to Fp32To16's next OpSeq
653 // Assume that each OpSequence has a single operation, for easy explanation
657 // [OPERATION] // OpSeq#0
661 // [FP16TO32] // OpSeq#1
665 // [FP32TO16] // OpSeq#2
669 // [OPERATION] // OpSeq#3
674 // [OPERATION] // OpSeq#0
678 // [OPERATION] // OpSeq#3
// Removes Fp16To32 OpSequences that are directly followed by Fp32To16 OpSequences.
// The steps run in a fixed order: everything to delete is collected first, the consumers are
// rewired next, and only then are the OpSequences, operations and operands deleted.
680 void Fp32ToFp16Converter::removeContiguousConvertOpSequences()
682 // Prepare InputToOpSeqs map
683 const auto input_to_op_seqs = prepareInputToOpSeqs();
685 // Find OpSequences to delete (Fp16To32 -> following Fp32To16s).
686 auto opseq_map_to_delete = findOpSequencesContiguous(input_to_op_seqs);
688 // Flatten into one list of OpSequences and collect the output operands of their nodes
689 auto list_to_delete_op_seqs = getListOpSequences(opseq_map_to_delete);
690 auto list_to_delete_ops = findOperationsToDelete(list_to_delete_op_seqs);
692 // Before deleting, manipulateInputs of OpSeq & Operation
693 manipulateContiguousOpSequences(input_to_op_seqs, opseq_map_to_delete);
695 // Delete OpSequences & Operations & obj's use/def & operands
696 deleteContiguousOpSequences(list_to_delete_op_seqs, list_to_delete_ops);
// Finds the converting OpSequences that cancel each other out.
//
// For every OpSequence in `_list_fp16_to_fp32`, looks up the OpSequences that consume its output
// operand (via `input_to_op_seqs`) and keeps those that are in `_list_fp32_to_fp16`.
//
// @param input_to_op_seqs  map from an operand index to the OpSequences that take it as input
// @return map from an Fp16To32 OpSequence index to the Fp32To16 OpSequence indices that follow it
699 Fp32ToFp16Converter::OpSeqIndexToOpSeqIndexList
700 Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_seqs) const
702 const auto &op_seqs = _lowered_graph.op_seqs();
703 OpSeqIndexToOpSeqIndexList opseq_map_to_delete;
706 // Assume that each OpSequence holds a single Operation, for easy explanation
712 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
714 // OP#1 // output_ind_fp16_to_fp32
716 // [FP32TO16] // op_seq_ind
722 for (auto it = _list_fp16_to_fp32.cbegin(); it != _list_fp16_to_fp32.cend(); ++it)
724 // fp16_to_fp32's input/output num is always 1
725 auto &op_seq_ind_fp16_to_fp32 = *it;
726 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
727 assert(op_seq_fp16_to_fp32.size() == 1);
728 assert(op_seq_fp16_to_fp32.getInputs().size() == 1);
730 auto &output_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getOutputs().at(0);
731 auto found_input_in_op_seqs = input_to_op_seqs.find(output_ind_fp16_to_fp32);
// NOTE(review): the statement guarded by this check is not visible in this listing;
// presumably an output that no OpSequence consumes is skipped - confirm.
732 if (found_input_in_op_seqs == input_to_op_seqs.end())
737 // DO NOT FORGET THE CASE
742 // OP#0---------------------.
744 // [FP32TO16] [FP32TO16]
748 // [OPERATION] [OPERATION]
750 for (const auto &op_seq_ind : found_input_in_op_seqs->second)
752 auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
753 if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
// Both statements below add op_seq_ind to the entry keyed by the Fp16To32 OpSequence.
// NOTE(review): the branch structure between them is not fully visible in this listing.
755 if (opseq_map_to_delete.find(op_seq_ind_fp16_to_fp32) == opseq_map_to_delete.end())
757 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].emplace(op_seq_ind);
761 opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
764 VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)"
765 << " to " << op_seq_ind << "(ToFp16)" << std::endl;
770 return opseq_map_to_delete;
// Builds a map from each (defined) input operand index to the OpSequences that take that
// operand as an input.
//
// @return the operand-index -> OpSequence-indices map
773 Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() const
775 const auto &op_seqs = _lowered_graph.op_seqs();
777 InputToOpSeqs input_to_op_seqs;
778 op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
779 for (auto &&input : op_seq.getInputs() | ir::Remove::UNDEFINED)
// Both statements below add op_seq_idx to the entry for `input`.
// NOTE(review): the branch structure between them is not fully visible in this listing.
781 auto it = input_to_op_seqs.find(input);
782 if (it == input_to_op_seqs.end())
784 input_to_op_seqs[input].emplace(op_seq_idx);
788 input_to_op_seqs[input].insert(op_seq_idx);
793 return input_to_op_seqs;
// Flattens `opseq_map_to_delete` into one collection of OpSequence indices: each key (an
// Fp16To32 OpSequence) and each of its mapped values (Fp32To16 OpSequences) is added once.
// NOTE(review): the declaration of `list` and the return statement are not visible in this
// listing; presumably `list` is a local OpSeqIndexList that is returned - confirm.
796 Fp32ToFp16Converter::OpSeqIndexList
797 Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete) const
800 for (const auto &it : opseq_map_to_delete)
802 const auto &opseq_ind_fp16_to_fp32 = it.first;
803 if (list.find(opseq_ind_fp16_to_fp32) == list.end())
805 list.emplace(opseq_ind_fp16_to_fp32);
808 for (const auto &opseq_ind_fp32_to_fp16 : it.second)
810 if (list.find(opseq_ind_fp32_to_fp16) == list.end())
812 list.emplace(opseq_ind_fp32_to_fp16);
// Collects the output operand indices of the converting nodes held by the given OpSequences.
// Despite the function name, the result is a sequence of operand indices, not operation
// indices; the caller passes it to deleteContiguousOpSequences(), which removes these operands.
//
// Preconditions (asserted): each OpSequence holds exactly one operation, and that operation is
// a ConvertFp32ToFp16 or a ConvertFp16ToFp32.
//
// @param list_to_delete_op_seqs  OpSequences scheduled for deletion
// @return output operand indices of the nodes in those OpSequences
819 ir::OperandIndexSequence
820 Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete_op_seqs) const
822 const auto &operations = _lowered_graph.graph().operations();
823 const auto &op_seqs = _lowered_graph.op_seqs();
825 ir::OperandIndexSequence list_to_delete_ops;
826 for (const auto &op_seq_ind : list_to_delete_op_seqs)
828 const auto &op_seq = op_seqs.at(op_seq_ind);
829 assert(op_seq.size() == 1);
831 const auto &first_node_ind = op_seq.operations().at(0);
832 const auto &first_node = operations.at(first_node_ind);
833 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
834 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
836 for (const auto &ind : first_node.getOutputs())
838 list_to_delete_ops.append(ind);
842 return list_to_delete_ops;
// Bypasses each contiguous Fp16To32 -> Fp32To16 pair: every OpSequence that consumes the
// Fp32To16 output is rewired (via manipulateInput) to read the Fp16To32 input directly.
//
// @param input_to_op_seqs     map from an operand index to the OpSequences that take it as input
// @param opseq_map_to_delete  map from an Fp16To32 OpSequence to the Fp32To16 OpSequences after it
845 void Fp32ToFp16Converter::manipulateContiguousOpSequences(
846 const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
848 auto &op_seqs = _lowered_graph.op_seqs();
853 // OP#0 // input_ind_fp16_to_fp32
855 // [FP16TO32] // op_seq_ind_fp16_to_fp32 & op_seq_fp16_to_fp32
859 // [FP32TO16] // op_seq_ind_fp32_to_fp16, op_seq_fp32_to_fp16
861 // OP#2 // output_ind_fp32_to_fp16
863 // [OPERATION] // op_seq_ind_next_to_fp16
865 for (auto &&it : opseq_map_to_delete)
867 // fp16_to_fp32's input/output num is always 1
868 auto &op_seq_ind_fp16_to_fp32 = it.first;
869 auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
870 auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
872 for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
874 auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
875 assert(op_seq_fp32_to_fp16.size() == 1);
876 assert(op_seq_fp32_to_fp16.getInputs().size() == 1);
878 auto &output_ind_fp32_to_fp16 = op_seq_fp32_to_fp16.getOutputs().at(0);
879 auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
880 assert(found_next_to_fp16 != input_to_op_seqs.end());
// Rewire every consumer of the Fp32To16 output to the Fp16To32 input.
882 for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
884 manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
889 // OP#0 // input_ind_fp16_to_fp32
891 // [OPERATION] // op_seq_ind_next_to_fp16
// Deletes the given converting OpSequences together with their single operation, detaches that
// operation from the operands' use/def information, and finally removes the listed operands.
//
// Preconditions (asserted): each OpSequence holds exactly one operation, and that operation is
// a ConvertFp32ToFp16 or a ConvertFp16ToFp32.
//
// @param list_to_delete_op_seqs  OpSequences to remove
// @param list_to_delete_ops      operand indices to remove after the OpSequences are gone
897 void Fp32ToFp16Converter::deleteContiguousOpSequences(
898 const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops)
900 auto &operands = _lowered_graph.graph().operands();
901 auto &operations = _lowered_graph.graph().operations();
902 auto &op_seqs = _lowered_graph.op_seqs();
904 for (const auto &op_seq_ind : list_to_delete_op_seqs)
906 auto &op_seq = op_seqs.at(op_seq_ind);
907 assert(op_seq.size() == 1);
908 VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl;
910 auto &first_node_ind = op_seq.operations().at(0);
911 auto &first_node = operations.at(first_node_ind);
912 assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
913 first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
914 VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Remove the node from the uses of each of its input operands.
917 for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
919 auto &obj = operands.at(ind);
920 obj.removeUse(first_node_ind);
921 VERBOSE(Fp32ToFp16Converter)
922 << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl;
// Output operands: the node must be their def.
// NOTE(review): the call that actually clears the def is not visible in this listing; only
// the assertion and the log line are - confirm against the full file.
926 for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
928 auto &obj = operands.at(ind);
929 assert(obj.getDef() == first_node_ind);
931 VERBOSE(Fp32ToFp16Converter)
932 << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl;
// Remove the operation first, then the OpSequence that contained it.
936 operations.remove(first_node_ind);
937 VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl;
940 op_seqs.remove(op_seq_ind);
941 VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl;
// Finally remove the operands collected for deletion.
945 for (const auto &ind : list_to_delete_ops)
947 operands.remove(ind);
948 VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
952 } // namespace compiler